Skip to content

Commit 1de2d2f

Browse files
committed
Merge remote-tracking branch 'origin/master' into fix-regexpp
2 parents 8a95004 + 8d2fd5d commit 1de2d2f

20 files changed

+1011
-27
lines changed

.changeset/cold-nails-teach.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"eslint-plugin-regexp": minor
3+
---
4+
5+
Add support for string literals to `regexp/no-empty-alternative`

.changeset/early-islands-press.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"eslint-plugin-regexp": major
3+
---
4+
5+
Add `regexp/simplify-set-operations` rule

.changeset/early-islands-press2.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"eslint-plugin-regexp": minor
3+
---
4+
5+
Improve `regexp/negation` rule to report nested negation character classes

.changeset/hungry-eels-check.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"eslint-plugin-regexp": major
3+
---
4+
5+
Add `regexp/no-empty-string-literal` rule

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ The `plugin:regexp/all` config enables all rules. It's meant for testing, not fo
140140
| [control-character-escape](https://ota-meshi.github.io/eslint-plugin-regexp/rules/control-character-escape.html) | enforce consistent escaping of control characters || | 🔧 | |
141141
| [negation](https://ota-meshi.github.io/eslint-plugin-regexp/rules/negation.html) | enforce use of escapes on negation || | 🔧 | |
142142
| [no-dupe-characters-character-class](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-dupe-characters-character-class.html) | disallow duplicate characters in the RegExp character class || | 🔧 | |
143+
| [no-empty-string-literal](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-empty-string-literal.html) | disallow empty string literals in character classes || | | |
143144
| [no-extra-lookaround-assertions](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-extra-lookaround-assertions.html) | disallow unnecessary nested lookaround assertions || | 🔧 | |
144145
| [no-invisible-character](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-invisible-character.html) | disallow invisible raw character || | 🔧 | |
145146
| [no-legacy-features](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-legacy-features.html) | disallow legacy RegExp features || | | |
@@ -169,6 +170,7 @@ The `plugin:regexp/all` config enables all rules. It's meant for testing, not fo
169170
| [prefer-set-operation](https://ota-meshi.github.io/eslint-plugin-regexp/rules/prefer-set-operation.html) | prefer character class set operations instead of lookarounds || | 🔧 | |
170171
| [require-unicode-regexp](https://ota-meshi.github.io/eslint-plugin-regexp/rules/require-unicode-regexp.html) | enforce the use of the `u` flag | | | 🔧 | |
171172
| [require-unicode-sets-regexp](https://ota-meshi.github.io/eslint-plugin-regexp/rules/require-unicode-sets-regexp.html) | enforce the use of the `v` flag | | | 🔧 | |
173+
| [simplify-set-operations](https://ota-meshi.github.io/eslint-plugin-regexp/rules/simplify-set-operations.html) | require simplify set operations || | 🔧 | |
172174
| [sort-alternatives](https://ota-meshi.github.io/eslint-plugin-regexp/rules/sort-alternatives.html) | sort alternatives if order doesn't matter | | | 🔧 | |
173175
| [use-ignore-case](https://ota-meshi.github.io/eslint-plugin-regexp/rules/use-ignore-case.html) | use the `i` flag if it simplifies the pattern || | 🔧 | |
174176

docs/rules/index.md

+2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ sidebarDepth: 0
4747
| [control-character-escape](control-character-escape.md) | enforce consistent escaping of control characters || | 🔧 | |
4848
| [negation](negation.md) | enforce use of escapes on negation || | 🔧 | |
4949
| [no-dupe-characters-character-class](no-dupe-characters-character-class.md) | disallow duplicate characters in the RegExp character class || | 🔧 | |
50+
| [no-empty-string-literal](no-empty-string-literal.md) | disallow empty string literals in character classes || | | |
5051
| [no-extra-lookaround-assertions](no-extra-lookaround-assertions.md) | disallow unnecessary nested lookaround assertions || | 🔧 | |
5152
| [no-invisible-character](no-invisible-character.md) | disallow invisible raw character || | 🔧 | |
5253
| [no-legacy-features](no-legacy-features.md) | disallow legacy RegExp features || | | |
@@ -76,6 +77,7 @@ sidebarDepth: 0
7677
| [prefer-set-operation](prefer-set-operation.md) | prefer character class set operations instead of lookarounds || | 🔧 | |
7778
| [require-unicode-regexp](require-unicode-regexp.md) | enforce the use of the `u` flag | | | 🔧 | |
7879
| [require-unicode-sets-regexp](require-unicode-sets-regexp.md) | enforce the use of the `v` flag | | | 🔧 | |
80+
| [simplify-set-operations](simplify-set-operations.md) | require simplify set operations || | 🔧 | |
7981
| [sort-alternatives](sort-alternatives.md) | sort alternatives if order doesn't matter | | | 🔧 | |
8082
| [use-ignore-case](use-ignore-case.md) | use the `i` flag if it simplifies the pattern || | 🔧 | |
8183

docs/rules/negation.md

+6
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@ var foo = /[^\P{ASCII}]/u
5353

5454
Nothing.
5555

56+
## :couple: Related rules
57+
58+
- [regexp/simplify-set-operations]
59+
60+
[regexp/simplify-set-operations]: ./simplify-set-operations.md
61+
5662
## :rocket: Version
5763

5864
This rule was introduced in eslint-plugin-regexp v0.4.0

docs/rules/no-empty-alternative.md

+1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ var foo = /a+|b*/
3737
var foo = /a+|b+|/
3838
var foo = /\|\||\|||\|\|\|/
3939
var foo = /a(?:a|bc|def|h||ij|k)/
40+
var foo = /[abc\q{def|}]/v
4041
```
4142

4243
</eslint-code-block>

docs/rules/no-empty-string-literal.md

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
---
2+
pageClass: "rule-details"
3+
sidebarDepth: 0
4+
title: "regexp/no-empty-string-literal"
5+
description: "disallow empty string literals in character classes"
6+
---
7+
# regexp/no-empty-string-literal
8+
9+
💼 This rule is enabled in the ✅ `plugin:regexp/recommended` config.
10+
11+
<!-- end auto-generated rule header -->
12+
13+
> disallow empty string literals in character classes
14+
15+
## :book: Rule Details
16+
17+
This rule reports empty string literals in character classes.
18+
19+
If the empty string literal is supposed to match the empty string, then use a
20+
quantifier instead. For example, `[ab\q{}]` should be written as `[ab]?`.
21+
22+
This rule does not report empty alternatives in string literals. (e.g. `/[\q{a|}]/v`)\
23+
If you want to report empty alternatives in string literals, use the [regexp/no-empty-alternative] rule.
24+
25+
<eslint-code-block>
26+
27+
```js
28+
/* eslint regexp/no-empty-string-literal: "error" */
29+
30+
/* ✓ GOOD */
31+
var foo = /[\q{a}]/v;
32+
var foo = /[\q{abc}]/v;
33+
var foo = /[\q{a|}]/v;
34+
35+
/* ✗ BAD */
36+
var foo = /[\q{}]/v;
37+
var foo = /[\q{|}]/v;
38+
```
39+
40+
</eslint-code-block>
41+
42+
## :wrench: Options
43+
44+
Nothing.
45+
46+
## :couple: Related rules
47+
48+
- [regexp/no-empty-alternative]
49+
50+
[regexp/no-empty-alternative]: ./no-empty-alternative.md
51+
52+
## :rocket: Version
53+
54+
:exclamation: <badge text="This rule has not been released yet." vertical="middle" type="error"> ***This rule has not been released yet.*** </badge>
55+
56+
## :mag: Implementation
57+
58+
- [Rule source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/lib/rules/no-empty-string-literal.ts)
59+
- [Test source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/tests/lib/rules/no-empty-string-literal.ts)

docs/rules/simplify-set-operations.md

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
---
2+
pageClass: "rule-details"
3+
sidebarDepth: 0
4+
title: "regexp/simplify-set-operations"
5+
description: "require simplify set operations"
6+
---
7+
# regexp/simplify-set-operations
8+
9+
💼 This rule is enabled in the ✅ `plugin:regexp/recommended` config.
10+
11+
🔧 This rule is automatically fixable by the [`--fix` CLI option](https://eslint.org/docs/latest/user-guide/command-line-interface#--fix).
12+
13+
<!-- end auto-generated rule header -->
14+
15+
> require simplify set operations
16+
17+
## :book: Rule Details
18+
19+
This rule aims to optimize patterns by simplifying set operations in character classes (with `v` flag).
20+
21+
This rule does not report simple nested negations. (e.g. `/[^[^abc]]/v`)\
22+
If you want to report simple nested negations, use the [regexp/negation] rule.
23+
24+
<eslint-code-block fix>
25+
26+
```js
27+
/* eslint regexp/simplify-set-operations: "error" */
28+
29+
/* ✗ BAD */
30+
var re = /[a&&[^b]]/v; // -> /[a--b]/v
31+
var re = /[[^b]&&a]/v; // -> /[a--b]/v
32+
var re = /[a--[^b]]/v; // -> /[a&&b]/v
33+
var re = /[[^a]&&[^b]]/v; // -> /[^ab]/v
34+
var re = /[[^a][^b]]/v; // -> /[^a&&b]/v
35+
36+
/* ✓ GOOD */
37+
var re = /[a--b]/v;
38+
var re = /[a&&b]/v;
39+
var re = /[^ab]/v;
40+
var re = /[^a&&b]/v;
41+
```
42+
43+
</eslint-code-block>
44+
45+
### How does this rule work?
46+
47+
This rule attempts to simplify set operations in the ways listed below:
48+
49+
#### De Morgan's laws
50+
51+
This rule uses De Morgan's laws to find patterns in which multiple negations can be converted into a single negation, reports them, and auto-fixes them.\
52+
For example, `/[[^a]&&[^b]]/v` is equivalent to `/[^ab]/v`, and `/[[^a][^b]]/v` is equivalent to `/[^a&&b]/v`.
53+
54+
See <https://en.wikipedia.org/wiki/De_Morgan's_laws>.
55+
56+
#### Conversion from the intersection to the subtraction
57+
58+
Intersection sets with complement operands can be converted to difference sets.\
59+
The rule looks for character class intersections with negated operands, reports them, and auto-fixes them.\
60+
For example, `/[a&&[^b]]/v` is equivalent to `/[a--b]/v`, and `/[[^a]&&b]/v` is equivalent to `/[b--a]/v`.
61+
62+
#### Conversion from the subtraction to the intersection
63+
64+
A difference set with a complement operand on the right side can be converted to an intersection set.\
65+
The rule looks for character class subtractions with a negated operand on the right side, reports them, and auto-fixes them.\
66+
For example, `/[a--[^b]]/v` is equivalent to `/[a&&b]/v`.
67+
68+
### Auto Fixes
69+
70+
This rule's auto-fix does not remove unnecessary brackets. For example, `/[[^a]&&[^b]]/v` will be automatically fixed to `/[^[a][b]]/v`.\
71+
If you also want the unnecessary brackets removed (e.g. auto-fixed to `/[^ab]/v`), use the [regexp/no-useless-character-class] rule together with this rule.
72+
73+
## :wrench: Options
74+
75+
Nothing.
76+
77+
## :couple: Related rules
78+
79+
- [regexp/negation]
80+
- [regexp/no-useless-character-class]
81+
82+
[regexp/negation]: ./negation.md
83+
[regexp/no-useless-character-class]: ./no-useless-character-class.md
84+
85+
## :rocket: Version
86+
87+
:exclamation: <badge text="This rule has not been released yet." vertical="middle" type="error"> ***This rule has not been released yet.*** </badge>
88+
89+
## :mag: Implementation
90+
91+
- [Rule source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/lib/rules/simplify-set-operations.ts)
92+
- [Test source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/tests/lib/rules/simplify-set-operations.ts)

lib/configs/recommended.ts

+2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ export const rules = {
2323
"regexp/no-empty-character-class": "error",
2424
"regexp/no-empty-group": "error",
2525
"regexp/no-empty-lookarounds-assertion": "error",
26+
"regexp/no-empty-string-literal": "error",
2627
"regexp/no-escape-backspace": "error",
2728
"regexp/no-extra-lookaround-assertions": "error",
2829
"regexp/no-invalid-regexp": "error",
@@ -65,6 +66,7 @@ export const rules = {
6566
"regexp/prefer-star-quantifier": "error",
6667
"regexp/prefer-unicode-codepoint-escapes": "error",
6768
"regexp/prefer-w": "error",
69+
"regexp/simplify-set-operations": "error",
6870
"regexp/sort-flags": "error",
6971
"regexp/strict": "error",
7072
"regexp/use-ignore-case": "error",

lib/rules/negation.ts

+46-19
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,33 @@
1-
import { toCharSet, toUnicodeSet } from "regexp-ast-analysis"
1+
import { toUnicodeSet } from "regexp-ast-analysis"
22
import type {
3+
CharacterClass,
4+
CharacterClassElement,
5+
CharacterUnicodePropertyCharacterSet,
36
EscapeCharacterSet,
4-
UnicodePropertyCharacterSet,
7+
ExpressionCharacterClass,
58
} from "@eslint-community/regexpp/ast"
69
import type { RegExpVisitor } from "@eslint-community/regexpp/visitor"
710
import type { RegExpContext } from "../utils"
811
import { createRule, defineRegexpVisitor } from "../utils"
12+
import { assertNever } from "../utils/util"
13+
14+
type NegatableCharacterClassElement =
15+
| CharacterClass
16+
| ExpressionCharacterClass
17+
| EscapeCharacterSet
18+
| CharacterUnicodePropertyCharacterSet
19+
20+
/** Checks whether the given character class is negatable. */
21+
function isNegatableCharacterClassElement<N extends CharacterClassElement>(
22+
node: N,
23+
): node is N & NegatableCharacterClassElement {
24+
return (
25+
node.type === "CharacterClass" ||
26+
node.type === "ExpressionCharacterClass" ||
27+
(node.type === "CharacterSet" &&
28+
(node.kind !== "property" || !node.strings))
29+
)
30+
}
931

1032
export default createRule("negation", {
1133
meta: {
@@ -36,19 +58,17 @@ export default createRule("negation", {
3658
}
3759

3860
const element = ccNode.elements[0]
39-
if (element.type !== "CharacterSet") {
61+
if (!isNegatableCharacterClassElement(element)) {
4062
return
4163
}
42-
if (element.kind === "property" && element.strings) {
43-
// Unicode property escape with property of strings.
44-
// Actually the pattern passing through this branch is an invalid pattern,
45-
// but it has to be checked because of the type guards.
64+
if (element.type !== "CharacterSet" && !element.negate) {
4665
return
4766
}
4867

4968
if (
5069
flags.ignoreCase &&
5170
!flags.unicodeSets &&
71+
element.type === "CharacterSet" &&
5272
element.kind === "property"
5373
) {
5474
// The ignore case canonicalization affects negated
@@ -61,7 +81,7 @@ export default createRule("negation", {
6181
// (/./, /\s/, /\d/) or inconsistent (/\w/).
6282
const ccSet = toUnicodeSet(ccNode, flags)
6383

64-
const negatedElementSet = toCharSet(
84+
const negatedElementSet = toUnicodeSet(
6585
{
6686
...element,
6787
negate: !element.negate,
@@ -96,17 +116,24 @@ export default createRule("negation", {
96116
/**
97117
* Gets the text that negates the given character class element.
98118
*/
99-
function getNegationText(
100-
node: EscapeCharacterSet | UnicodePropertyCharacterSet,
101-
) {
102-
// they are all of the form: /\\[dswp](?:\{[^{}]+\})?/
103-
let kind = node.raw[1]
119+
function getNegationText(node: NegatableCharacterClassElement) {
120+
if (node.type === "CharacterSet") {
121+
// they are all of the form: /\\[dswp](?:\{[^{}]+\})?/
122+
let kind = node.raw[1]
104123

105-
if (kind.toLowerCase() === kind) {
106-
kind = kind.toUpperCase()
107-
} else {
108-
kind = kind.toLowerCase()
109-
}
124+
if (kind.toLowerCase() === kind) {
125+
kind = kind.toUpperCase()
126+
} else {
127+
kind = kind.toLowerCase()
128+
}
110129

111-
return `\\${kind}${node.raw.slice(2)}`
130+
return `\\${kind}${node.raw.slice(2)}`
131+
}
132+
if (node.type === "CharacterClass") {
133+
return `[${node.elements.map((e) => e.raw).join("")}]`
134+
}
135+
if (node.type === "ExpressionCharacterClass") {
136+
return `[${node.raw.slice(2, -1)}]`
137+
}
138+
return assertNever(node)
112139
}

0 commit comments

Comments
 (0)