Skip to content

Commit fb20f68

Browse files
authored
feat: add support for ES2025 duplicate named capturing groups (#195)
close #194
1 parent f38e97a commit fb20f68

19 files changed

+3367
-87
lines changed

src/ast.ts

+12-1
Original file line numberDiff line numberDiff line change
@@ -428,10 +428,21 @@ export interface Character extends NodeBase {
428428
* The backreference.
429429
* E.g. `\1`, `\k<name>`
430430
*/
431-
export interface Backreference extends NodeBase {
431+
export type Backreference = AmbiguousBackreference | UnambiguousBackreference
432+
interface BaseBackreference extends NodeBase {
432433
type: "Backreference"
433434
parent: Alternative | Quantifier
434435
ref: number | string
436+
ambiguous: boolean
437+
resolved: CapturingGroup | CapturingGroup[]
438+
}
439+
export interface AmbiguousBackreference extends BaseBackreference {
440+
ref: string
441+
ambiguous: true
442+
resolved: CapturingGroup[]
443+
}
444+
export interface UnambiguousBackreference extends BaseBackreference {
445+
ambiguous: false
435446
resolved: CapturingGroup
436447
}
437448

src/ecma-versions.ts

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ export type EcmaVersion =
1010
| 2022
1111
| 2023
1212
| 2024
13-
export const latestEcmaVersion = 2024
13+
| 2025
14+
export const latestEcmaVersion = 2025

src/group-specifiers.ts

+167
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
/**
 * Bookkeeping for every GroupSpecifier (named capturing group name)
 * encountered while validating a single pattern.
 */
export interface GroupSpecifiers {
    /**
     * Forgets every recorded group name so the instance can be reused
     * for another pattern.
     */
    clear: () => void
    /**
     * @returns true if there are no GroupSpecifiers included in the pattern.
     */
    isEmpty: () => boolean
    /**
     * Notification that a Disjunction is being entered.
     * For ES2025, this opens a new nested Disjunction scope.
     */
    enterDisjunction: () => void
    /**
     * Notification that an Alternative is being entered.
     * For ES2025, this opens a new Alternative scope.
     * @param index Zero-based position of the Alternative within its Disjunction.
     */
    enterAlternative: (index: number) => void
    /**
     * Notification that the current Disjunction is being left.
     */
    leaveDisjunction: () => unknown
    /**
     * Tests whether the given group name appears anywhere in the pattern.
     */
    hasInPattern: (name: string) => boolean
    /**
     * Tests whether the given group name is already present in the current scope.
     */
    hasInScope: (name: string) => boolean
    /**
     * Records the given group name in the current scope.
     */
    addToScope: (name: string) => void
}
37+
38+
/**
 * GroupSpecifiers bookkeeping for ECMAScript versions before ES2025.
 * All names live in one flat set, so "in scope" and "in pattern" mean
 * the same thing and the disjunction/alternative hooks do nothing.
 */
export class GroupSpecifiersAsES2018 implements GroupSpecifiers {
    private readonly names = new Set<string>()

    /** Records the given group name. */
    public addToScope(name: string): void {
        this.names.add(name)
    }

    /** Tests whether the given group name has been recorded. */
    public hasInPattern(name: string): boolean {
        return this.names.has(name)
    }

    /** Same answer as `hasInPattern`: there is only one scope. */
    public hasInScope(name: string): boolean {
        return this.hasInPattern(name)
    }

    /** @returns true if no group name has been recorded. */
    public isEmpty(): boolean {
        return this.names.size === 0
    }

    /** Forgets every recorded group name. */
    public clear(): void {
        this.names.clear()
    }

    // eslint-disable-next-line class-methods-use-this
    public enterDisjunction(): void {
        // No-op: disjunction scopes are not tracked before ES2025.
    }

    // eslint-disable-next-line class-methods-use-this
    public enterAlternative(): void {
        // No-op: alternative scopes are not tracked before ES2025.
    }

    // eslint-disable-next-line class-methods-use-this
    public leaveDisjunction(): void {
        // No-op: disjunction scopes are not tracked before ES2025.
    }
}
76+
77+
/**
 * Track disjunction structure to determine whether a duplicate
 * capture group name is allowed because it is in a separate branch.
 *
 * Each instance identifies one alternative of one disjunction. Instances
 * form a tree through `parent`, and all alternatives of the same
 * disjunction share the same `base`.
 */
class BranchID {
    /** Branch that encloses this one, or `null` for the outermost branch. */
    public readonly parent: BranchID | null

    /** Identity shared by all sibling branches of the same disjunction. */
    private readonly base: BranchID

    /**
     * @param parent Parent disjunction branch, or `null` for the outermost branch.
     * @param base Branch identifying this set of sibling branches, or `null`
     * to start a new set of siblings identified by this instance.
     */
    public constructor(parent: BranchID | null, base: BranchID | null) {
        this.parent = parent
        this.base = base ?? this
    }

    /**
     * A branch is separate from another branch if they or any of
     * their parents are siblings in a given disjunction.
     *
     * Walks both ancestor chains with two nested loops. This visits every
     * (ancestor-or-self of `this`, ancestor-or-self of `other`) pair exactly
     * once, so the cost is bounded by the product of the two nesting depths.
     * A doubly recursive formulation would revisit the same pairs along
     * exponentially many paths whenever the answer is `false`.
     *
     * @param other The branch to compare against.
     * @returns true if the two branches lie in different alternatives of
     * some common disjunction.
     */
    public separatedFrom(other: BranchID): boolean {
        for (
            let mine: BranchID | null = this;
            mine !== null;
            mine = mine.parent
        ) {
            for (
                let theirs: BranchID | null = other;
                theirs !== null;
                theirs = theirs.parent
            ) {
                // Same disjunction (shared base) but a different alternative.
                if (mine.base === theirs.base && mine !== theirs) {
                    return true
                }
            }
        }
        return false
    }

    /**
     * @returns A branch nested inside this one that starts a new set of siblings.
     */
    public child(): BranchID {
        return new BranchID(this, null)
    }

    /**
     * @returns A branch with the same parent and the same sibling set as this one.
     */
    public sibling(): BranchID {
        return new BranchID(this.parent, this.base)
    }
}
113+
114+
/**
 * GroupSpecifiers bookkeeping for ES2025 and later, where the same group
 * name may be reused as long as every occurrence sits in a separate
 * alternative. Each recorded name remembers the branches it was added in.
 */
export class GroupSpecifiersAsES2025 implements GroupSpecifiers {
    // Branch currently being visited.
    private branchID = new BranchID(null, null)

    // Group name -> every branch in which that name was added.
    private readonly groupNames = new Map<string, BranchID[]>()

    /** @returns true if no group name has been recorded. */
    public isEmpty(): boolean {
        return this.groupNames.size === 0
    }

    /** Forgets every recorded name and restarts from a fresh root branch. */
    public clear(): void {
        this.groupNames.clear()
        this.branchID = new BranchID(null, null)
    }

    /** Descends into a new child branch for the disjunction being entered. */
    public enterDisjunction(): void {
        this.branchID = this.branchID.child()
    }

    /**
     * Moves to a sibling branch for every alternative after the first;
     * the first alternative (index 0) keeps the current branch.
     */
    public enterAlternative(index: number): void {
        if (index !== 0) {
            this.branchID = this.branchID.sibling()
        }
    }

    /** Returns to the parent of the current branch. */
    public leaveDisjunction(): void {
        this.branchID = this.branchID.parent!
    }

    /** Tests whether the given group name was recorded in any branch. */
    public hasInPattern(name: string): boolean {
        return this.groupNames.has(name)
    }

    /**
     * Tests whether the given group name was recorded in a branch that is
     * not separated from the current branch.
     */
    public hasInScope(name: string): boolean {
        const recorded = this.groupNames.get(name)
        if (recorded === undefined) {
            return false
        }
        const current = this.branchID
        return recorded.some((branch) => !branch.separatedFrom(current))
    }

    /** Records the given group name against the current branch. */
    public addToScope(name: string): void {
        const recorded = this.groupNames.get(name)
        if (recorded === undefined) {
            this.groupNames.set(name, [this.branchID])
        } else {
            recorded.push(this.branchID)
        }
    }
}

src/parser.ts

+17-6
Original file line numberDiff line numberDiff line change
@@ -148,12 +148,21 @@ class RegExpParserState {
148148

149149
for (const reference of this._backreferences) {
150150
const ref = reference.ref
151-
const group =
151+
const groups =
152152
typeof ref === "number"
153-
? this._capturingGroups[ref - 1]
154-
: this._capturingGroups.find((g) => g.name === ref)!
155-
reference.resolved = group
156-
group.references.push(reference)
153+
? [this._capturingGroups[ref - 1]]
154+
: this._capturingGroups.filter((g) => g.name === ref)
155+
if (groups.length === 1) {
156+
const group = groups[0]
157+
reference.ambiguous = false
158+
reference.resolved = group
159+
} else {
160+
reference.ambiguous = true
161+
reference.resolved = groups
162+
}
163+
for (const group of groups) {
164+
group.references.push(reference)
165+
}
157166
}
158167
}
159168

@@ -480,6 +489,7 @@ class RegExpParserState {
480489
end,
481490
raw: this.source.slice(start, end),
482491
ref,
492+
ambiguous: false,
483493
resolved: DUMMY_CAPTURING_GROUP,
484494
}
485495
parent.elements.push(node)
@@ -747,14 +757,15 @@ export namespace RegExpParser {
747757
strict?: boolean
748758

749759
/**
750-
* ECMAScript version. Default is `2024`.
760+
* ECMAScript version. Default is `2025`.
751761
* - `2015` added `u` and `y` flags.
752762
* - `2018` added `s` flag, Named Capturing Group, Lookbehind Assertion,
753763
* and Unicode Property Escape.
754764
* - `2019`, `2020`, and `2021` added more valid Unicode Property Escapes.
755765
* - `2022` added `d` flag.
756766
* - `2023` added more valid Unicode Property Escapes.
757767
* - `2024` added `v` flag.
768+
* - `2025` added duplicate named capturing groups.
758769
*/
759770
ecmaVersion?: EcmaVersion
760771
}

src/validator.ts

+20-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
import type { EcmaVersion } from "./ecma-versions"
22
import { latestEcmaVersion } from "./ecma-versions"
3+
import type { GroupSpecifiers } from "./group-specifiers"
4+
import {
5+
GroupSpecifiersAsES2018,
6+
GroupSpecifiersAsES2025,
7+
} from "./group-specifiers"
38
import { Reader } from "./reader"
49
import { newRegExpSyntaxError } from "./regexp-syntax-error"
510
import {
@@ -231,14 +236,15 @@ export namespace RegExpValidator {
231236
strict?: boolean
232237

233238
/**
234-
* ECMAScript version. Default is `2024`.
239+
* ECMAScript version. Default is `2025`.
235240
* - `2015` added `u` and `y` flags.
236241
* - `2018` added `s` flag, Named Capturing Group, Lookbehind Assertion,
237242
* and Unicode Property Escape.
238243
* - `2019`, `2020`, and `2021` added more valid Unicode Property Escapes.
239244
* - `2022` added `d` flag.
240245
* - `2023` added more valid Unicode Property Escapes.
241246
* - `2024` added `v` flag.
247+
* - `2025` added duplicate named capturing groups.
242248
*/
243249
ecmaVersion?: EcmaVersion
244250

@@ -631,7 +637,7 @@ export class RegExpValidator {
631637

632638
private _numCapturingParens = 0
633639

634-
private _groupNames = new Set<string>()
640+
private _groupSpecifiers: GroupSpecifiers
635641

636642
private _backreferenceNames = new Set<string>()
637643

@@ -643,6 +649,10 @@ export class RegExpValidator {
643649
*/
644650
public constructor(options?: RegExpValidator.Options) {
645651
this._options = options ?? {}
652+
this._groupSpecifiers =
653+
this.ecmaVersion >= 2025
654+
? new GroupSpecifiersAsES2025()
655+
: new GroupSpecifiersAsES2018()
646656
}
647657

648658
/**
@@ -763,7 +773,7 @@ export class RegExpValidator {
763773
if (
764774
!this._nFlag &&
765775
this.ecmaVersion >= 2018 &&
766-
this._groupNames.size > 0
776+
!this._groupSpecifiers.isEmpty()
767777
) {
768778
this._nFlag = true
769779
this.rewind(start)
@@ -1301,7 +1311,7 @@ export class RegExpValidator {
13011311
private consumePattern(): void {
13021312
const start = this.index
13031313
this._numCapturingParens = this.countCapturingParens()
1304-
this._groupNames.clear()
1314+
this._groupSpecifiers.clear()
13051315
this._backreferenceNames.clear()
13061316

13071317
this.onPatternEnter(start)
@@ -1322,7 +1332,7 @@ export class RegExpValidator {
13221332
this.raise(`Unexpected character '${c}'`)
13231333
}
13241334
for (const name of this._backreferenceNames) {
1325-
if (!this._groupNames.has(name)) {
1335+
if (!this._groupSpecifiers.hasInPattern(name)) {
13261336
this.raise("Invalid named capture referenced")
13271337
}
13281338
}
@@ -1378,6 +1388,7 @@ export class RegExpValidator {
13781388
const start = this.index
13791389
let i = 0
13801390

1391+
this._groupSpecifiers.enterDisjunction()
13811392
this.onDisjunctionEnter(start)
13821393
do {
13831394
this.consumeAlternative(i++)
@@ -1390,6 +1401,7 @@ export class RegExpValidator {
13901401
this.raise("Lone quantifier brackets")
13911402
}
13921403
this.onDisjunctionLeave(start, this.index)
1404+
this._groupSpecifiers.leaveDisjunction()
13931405
}
13941406

13951407
/**
@@ -1403,6 +1415,7 @@ export class RegExpValidator {
14031415
private consumeAlternative(i: number): void {
14041416
const start = this.index
14051417

1418+
this._groupSpecifiers.enterAlternative(i)
14061419
this.onAlternativeEnter(start, i)
14071420
while (this.currentCodePoint !== -1 && this.consumeTerm()) {
14081421
// do nothing.
@@ -1846,8 +1859,8 @@ export class RegExpValidator {
18461859
private consumeGroupSpecifier(): boolean {
18471860
if (this.eat(QUESTION_MARK)) {
18481861
if (this.eatGroupName()) {
1849-
if (!this._groupNames.has(this._lastStrValue)) {
1850-
this._groupNames.add(this._lastStrValue)
1862+
if (!this._groupSpecifiers.hasInScope(this._lastStrValue)) {
1863+
this._groupSpecifiers.addToScope(this._lastStrValue)
18511864
return true
18521865
}
18531866
this.raise("Duplicate capture group name")

test/fixtures/parser/literal/basic-valid-2015-u.json

+4
Original file line numberDiff line numberDiff line change
@@ -1690,6 +1690,7 @@
16901690
"end": 6,
16911691
"raw": "\\1",
16921692
"ref": 1,
1693+
"ambiguous": false,
16931694
"resolved": "♻️../0"
16941695
}
16951696
]
@@ -1741,6 +1742,7 @@
17411742
"end": 3,
17421743
"raw": "\\1",
17431744
"ref": 1,
1745+
"ambiguous": false,
17441746
"resolved": "♻️../1"
17451747
},
17461748
{
@@ -2104,6 +2106,7 @@
21042106
"end": 34,
21052107
"raw": "\\10",
21062108
"ref": 10,
2109+
"ambiguous": false,
21072110
"resolved": "♻️../9"
21082111
}
21092112
]
@@ -2465,6 +2468,7 @@
24652468
"end": 37,
24662469
"raw": "\\11",
24672470
"ref": 11,
2471+
"ambiguous": false,
24682472
"resolved": "♻️../10"
24692473
}
24702474
]

0 commit comments

Comments
 (0)