From 6cf15f523afce21125e413a188c081f14b50c001 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 27 Apr 2017 18:50:59 +0200 Subject: [PATCH] RegExp named group tests (#998) Tests against the Stage 3 named capture groups proposal https://tc39.github.io/proposal-regexp-named-groups --- .../named-groups/functional-replace-global.js | 56 +++++++++++++++++++ .../functional-replace-non-global.js | 43 ++++++++++++++ .../RegExp/named-groups/groups-properties.js | 39 +++++++++++++ .../RegExp/named-groups/lookbehind.js | 46 +++++++++++++++ .../named-groups/non-unicode-malformed.js | 44 +++++++++++++++ .../RegExp/named-groups/non-unicode-match.js | 43 ++++++++++++++ .../non-unicode-property-names.js | 45 +++++++++++++++ .../named-groups/non-unicode-references.js | 34 +++++++++++ .../RegExp/named-groups/string-replace-get.js | 29 ++++++++++ .../named-groups/string-replace-missing.js | 25 +++++++++ .../named-groups/string-replace-nocaptures.js | 31 ++++++++++ .../named-groups/string-replace-numbered.js | 29 ++++++++++ .../named-groups/string-replace-unclosed.js | 24 ++++++++ .../named-groups/string-replace-undefined.js | 28 ++++++++++ .../RegExp/named-groups/unicode-malformed.js | 27 +++++++++ .../RegExp/named-groups/unicode-match.js | 48 ++++++++++++++++ .../named-groups/unicode-property-names.js | 46 +++++++++++++++ .../RegExp/named-groups/unicode-references.js | 47 ++++++++++++++++ 18 files changed, 684 insertions(+) create mode 100644 test/built-ins/RegExp/named-groups/functional-replace-global.js create mode 100644 test/built-ins/RegExp/named-groups/functional-replace-non-global.js create mode 100644 test/built-ins/RegExp/named-groups/groups-properties.js create mode 100644 test/built-ins/RegExp/named-groups/lookbehind.js create mode 100644 test/built-ins/RegExp/named-groups/non-unicode-malformed.js create mode 100644 test/built-ins/RegExp/named-groups/non-unicode-match.js create mode 100644 test/built-ins/RegExp/named-groups/non-unicode-property-names.js create 
mode 100644 test/built-ins/RegExp/named-groups/non-unicode-references.js create mode 100644 test/built-ins/RegExp/named-groups/string-replace-get.js create mode 100644 test/built-ins/RegExp/named-groups/string-replace-missing.js create mode 100644 test/built-ins/RegExp/named-groups/string-replace-nocaptures.js create mode 100644 test/built-ins/RegExp/named-groups/string-replace-numbered.js create mode 100644 test/built-ins/RegExp/named-groups/string-replace-unclosed.js create mode 100644 test/built-ins/RegExp/named-groups/string-replace-undefined.js create mode 100644 test/built-ins/RegExp/named-groups/unicode-malformed.js create mode 100644 test/built-ins/RegExp/named-groups/unicode-match.js create mode 100644 test/built-ins/RegExp/named-groups/unicode-property-names.js create mode 100644 test/built-ins/RegExp/named-groups/unicode-references.js diff --git a/test/built-ins/RegExp/named-groups/functional-replace-global.js b/test/built-ins/RegExp/named-groups/functional-replace-global.js new file mode 100644 index 0000000000..2fb5d49c82 --- /dev/null +++ b/test/built-ins/RegExp/named-groups/functional-replace-global.js @@ -0,0 +1,56 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +description: > + Function argument to String.prototype.replace gets groups as the last argument +esid: sec-regexp.prototype-@@replace +features: [regexp-named-groups] +info: > + RegExp.prototype [ @@replace ] ( string, replaceValue ) + 14. Repeat, for each result in results, + j. Let namedCaptures be ? Get(result, "groups"). + k. If functionalReplace is true, then + iv. If namedCaptures is not undefined, + 1. Append namedCaptures as the last element of replacerArgs. 
+---*/
+
+let source = "(?<fst>.)(?<snd>.)";
+let alternateSource = "(?<fst>.)|(?<snd>.)";
+
+for (let flags of ["g", "gu"]) {
+  let i = 0;
+  let re = new RegExp(source, flags);
+  let result = "abcd".replace(re,
+    (match, fst, snd, offset, str, groups) => {
+    if (i == 0) {
+      assert.sameValue("ab", match);
+      assert.sameValue("a", groups.fst);
+      assert.sameValue("b", groups.snd);
+      assert.sameValue("a", fst);
+      assert.sameValue("b", snd);
+      assert.sameValue(0, offset);
+      assert.sameValue("abcd", str);
+    } else if (i == 1) {
+      assert.sameValue("cd", match);
+      assert.sameValue("c", groups.fst);
+      assert.sameValue("d", groups.snd);
+      assert.sameValue("c", fst);
+      assert.sameValue("d", snd);
+      assert.sameValue(2, offset);
+      assert.sameValue("abcd", str);
+    } else {
+      throw new Test262Error("replacer called more than twice"); // "abcd" yields exactly two 2-char matches
+    }
+    i++;
+    return `${groups.snd}${groups.fst}`;
+  });
+  assert.sameValue("badc", result);
+  assert.sameValue(i, 2);
+
+  let re2 = new RegExp(alternateSource, flags);
+  assert.sameValue("undefinedundefinedundefinedundefined",
+    "abcd".replace(re2,
+      (match, fst, snd, offset, str, groups) => groups.snd));
+}
+
diff --git a/test/built-ins/RegExp/named-groups/functional-replace-non-global.js b/test/built-ins/RegExp/named-groups/functional-replace-non-global.js
new file mode 100644
index 0000000000..5186a88fa2
--- /dev/null
+++ b/test/built-ins/RegExp/named-groups/functional-replace-non-global.js
@@ -0,0 +1,43 @@
+// Copyright 2017 the V8 project authors. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+description: >
+  Function argument to String.prototype.replace gets groups as the last argument
+esid: sec-regexp.prototype-@@replace
+features: [regexp-named-groups]
+info: >
+  RegExp.prototype [ @@replace ] ( string, replaceValue )
+  14. Repeat, for each result in results,
+    j. Let namedCaptures be ? Get(result, "groups").
+    k. If functionalReplace is true, then
+      iv. If namedCaptures is not undefined,
+        1. Append namedCaptures as the last element of replacerArgs.
+---*/
+
+let source = "(?<fst>.)(?<snd>.)";
+let alternateSource = "(?<fst>.)|(?<snd>.)";
+
+for (let flags of ["", "u"]) {
+  let i = 0;
+  let re = new RegExp(source, flags);
+  let result = "abcd".replace(re,
+    (match, fst, snd, offset, str, groups) => {
+    assert.sameValue(i++, 0);
+    assert.sameValue("ab", match);
+    assert.sameValue("a", groups.fst);
+    assert.sameValue("b", groups.snd);
+    assert.sameValue("a", fst);
+    assert.sameValue("b", snd);
+    assert.sameValue(0, offset);
+    assert.sameValue("abcd", str);
+    return `${groups.snd}${groups.fst}`;
+  });
+  assert.sameValue("bacd", result);
+  assert.sameValue(i, 1);
+
+  let re2 = new RegExp(alternateSource, flags);
+  assert.sameValue("undefinedbcd",
+    "abcd".replace(re2,
+      (match, fst, snd, offset, str, groups) => groups.snd));
+}
diff --git a/test/built-ins/RegExp/named-groups/groups-properties.js b/test/built-ins/RegExp/named-groups/groups-properties.js
new file mode 100644
index 0000000000..6d7b46ec63
--- /dev/null
+++ b/test/built-ins/RegExp/named-groups/groups-properties.js
@@ -0,0 +1,39 @@
+// Copyright 2017 the V8 project authors. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+description: Properties of the groups object are created with CreateDataProperty
+includes: [compareArray.js, propertyHelper.js]
+esid: sec-regexpbuiltinexec
+features: [regexp-named-groups]
+info: >
+  Runtime Semantics: RegExpBuiltinExec ( R, S )
+  25. For each integer i such that i > 0 and i ≤ n
+    f. If the ith capture of R was defined with a GroupName,
+      i. Let s be the StringValue of the corresponding RegExpIdentifierName.
+      ii. Perform ! CreateDataProperty(groups, s, capturedValue).
+---*/
+
+// Properties created on result.groups in textual order.
+assert(compareArray(["fst", "snd"],
+                    Object.getOwnPropertyNames(
+                        /(?<fst>.)|(?<snd>.)/u.exec("abcd").groups)));
+
+// Properties are created with Define, not Set
+let counter = 0;
+Object.defineProperty(Object.prototype, 'x', {set() { counter++; }});
+let match = /(?<x>.)/.exec('a');
+let groups = match.groups;
+assert.sameValue(counter, 0);
+
+// Properties are writable, enumerable and configurable
+// (from CreateDataProperty)
+verifyWritable(groups, "x");
+verifyEnumerable(groups, "x");
+verifyConfigurable(groups, "x");
+
+// The '__proto__' property on the groups object is not special,
+// and does not affect the [[Prototype]] of the resulting groups object.
+groups = /(?<__proto__>a)/u.exec("a").groups;
+assert.sameValue("a", groups.__proto__);
+assert.sameValue(null, Object.getPrototypeOf(groups));
diff --git a/test/built-ins/RegExp/named-groups/lookbehind.js b/test/built-ins/RegExp/named-groups/lookbehind.js
new file mode 100644
index 0000000000..0009bc5008
--- /dev/null
+++ b/test/built-ins/RegExp/named-groups/lookbehind.js
@@ -0,0 +1,46 @@
+// Copyright 2018 the V8 project authors. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+ +/*--- +description: Named groups can be used in conjunction with lookbehind +esid: pending +features: [regexp-named-groups, regexp-lookbehind] +includes: [compareArray.js] +---*/ + +// Unicode mode +assert(compareArray(["f", "c"], "abcdef".match(/(?<=(?\w){3})f/u))); +assert.sameValue("c", "abcdef".match(/(?<=(?\w){3})f/u).groups.a); +assert.sameValue("b", "abcdef".match(/(?<=(?\w){4})f/u).groups.a); +assert.sameValue("a", "abcdef".match(/(?<=(?\w)+)f/u).groups.a); +assert.sameValue(null, "abcdef".match(/(?<=(?\w){6})f/u)); + +assert(compareArray(["f", ""], "abcdef".match(/((?<=\w{3}))f/u))); +assert(compareArray(["f", ""], "abcdef".match(/(?(?<=\w{3}))f/u))); + +assert(compareArray(["f", undefined], "abcdef".match(/(?\d){3})f/u))); +assert.sameValue(null, "abcdef".match(/(?\D){3})f/u)); + +assert(compareArray(["f", undefined], "abcdef".match(/(?\D){3})f|f/u))); +assert(compareArray(["f", undefined], "abcdef".match(/(?(?\w){3})f/))); +assert.sameValue("c", "abcdef".match(/(?<=(?\w){3})f/).groups.a); +assert.sameValue("b", "abcdef".match(/(?<=(?\w){4})f/).groups.a); +assert.sameValue("a", "abcdef".match(/(?<=(?\w)+)f/).groups.a); +assert.sameValue(null, "abcdef".match(/(?<=(?\w){6})f/)); + +assert(compareArray(["f", ""], "abcdef".match(/((?<=\w{3}))f/))); +assert(compareArray(["f", ""], "abcdef".match(/(?(?<=\w{3}))f/))); + +assert(compareArray(["f", undefined], "abcdef".match(/(?\d){3})f/))); +assert.sameValue(null, "abcdef".match(/(?\D){3})f/)); + +assert(compareArray(["f", undefined], "abcdef".match(/(?\D){3})f|f/))); +assert(compareArray(["f", undefined], "abcdef".match(/(?(?.)|(?.))/u.exec("abcd").groups))); diff --git a/test/built-ins/RegExp/named-groups/non-unicode-malformed.js b/test/built-ins/RegExp/named-groups/non-unicode-malformed.js new file mode 100644 index 0000000000..6a6d7bd9ec --- /dev/null +++ b/test/built-ins/RegExp/named-groups/non-unicode-malformed.js @@ -0,0 +1,44 @@ +// Copyright 2017 the V8 project authors. All rights reserved. 
+// This code is governed by the BSD license found in the LICENSE file. + +/*--- +description: > + Named groups in Unicode RegExps have some syntax errors and some + compatibility escape fallback behavior. +esid: pending +features: [regexp-named-groups, regexp-lookbehind] +includes: [compareArray.js] +---*/ + +assert.throws(SyntaxError, () => eval("/(?<>a)/")); +assert.throws(SyntaxError, () => eval("/(? eval("/(?<42a>a)/")); +assert.throws(SyntaxError, () => eval("/(?<:a>a)/")); +assert.throws(SyntaxError, () => eval("/(?a)/")); +assert.throws(SyntaxError, () => eval("/(?a)(?a)/")); +assert.throws(SyntaxError, () => eval("/(?a)(?b)(?a)/")); +assert(/\k/.test("k")); +assert(/\k<4>/.test("k<4>")); +assert(/\k eval("/(?.)\\k/")); +assert.throws(SyntaxError, () => eval("/(?.)\\k eval("/(?.)\\k/")); +assert.throws(SyntaxError, () => eval("/(?a)\\k/")); +assert.throws(SyntaxError, () => eval("/(?a)\\k/")); +assert.throws(SyntaxError, () => eval("/\\k(?a)/")); +assert.throws(SyntaxError, () => eval("/\\ka)/")); +assert(/(?\a)/.test("a")); + +assert(compareArray(["k"], "xxxkxxx".match(/\k/))); +assert(compareArray(["kxxx".match(/\k(?<=>)a/.test("ka")); +assert(/\k(?a")); +assert(/\k(x)/.test("kx")); +assert(/\k(?x)/.test("x")); +assert.throws(SyntaxError, () => eval("/\\k(?x)/")); +assert.throws(SyntaxError, () => eval("/\\k.)/")); +assert.throws(SyntaxError, () => eval("/\\k(?.)/")); diff --git a/test/built-ins/RegExp/named-groups/non-unicode-match.js b/test/built-ins/RegExp/named-groups/non-unicode-match.js new file mode 100644 index 0000000000..4e29bb24a4 --- /dev/null +++ b/test/built-ins/RegExp/named-groups/non-unicode-match.js @@ -0,0 +1,43 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. 
+ +/*--- +description: Basic matching cases with non-Unicode groups +esid: pending +features: [regexp-named-groups] +includes: [compareArray.js] +---*/ + +assert(compareArray(["a", "a"], "bab".match(/(?a)/))); +assert(compareArray(["a", "a"], "bab".match(/(?a)/))); +assert(compareArray(["a", "a"], "bab".match(/(?<_>a)/))); +assert(compareArray(["a", "a"], "bab".match(/(?<$>a)/))); +assert(compareArray(["bab", "a"], "bab".match(/.(?<$>a)./))); +assert(compareArray(["bab", "a", "b"], "bab".match(/.(?a)(.)/))); +assert(compareArray(["bab", "a", "b"], "bab".match(/.(?a)(?.)/))); +assert(compareArray(["bab", "ab"], "bab".match(/.(?\w\w)/))); +assert(compareArray(["bab", "bab"], "bab".match(/(?\w\w\w)/))); +assert(compareArray(["bab", "ba", "b"], "bab".match(/(?\w\w)(?\w)/))); + +let {a, b, c} = /(?.)(?.)(?.)\k\k\k/.exec("abccba").groups; +assert.sameValue(a, "a"); +assert.sameValue(b, "b"); +assert.sameValue(c, "c"); + +assert(compareArray("bab".match(/(a)/), "bab".match(/(?a)/))); +assert(compareArray("bab".match(/(a)/), "bab".match(/(?a)/))); +assert(compareArray("bab".match(/(a)/), "bab".match(/(?<_>a)/))); +assert(compareArray("bab".match(/(a)/), "bab".match(/(?<$>a)/))); +assert(compareArray("bab".match(/.(a)./), "bab".match(/.(?<$>a)./))); +assert(compareArray("bab".match(/.(a)(.)/), "bab".match(/.(?a)(.)/))); +assert(compareArray("bab".match(/.(a)(.)/), "bab".match(/.(?a)(?.)/))); +assert(compareArray("bab".match(/.(\w\w)/), "bab".match(/.(?\w\w)/))); +assert(compareArray("bab".match(/(\w\w\w)/), "bab".match(/(?\w\w\w)/))); +assert(compareArray("bab".match(/(\w\w)(\w)/), "bab".match(/(?\w\w)(?\w)/))); + +assert(compareArray(["bab", "b"], "bab".match(/(?b).\1/))); +assert(compareArray(["baba", "b", "a"], "baba".match(/(.)(?a)\1\2/))); +assert(compareArray(["baba", "b", "a", "b", "a"], + "baba".match(/(.)(?a)(?\1)(\2)/))); +assert(compareArray(["<)a/))); +assert(compareArray([">a", ">"], ">a".match(/(?>)a/))); diff --git 
a/test/built-ins/RegExp/named-groups/non-unicode-property-names.js b/test/built-ins/RegExp/named-groups/non-unicode-property-names.js
new file mode 100644
index 0000000000..fea50314a4
--- /dev/null
+++ b/test/built-ins/RegExp/named-groups/non-unicode-property-names.js
@@ -0,0 +1,45 @@
+// Copyright 2017 the V8 project authors. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+description: Exotic named group names in non-Unicode RegExps
+esid: pending
+features: [regexp-named-groups]
+includes: [compareArray.js]
+---*/
+
+assert.sameValue("a", /(?<π>a)/.exec("bab").groups.π);
+assert.throws(SyntaxError, () => eval('/(?<\\u{03C0}>a)/'), "\\u{} escapes allowed only in Unicode mode");
+assert.sameValue("a", /(?<π>a)/.exec("bab").groups.\u03C0);
+assert.sameValue("a", /(?<$>a)/.exec("bab").groups.$);
+assert.sameValue("a", /(?<_>a)/.exec("bab").groups._);
+assert.throws(SyntaxError, () => eval('/(?<$𐒤>a)/'), "Individual surrogates not in ID_Continue");
+assert.sameValue("a", /(?<_\u200C>a)/.exec("bab").groups._\u200C);
+assert.sameValue("a", /(?<_\u200D>a)/.exec("bab").groups._\u200D);
+assert.sameValue("a", /(?<ಠ_ಠ>a)/.exec("bab").groups.ಠ_ಠ);
+assert.throws(SyntaxError, () => eval('/(?<❤>a)/'));
+assert.throws(SyntaxError, () => eval('/(?<𐒤>a)/'), "Individual surrogate not in ID_Start.");
+
+// Unicode escapes in capture names.
+assert.throws(SyntaxError, () => eval("/(?.)/")); +assert.throws(SyntaxError, () => eval("/(?.)/")); +assert.throws(SyntaxError, () => eval("/(?.)/")); +assert(/(?<\u0041>.)/.test("a")); +assert.throws(SyntaxError, () => eval("/(?.)/")); +assert.throws(SyntaxError, () => eval("/(?.)/")); +assert.throws(SyntaxError, () => eval("/(?.)/"), "Lea"); +assert.throws(SyntaxError, () => eval("/(?.)/"), "Trai"); +assert(RegExp("(?<\u{0041}>.)").test("a"), "Non-surrogate"); +assert(RegExp("(?.)").test("a"), "Surrogate, ID_Continue"); + +// Bracketed escapes are not allowed; +// 4-char escapes must be the proper ID_Start/ID_Continue +assert.throws(SyntaxError, () => eval("/(?.)/"), "Lead"); +assert.throws(SyntaxError, () => eval("/(?.)/"), "Trail"); +assert.throws(SyntaxError, () => eval("/(?<\\u{0041}>.)/"), "Non-surrogate"); +assert.throws(SyntaxError, () => eval("/(?.)/"), "Surrogate, ID_Continue"); +assert(RegExp("(?<\\u0041>.)").test("a"), "Non-surrogate"); + +// Backslash is not allowed as ID_Start and ID_Continue +assert.throws(SyntaxError, () => eval("/(?<\\>.)/"), "'\' misclassified as ID_Start"); +assert.throws(SyntaxError, () => eval("/(?.)/"), "'\' misclassified as ID_Continue"); diff --git a/test/built-ins/RegExp/named-groups/non-unicode-references.js b/test/built-ins/RegExp/named-groups/non-unicode-references.js new file mode 100644 index 0000000000..bbf7a0fa5a --- /dev/null +++ b/test/built-ins/RegExp/named-groups/non-unicode-references.js @@ -0,0 +1,34 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +description: Named backreferences in non-Unicode RegExps +esid: pending +features: [regexp-named-groups] +includes: [compareArray.js] +---*/ + +// Named references. +assert(compareArray(["bab", "b"], "bab".match(/(?.).\k/))); +assert.sameValue(null, "baa".match(/(?.).\k/)); + +// Reference inside group. 
+assert(compareArray(["bab", "b"], "bab".match(/(?\k\w)../))); +assert.sameValue("b", "bab".match(/(?\k\w)../).groups.a); + +// Reference before group. +assert(compareArray(["bab", "b"], "bab".match(/\k(?b)\w\k/))); +assert.sameValue("b", "bab".match(/\k(?b)\w\k/).groups.a); +assert(compareArray(["bab", "b", "a"], "bab".match(/(?b)\k(?a)\k/))); +let {a, b} = "bab".match(/(?b)\k(?a)\k/).groups; +assert.sameValue(a, "a"); +assert.sameValue(b, "b"); + +assert(compareArray(["bab", "b"], "bab".match(/\k(?b)\w\k/))); +assert(compareArray(["bab", "b", "a"], "bab".match(/(?b)\k(?a)\k/))); + +// Reference properties. +assert.sameValue("a", /(?a)(?b)\k/.exec("aba").groups.a); +assert.sameValue("b", /(?a)(?b)\k/.exec("aba").groups.b); +assert.sameValue(undefined, /(?a)(?b)\k/.exec("aba").groups.c); +assert.sameValue(undefined, /(?a)(?b)\k|(?c)/.exec("aba").groups.c); diff --git a/test/built-ins/RegExp/named-groups/string-replace-get.js b/test/built-ins/RegExp/named-groups/string-replace-get.js new file mode 100644 index 0000000000..2234d99512 --- /dev/null +++ b/test/built-ins/RegExp/named-groups/string-replace-get.js @@ -0,0 +1,29 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +description: Named substitutions are found by getting the property from the groups object +esid: sec-getsubstitution +features: [regexp-named-groups] +info: > + Runtime Semantics: GetSubstitution( matched, str, position, captures, namedCaptures, replacement ) + + Table: Replacement Text Symbol Substitutions + + Unicode Characters: $< + Replacement text: + 2. Otherwise, + c. Let capture be ? Get(namedCaptures, groupName). + d. If capture is undefined, replace the text through > with the empty string. + e. Otherwise, replace the text through this following > with ? ToString(capture). 
+---*/ + +let source = "(?.)(?.)|(?x)"; +for (let flags of ["g", "gu"]) { + let re = new RegExp(source, flags); + assert.sameValue("badc", "abcd".replace(re, "$$")); +} +for (let flags of ["", "u"]) { + let re = new RegExp(source, flags); + assert.sameValue("bacd", "abcd".replace(re, "$$")); +} diff --git a/test/built-ins/RegExp/named-groups/string-replace-missing.js b/test/built-ins/RegExp/named-groups/string-replace-missing.js new file mode 100644 index 0000000000..6d253568c2 --- /dev/null +++ b/test/built-ins/RegExp/named-groups/string-replace-missing.js @@ -0,0 +1,25 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +description: SyntaxError is thrown for malformed replacements +esid: sec-getsubstitution +features: [regexp-named-groups] +info: > + Runtime Semantics: GetSubstitution( matched, str, position, captures, namedCaptures, replacement ) + + Table: Replacement Text Symbol Substitutions + + Unicode Characters: $< + Replacement text: + 2. Otherwise, + b. If ? HasProperty(namedCaptures, groupName) is false, throw a SyntaxError exception. +---*/ + +let source = "(?.)(?.)|(?x)"; +for (let flags of ["", "u", "g", "gu"]) { + let re = new RegExp(source, flags); + assert.throws(SyntaxError, () => "abcd".replace(re, "$<42$1>")); + assert.throws(SyntaxError, () => "abcd".replace(re, "$")); + assert.throws(SyntaxError, () => "abcd".replace(re, "$<$1>")); +} diff --git a/test/built-ins/RegExp/named-groups/string-replace-nocaptures.js b/test/built-ins/RegExp/named-groups/string-replace-nocaptures.js new file mode 100644 index 0000000000..03fccdc87a --- /dev/null +++ b/test/built-ins/RegExp/named-groups/string-replace-nocaptures.js @@ -0,0 +1,31 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. 
+ +/*--- +description: If there are no named captures, don't replace $<> +esid: sec-getsubstitution +features: [regexp-named-groups] +info: > + Runtime Semantics: GetSubstitution( matched, str, position, captures, namedCaptures, replacement ) + + Table: Replacement Text Symbol Substitutions + + Unicode Characters: $< + Replacement text: + 1. If namedCaptures is undefined, the replacement text is the literal string $<. +---*/ + +// @@replace with a string replacement argument (no named captures). + +let source = "(.)(.)|(x)"; +for (let flags of ["", "u", "g", "gu"]) { + let re = new RegExp(source, flags); + assert.sameValue("$$cd", "abcd".replace(re, "$$")); + assert.sameValue("bacd", "abcd".replace(re, "$2$1")); + assert.sameValue("cd", "abcd".replace(re, "$3")); + assert.sameValue("$cd", "abcd".replace(re, "$<42$1>")); + assert.sameValue("$cd", "abcd".replace(re, "$")); + assert.sameValue("$cd", "abcd".replace(re, "$<$1>")); +} + diff --git a/test/built-ins/RegExp/named-groups/string-replace-numbered.js b/test/built-ins/RegExp/named-groups/string-replace-numbered.js new file mode 100644 index 0000000000..0750bd124c --- /dev/null +++ b/test/built-ins/RegExp/named-groups/string-replace-numbered.js @@ -0,0 +1,29 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +description: Named groups may be accessed in their replacement string by number +esid: sec-getsubstitution +features: [regexp-named-groups] +info: > + Runtime Semantics: GetSubstitution( matched, str, position, captures, namedCaptures, replacement ) + + Table: Replacement Text Symbol Substitutions + + Unicode Characters: $n + Replacement text: + The nth element of captures, where n is a single digit in the range 1 to 9. If + n≤m and the nth element of captures is undefined, use the empty String instead. + If n>m, the result is implementation-defined. 
+---*/ + +let source = "(?.)(?.)|(?x)"; +for (let flags of ["g", "gu"]) { + let re = new RegExp(source, flags); + assert.sameValue("badc", "abcd".replace(re, "$2$1")); +} +for (let flags of ["", "u"]) { + let re = new RegExp(source, flags); + assert.sameValue("bacd", "abcd".replace(re, "$2$1")); +} + diff --git a/test/built-ins/RegExp/named-groups/string-replace-unclosed.js b/test/built-ins/RegExp/named-groups/string-replace-unclosed.js new file mode 100644 index 0000000000..588bc3ed85 --- /dev/null +++ b/test/built-ins/RegExp/named-groups/string-replace-unclosed.js @@ -0,0 +1,24 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +description: SyntaxError is thrown for malformed replacements +esid: sec-getsubstitution +features: [regexp-named-groups] +info: > + Runtime Semantics: GetSubstitution( matched, str, position, captures, namedCaptures, replacement ) + + Table: Replacement Text Symbol Substitutions + + Unicode Characters: $< + Replacement text: + 2. Otherwise, + a. Scan until the next >, throwing a SyntaxError exception if one is not found, and let the enclosed substring be groupName. +---*/ + +let source = "(?.)(?.)|(?x)"; +for (let flags of ["", "u", "g", "gu"]) { + let re = new RegExp(source, flags); + assert.throws(SyntaxError, () => "abcd".replace(re, "$ + Runtime Semantics: GetSubstitution( matched, str, position, captures, namedCaptures, replacement ) + + Table: Replacement Text Symbol Substitutions + + Unicode Characters: $< + Replacement text: + 2. Otherwise, + c. Let capture be ? Get(namedCaptures, groupName). + d. If capture is undefined, replace the text through > with the empty string. 
+---*/ + +let source = "(?.)(?.)|(?x)"; +for (let flags of ["g", "gu"]) { + let re = new RegExp(source, flags); + assert.sameValue("", "abcd".replace(re, "$")); +} +for (let flags of ["", "u"]) { + let re = new RegExp(source, flags); + assert.sameValue("cd", "abcd".replace(re, "$")); +} diff --git a/test/built-ins/RegExp/named-groups/unicode-malformed.js b/test/built-ins/RegExp/named-groups/unicode-malformed.js new file mode 100644 index 0000000000..f0f08a8ed4 --- /dev/null +++ b/test/built-ins/RegExp/named-groups/unicode-malformed.js @@ -0,0 +1,27 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +description: Various syntax errors for Unicode RegExps containing named groups +esid: pending +features: [regexp-named-groups] +---*/ + +assert.throws(SyntaxError, () => eval("/(?<>a)/u"), "Empty name"); +assert.throws(SyntaxError, () => eval("/(? eval("/(?<42a>a)/u"), "Name starting with digits"); +assert.throws(SyntaxError, () => eval("/(?<:a>a)/u"), "Name starting with invalid char"); +assert.throws(SyntaxError, () => eval("/(?a)/u"), "Name containing with invalid char"); +assert.throws(SyntaxError, () => eval("/(?a)(?a)/u"), "Duplicate name"); +assert.throws(SyntaxError, () => eval("/(?a)(?b)(?a)/u"), "Duplicate name"); +assert.throws(SyntaxError, () => eval("/\\k/u"), "Invalid reference"); +assert.throws(SyntaxError, () => eval("/\\k eval("/\\k/u"), "Lone \k"); +assert.throws(SyntaxError, () => eval("/(?.)\\k/u"), "Lone \k"); +assert.throws(SyntaxError, () => eval("/(?.)\\k eval("/(?.)\\k/u"), "Invalid reference"); +assert.throws(SyntaxError, () => eval("/(?a)\\k/u"), "Invalid reference"); +assert.throws(SyntaxError, () => eval("/(?a)\\k/u"), "Invalid reference"); +assert.throws(SyntaxError, () => eval("/\\k(?a)/u"), "Invalid reference"); +assert.throws(SyntaxError, () => eval("/(?\\a)/u"), "Identity escape in capture"); + diff --git 
a/test/built-ins/RegExp/named-groups/unicode-match.js b/test/built-ins/RegExp/named-groups/unicode-match.js new file mode 100644 index 0000000000..f840d4cfe2 --- /dev/null +++ b/test/built-ins/RegExp/named-groups/unicode-match.js @@ -0,0 +1,48 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +description: Basic matching cases with Unicode groups +esid: pending +features: [regexp-named-groups] +includes: [compareArray.js] +---*/ + +assert(compareArray(["a", "a"], "bab".match(/(?a)/u))); +assert(compareArray(["a", "a"], "bab".match(/(?a)/u))); +assert(compareArray(["a", "a"], "bab".match(/(?<_>a)/u))); +assert(compareArray(["a", "a"], "bab".match(/(?<$>a)/u))); +assert(compareArray(["bab", "a"], "bab".match(/.(?<$>a)./u))); +assert(compareArray(["bab", "a", "b"], "bab".match(/.(?a)(.)/u))); +assert(compareArray(["bab", "a", "b"], "bab".match(/.(?a)(?.)/u))); +assert(compareArray(["bab", "ab"], "bab".match(/.(?\w\w)/u))); +assert(compareArray(["bab", "bab"], "bab".match(/(?\w\w\w)/u))); +assert(compareArray(["bab", "ba", "b"], "bab".match(/(?\w\w)(?\w)/u))); + +let {a, b, c} = /(?.)(?.)(?.)\k\k\k/u.exec("abccba").groups; +assert.sameValue(a, "a"); +assert.sameValue(b, "b"); +assert.sameValue(c, "c"); + +assert(compareArray("bab".match(/(a)/u), "bab".match(/(?a)/u))); +assert(compareArray("bab".match(/(a)/u), "bab".match(/(?a)/u))); +assert(compareArray("bab".match(/(a)/u), "bab".match(/(?<_>a)/u))); +assert(compareArray("bab".match(/(a)/u), "bab".match(/(?<$>a)/u))); +assert(compareArray("bab".match(/.(a)./u), "bab".match(/.(?<$>a)./u))); +assert(compareArray("bab".match(/.(a)(.)/u), "bab".match(/.(?a)(.)/u))); +assert(compareArray("bab".match(/.(a)(.)/u), "bab".match(/.(?a)(?.)/u))); +assert(compareArray("bab".match(/.(\w\w)/u), "bab".match(/.(?\w\w)/u))); +assert(compareArray("bab".match(/(\w\w\w)/u), "bab".match(/(?\w\w\w)/u))); 
+assert(compareArray("bab".match(/(\w\w)(\w)/u), "bab".match(/(?\w\w)(?\w)/u))); + +assert(compareArray(["bab", "b"], "bab".match(/(?b).\1/u))); +assert(compareArray(["baba", "b", "a"], "baba".match(/(.)(?a)\1\2/u))); +assert(compareArray(["baba", "b", "a", "b", "a"], + "baba".match(/(.)(?a)(?\1)(\2)/u))); +assert(compareArray(["<)a/u))); +assert(compareArray([">a", ">"], ">a".match(/(?>)a/u))); + +// Nested groups. +assert(compareArray(["bab", "bab", "ab", "b"], "bab".match(/(?.(?.(?.)))/u))); +assert(compareArray({a: "bab", b: "ab", c: "b"}, + "bab".match(/(?.(?.(?.)))/u).groups)); diff --git a/test/built-ins/RegExp/named-groups/unicode-property-names.js b/test/built-ins/RegExp/named-groups/unicode-property-names.js new file mode 100644 index 0000000000..149322bb2d --- /dev/null +++ b/test/built-ins/RegExp/named-groups/unicode-property-names.js @@ -0,0 +1,46 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +description: Exotic named group names in Unicode RegExps +esid: pending +features: [regexp-named-groups] +---*/ + +assert.sameValue("a", /(?<π>a)/u.exec("bab").groups.π); +assert.sameValue("a", /(?<\u{03C0}>a)/u.exec("bab").groups.π); +assert.sameValue("a", /(?<π>a)/u.exec("bab").groups.\u03C0); +assert.sameValue("a", /(?<\u{03C0}>a)/u.exec("bab").groups.\u03C0); +assert.sameValue("a", /(?<$>a)/u.exec("bab").groups.$); +assert.sameValue("a", /(?<_>a)/u.exec("bab").groups._); +assert.sameValue("a", /(?<$𐒤>a)/u.exec("bab").groups.$𐒤); +assert.sameValue("a", /(?<_\u200C>a)/u.exec("bab").groups._\u200C); +assert.sameValue("a", /(?<_\u200D>a)/u.exec("bab").groups._\u200D); +assert.sameValue("a", /(?<ಠ_ಠ>a)/u.exec("bab").groups.ಠ_ಠ); +assert.throws(SyntaxError, () => eval('/(?<❤>a)/u')); +assert.throws(SyntaxError, () => eval('/(?<𐒤>a)/u'), "ID_Continue but not ID_Start."); + +// Unicode escapes in capture names. 
+assert(/(?.)/u.test("a"), "\\u Lead \\u Trail"); +assert.throws(SyntaxError, () => eval("/(?.)/u"), "\\u Lea"); +assert.throws(SyntaxError, () => eval("/(?.)/u"), "\\u Trai"); +assert(/(?<\u0041>.)/u.test("a"), "\\u NonSurrogate"); +assert(/(?<\u{0041}>.)/u.test("a"), "\\u{ Non-surrogate }"); +assert(/(?.)/u.test("a"), "\\u{ Surrogate, ID_Continue }"); +assert.throws(SyntaxError, () => eval("/(?.)/u"), "\\u{ Out-of-bounds "); +assert.throws(SyntaxError, () => eval("/(?.)/u"), "Lea"); +assert.throws(SyntaxError, () => eval("/(?.)/u"), "Trai"); +assert(RegExp("(?<\u{0041}>.)", "u").test("a"), "Non-surrogate"); +assert(RegExp("(?.)", "u").test("a"), "Surrogate,ID_Continue"); + +// Bracketed escapes are not allowed; +// 4-char escapes must be the proper ID_Start/ID_Continue +assert.throws(SyntaxError, () => eval("/(?.)/u"), "Lead"); +assert.throws(SyntaxError, () => eval("/(?.)/u"), "Trail"); +assert((/(?<\u{0041}>.)/u).test("a"), "Non-surrogate"); +assert(/(?.)/u.test("a"), "Surrogate, ID_Continue"); +assert(RegExp("(?<\\u0041>.)", "u").test("a"), "Non-surrogate"); + +// Backslash is not allowed as ID_Start and ID_Continue +assert.throws(SyntaxError, () => eval("/(?<\\>.)/u"), "'\' misclassified as ID_Start"); +assert.throws(SyntaxError, () => eval("/(?.)/u"), "'\' misclassified as ID_Continue"); diff --git a/test/built-ins/RegExp/named-groups/unicode-references.js b/test/built-ins/RegExp/named-groups/unicode-references.js new file mode 100644 index 0000000000..080bda0beb --- /dev/null +++ b/test/built-ins/RegExp/named-groups/unicode-references.js @@ -0,0 +1,47 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +description: Named backreferences in Unicode RegExps +esid: sec-atomescape +info: > + The production AtomEscape :: [+N] k GroupName evaluates as follows: + + 1. 
Search the enclosing RegExp for an instance of a GroupSpecifier for an + RegExpIdentifierName which has a StringValue equal to the StringValue + of the RegExpIdentifierName contained in GroupName. + 2. Assert: A unique such GroupSpecifier is found. + 3. Let parenIndex be the number of left capturing parentheses in the entire + regular expression that occur to the left of the located GroupSpecifier. + This is the total number of times the Atom::(GroupSpecifierDisjunction) + production is expanded prior to that production's Term plus the total + number of Atom :: (GroupSpecifierDisjunction) productions enclosing this Term. + 4. Call BackreferenceMatcher(parenIndex) and return its Matcher result. +features: [regexp-named-groups] +includes: [compareArray.js] +---*/ + +// Named references. +assert(compareArray(["bab", "b"], "bab".match(/(?.).\k/u))); +assert.sameValue(null, "baa".match(/(?.).\k/u)); + +// Reference inside group. +assert(compareArray(["bab", "b"], "bab".match(/(?\k\w)../u))); +assert.sameValue("b", "bab".match(/(?\k\w)../u).groups.a); + +// Reference before group. +assert(compareArray(["bab", "b"], "bab".match(/\k(?b)\w\k/u))); +assert.sameValue("b", "bab".match(/\k(?b)\w\k/u).groups.a); +assert(compareArray(["bab", "b", "a"], "bab".match(/(?b)\k(?a)\k/u))); +let {a, b} = "bab".match(/(?b)\k(?a)\k/u).groups; +assert.sameValue(a, "a"); +assert.sameValue(b, "b"); + +assert(compareArray(["bab", "b"], "bab".match(/\k(?b)\w\k/))); +assert(compareArray(["bab", "b", "a"], "bab".match(/(?b)\k(?a)\k/))); + +// Reference properties. +assert.sameValue("a", /(?a)(?b)\k/u.exec("aba").groups.a); +assert.sameValue("b", /(?a)(?b)\k/u.exec("aba").groups.b); +assert.sameValue(undefined, /(?a)(?b)\k/u.exec("aba").groups.c); +assert.sameValue(undefined, /(?a)(?b)\k|(?c)/u.exec("aba").groups.c);