diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-flags-u.js index de3c16f4e5..0820f1c1c0 100644 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-flags-u.js +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-flags-u.js @@ -40,7 +40,7 @@ includes: [regExpUtils.js] const str = buildString({ loneCodePoints: [], ranges: [ - [0x0030, 0x0039], + [0x000030, 0x000039], ], }); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-plus-quantifier-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-plus-quantifier-flags-u.js index 1c6462e766..9108d26f04 100644 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-plus-quantifier-flags-u.js +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-plus-quantifier-flags-u.js @@ -40,7 +40,7 @@ includes: [regExpUtils.js] const str = buildString({ loneCodePoints: [], ranges: [ - [0x0030, 0x0039], + [0x000030, 0x000039], ], }); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-plus-quantifier.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-plus-quantifier.js index 66418758b7..32b0adb0e7 100644 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-plus-quantifier.js +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-plus-quantifier.js @@ -40,7 +40,7 @@ includes: [regExpUtils.js] const str = buildString({ loneCodePoints: [], ranges: [ - [0x0030, 0x0039], + [0x000030, 0x000039], ], }); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape.js index 090e5a7e9a..1124993ff2 100644 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape.js +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape.js @@ -40,7 +40,7 @@ includes: [regExpUtils.js] const str = buildString({ loneCodePoints: [], ranges: [ - [0x0030, 0x0039], + [0x000030, 0x000039], ], }); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-flags-u.js index e9385c0004..ab304348b8 100644 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-flags-u.js +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-flags-u.js @@ -40,8 +40,10 @@ includes: [regExpUtils.js] const str = buildString({ loneCodePoints: [], ranges: [ + [0x00DC00, 0x00DFFF], [0x000000, 0x00002F], - [0x00003A, 0x10FFFF], + [0x00003A, 0x00DBFF], + [0x00E000, 0x10FFFF], ], }); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier-flags-u.js index bf534de854..14ae451846 100644 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier-flags-u.js +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier-flags-u.js @@ -38,11 +38,13 @@ includes: [regExpUtils.js] ---*/ const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x000000, 0x00002F], - [0x00003A, 0x10FFFF], - ], + loneCodePoints: [], + ranges: [ + [0x00DC00, 0x00DFFF], + [0x000000, 0x00002F], + [0x00003A, 0x00DBFF], + [0x00E000, 0x10FFFF], + ], }); const re = /\D+/ug; diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier.js index 0c2c703ae8..f1e626f151 100644 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier.js +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier.js @@ -38,11 +38,13 @@ includes: [regExpUtils.js] ---*/ const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x000000, 0x00002F], - [0x00003A, 0x00FFFF], - ], + loneCodePoints: [], + ranges: [ + [0x00DC00, 0x00DFFF], + [0x000000, 0x00002F], + [0x00003A, 0x00DBFF], + [0x00E000, 0x00FFFF], + ], }); const re = /\D+/g; diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape.js index ac626beb49..394ede390a 100644 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape.js +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape.js @@ -38,11 +38,13 @@ includes: [regExpUtils.js] ---*/ const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x000000, 0x00002F], - [0x00003A, 0x00FFFF], - ], + loneCodePoints: [], + ranges: [ + [0x00DC00, 0x00DFFF], + [0x000000, 0x00002F], + [0x00003A, 0x00DBFF], + [0x00E000, 0x00FFFF], + ], }); const re = /\D/g; diff --git a/tools/regexp-generator/index.mjs b/tools/regexp-generator/index.mjs index 0f664b7726..694e94a318 100644 --- a/tools/regexp-generator/index.mjs +++ b/tools/regexp-generator/index.mjs @@ -1,7 +1,8 @@ import filenamify from 'filenamify'; import fs from 'node:fs'; -import jsesc from 'jsesc'; +import regenerate from 'regenerate'; import rewritePattern from 'regexpu-core'; +import ESCAPE_SETS from 'regexpu-core/data/character-class-escape-sets.js'; import slugify from 'slugify'; import header from './header.mjs'; @@ -15,30 +16,85 @@ const patterns = { 'non-digit class escape': '\\D', }; +// Pretty-printing code adapted from unicode-property-escapes-tests. +// https://github.com/mathiasbynens/unicode-property-escapes-tests/blob/60f2dbec2b2a840ee67aa04dbd3449bb90fd2999/regenerate.js + +function toHex(codePoint) { + return '0x' + ('00000' + codePoint.toString(16).toUpperCase()).slice(-6); +}; + +function toTestData(reg) { + const data = reg.data; + // Iterate over the data per `(start, end)` pair. + let index = 0; + const length = data.length; + const loneCodePoints = []; + const ranges = []; + while (index < length) { + let start = data[index]; + let end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive. + if (start == end) { + loneCodePoints.push(start); + } else { + ranges.push([start, end]); + } + index += 2; + } + return [ loneCodePoints, ranges ]; +} + +function prettyPrint([ loneCodePoints, ranges ]) { + const indent = ' '; + loneCodePoints = loneCodePoints.map((codePoint) => toHex(codePoint)); + ranges = ranges.map( + (range) => `[${ toHex(range[0]) }, ${ toHex(range[1]) }]` + ); + const loneCodePointsOutput = loneCodePoints.length ? + loneCodePoints.length === 1 ? `[${loneCodePoints[0]}]` : + `[\n${indent}${indent}${ loneCodePoints.join(`,\n${indent}${indent}`) },\n${indent}]` : + `[]`; + const rangesOutput = ranges.length ? + `[\n${indent}${indent}${ ranges.join(`,\n${indent}${indent}`) },\n${indent}]` : + `[]`; + return `{\n${indent}loneCodePoints: ${ loneCodePointsOutput },\n${indent}ranges: ${ rangesOutput },\n}`; +} + +const LOW_SURROGATES = regenerate().addRange(0xDC00, 0xDFFF); + +function buildString(escapeChar, flags) { + const isUnicode = flags.includes('u'); + let escapeData = ESCAPE_SETS[isUnicode ? 'UNICODE' : 'REGULAR'].get(escapeChar); + + const lowSurrogates = escapeData.clone().intersection(LOW_SURROGATES); + if (lowSurrogates.data.length === 0) { + return prettyPrint(toTestData(escapeData)); + } + const rest = escapeData.clone().remove(LOW_SURROGATES); + const [ lowLoneCodePoints, lowRanges ] = toTestData(lowSurrogates); + const [ loneCodePoints, ranges ] = toTestData(rest); + loneCodePoints.unshift(...lowLoneCodePoints); + ranges.unshift(...lowRanges); + return prettyPrint([ loneCodePoints, ranges ]); +} + function buildContent(desc, pattern, range, max, flags, skip180e) { + let string = buildString(pattern[1], flags); let method; let features = []; let content = header(`Compare range for ${desc} ${pattern} with flags ${flags}`); content += ` -const str = buildString({ loneCodePoints: [], ranges: [[0, ${ - jsesc(max, { numbers: 'hexadecimal' }) -}]] }); +const str = buildString(${string}); const re = /${pattern}/${flags}; -const matchingRange = /${range}/${flags}; const errors = []; -function matching(str) { - return str.replace(re, '') === str.replace(matchingRange, ''); -} - -if (!matching(str)) { +if (!re.test(str)) { // Error, let's find out where for (const char of str) { - if (!matching(char)) { + if (!re.test(char)) { errors.push('0x' + char.codePointAt(0).toString(16)); } } diff --git a/tools/regexp-generator/package.json b/tools/regexp-generator/package.json index cffae050be..e8c9e4b2f0 100644 --- a/tools/regexp-generator/package.json +++ b/tools/regexp-generator/package.json @@ -13,8 +13,8 @@ "license": "MIT", "devDependencies": { "filenamify": "^6.0.0", - "jsesc": "^3.0.2", "mkdirp": "^3.0.1", + "regenerate": "^1.4.2", "regexpu-core": "^6.1.1", "rimraf": "^6.0.1", "slugify": "^1.6.6"