mirror of
https://github.com/tc39/test262.git
synced 2025-07-23 05:55:36 +02:00
regexp-generator: Implement downstream changes
The optimizations from commit e558b29b were never incorporated into the upstream test generator; this commit does so. As far as I can tell, the changes to the Unicode ranges are purely cosmetic. Some are formatted as 6-digit hex numbers instead of 4-digit. Others move the low-surrogates range 0xDC00-0xDFFF to the beginning of the array, but the union of the ranges is still the same.
This commit is contained in:
parent
879326855b
commit
07ddc3b41b
@ -40,7 +40,7 @@ includes: [regExpUtils.js]
|
|||||||
const str = buildString({
|
const str = buildString({
|
||||||
loneCodePoints: [],
|
loneCodePoints: [],
|
||||||
ranges: [
|
ranges: [
|
||||||
[0x0030, 0x0039],
|
[0x000030, 0x000039],
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ includes: [regExpUtils.js]
|
|||||||
const str = buildString({
|
const str = buildString({
|
||||||
loneCodePoints: [],
|
loneCodePoints: [],
|
||||||
ranges: [
|
ranges: [
|
||||||
[0x0030, 0x0039],
|
[0x000030, 0x000039],
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ includes: [regExpUtils.js]
|
|||||||
const str = buildString({
|
const str = buildString({
|
||||||
loneCodePoints: [],
|
loneCodePoints: [],
|
||||||
ranges: [
|
ranges: [
|
||||||
[0x0030, 0x0039],
|
[0x000030, 0x000039],
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ includes: [regExpUtils.js]
|
|||||||
const str = buildString({
|
const str = buildString({
|
||||||
loneCodePoints: [],
|
loneCodePoints: [],
|
||||||
ranges: [
|
ranges: [
|
||||||
[0x0030, 0x0039],
|
[0x000030, 0x000039],
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -40,8 +40,10 @@ includes: [regExpUtils.js]
|
|||||||
const str = buildString({
|
const str = buildString({
|
||||||
loneCodePoints: [],
|
loneCodePoints: [],
|
||||||
ranges: [
|
ranges: [
|
||||||
|
[0x00DC00, 0x00DFFF],
|
||||||
[0x000000, 0x00002F],
|
[0x000000, 0x00002F],
|
||||||
[0x00003A, 0x10FFFF],
|
[0x00003A, 0x00DBFF],
|
||||||
|
[0x00E000, 0x10FFFF],
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -38,11 +38,13 @@ includes: [regExpUtils.js]
|
|||||||
---*/
|
---*/
|
||||||
|
|
||||||
const str = buildString({
|
const str = buildString({
|
||||||
loneCodePoints: [],
|
loneCodePoints: [],
|
||||||
ranges: [
|
ranges: [
|
||||||
[0x000000, 0x00002F],
|
[0x00DC00, 0x00DFFF],
|
||||||
[0x00003A, 0x10FFFF],
|
[0x000000, 0x00002F],
|
||||||
],
|
[0x00003A, 0x00DBFF],
|
||||||
|
[0x00E000, 0x10FFFF],
|
||||||
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
const re = /\D+/ug;
|
const re = /\D+/ug;
|
||||||
|
@ -38,11 +38,13 @@ includes: [regExpUtils.js]
|
|||||||
---*/
|
---*/
|
||||||
|
|
||||||
const str = buildString({
|
const str = buildString({
|
||||||
loneCodePoints: [],
|
loneCodePoints: [],
|
||||||
ranges: [
|
ranges: [
|
||||||
[0x000000, 0x00002F],
|
[0x00DC00, 0x00DFFF],
|
||||||
[0x00003A, 0x00FFFF],
|
[0x000000, 0x00002F],
|
||||||
],
|
[0x00003A, 0x00DBFF],
|
||||||
|
[0x00E000, 0x00FFFF],
|
||||||
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
const re = /\D+/g;
|
const re = /\D+/g;
|
||||||
|
@ -38,11 +38,13 @@ includes: [regExpUtils.js]
|
|||||||
---*/
|
---*/
|
||||||
|
|
||||||
const str = buildString({
|
const str = buildString({
|
||||||
loneCodePoints: [],
|
loneCodePoints: [],
|
||||||
ranges: [
|
ranges: [
|
||||||
[0x000000, 0x00002F],
|
[0x00DC00, 0x00DFFF],
|
||||||
[0x00003A, 0x00FFFF],
|
[0x000000, 0x00002F],
|
||||||
],
|
[0x00003A, 0x00DBFF],
|
||||||
|
[0x00E000, 0x00FFFF],
|
||||||
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
const re = /\D/g;
|
const re = /\D/g;
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
import filenamify from 'filenamify';
|
import filenamify from 'filenamify';
|
||||||
import fs from 'node:fs';
|
import fs from 'node:fs';
|
||||||
import jsesc from 'jsesc';
|
import regenerate from 'regenerate';
|
||||||
import rewritePattern from 'regexpu-core';
|
import rewritePattern from 'regexpu-core';
|
||||||
|
import ESCAPE_SETS from 'regexpu-core/data/character-class-escape-sets.js';
|
||||||
import slugify from 'slugify';
|
import slugify from 'slugify';
|
||||||
|
|
||||||
import header from './header.mjs';
|
import header from './header.mjs';
|
||||||
@ -15,30 +16,85 @@ const patterns = {
|
|||||||
'non-digit class escape': '\\D',
|
'non-digit class escape': '\\D',
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Pretty-printing code adapted from unicode-property-escapes-tests.
|
||||||
|
// https://github.com/mathiasbynens/unicode-property-escapes-tests/blob/60f2dbec2b2a840ee67aa04dbd3449bb90fd2999/regenerate.js
|
||||||
|
|
||||||
|
/**
 * Format a code point as an upper-case hexadecimal literal, zero-padded to
 * at least six digits.
 *
 * @param {number} codePoint - Non-negative integer to format.
 * @returns {string} `0x` followed by the padded hex digits, e.g. `0x000030`.
 */
function toHex(codePoint) {
  // `padStart` pads short values without ever dropping leading digits of
  // longer ones, which slicing the tail of a pre-padded string would do.
  return `0x${codePoint.toString(16).toUpperCase().padStart(6, '0')}`;
}
|
||||||
|
|
||||||
|
/**
 * Split flat range data into lone code points and inclusive ranges.
 *
 * @param {{ data: number[] }} reg - Object whose `data` array holds
 *   consecutive `(start, end)` pairs in which `end` is exclusive.
 * @returns {[number[], number[][]]} A `[loneCodePoints, ranges]` pair:
 *   single-value pairs land in `loneCodePoints`, all others are pushed to
 *   `ranges` as inclusive `[start, end]` arrays.
 */
function toTestData(reg) {
  const { data } = reg;
  const loneCodePoints = [];
  const ranges = [];
  // Walk the data one `(start, end)` pair at a time.
  for (let index = 0; index < data.length; index += 2) {
    const start = data[index];
    const end = data[index + 1] - 1; // The `- 1` makes `end` inclusive.
    if (start === end) {
      loneCodePoints.push(start);
    } else {
      ranges.push([start, end]);
    }
  }
  return [loneCodePoints, ranges];
}
|
||||||
|
|
||||||
|
/**
 * Render test data as the source text of an object literal with
 * `loneCodePoints` and `ranges` properties, every number formatted by
 * `toHex`.
 *
 * @param {[number[], number[][]]} testData - `[loneCodePoints, ranges]` pair.
 * @returns {string} Multi-line object-literal source text.
 */
function prettyPrint([ loneCodePoints, ranges ]) {
  // NOTE(review): indent unit reproduced exactly as it appears in SOURCE;
  // confirm it was not collapsed by whatever rendered this listing.
  const indent = ' ';
  const nested = `${indent}${indent}`;

  // One entry per line, each followed by a comma, closed at one indent level.
  const asMultiLineArray = (entries) =>
    `[\n${nested}${entries.join(`,\n${nested}`)},\n${indent}]`;

  const hexCodePoints = loneCodePoints.map((codePoint) => toHex(codePoint));
  const hexRanges = ranges.map(
    ([ first, last ]) => `[${toHex(first)}, ${toHex(last)}]`
  );

  let codePointsText;
  if (hexCodePoints.length === 0) {
    codePointsText = `[]`;
  } else if (hexCodePoints.length === 1) {
    // A single lone code point stays on one line.
    codePointsText = `[${hexCodePoints[0]}]`;
  } else {
    codePointsText = asMultiLineArray(hexCodePoints);
  }

  const rangesText = hexRanges.length === 0 ? `[]` : asMultiLineArray(hexRanges);

  return `{\n${indent}loneCodePoints: ${codePointsText},\n${indent}ranges: ${rangesText},\n}`;
}
|
||||||
|
|
||||||
|
// Set covering 0xDC00 through 0xDFFF; buildString uses it to split these
// values out of an escape's set so they can be listed first.
const LOW_SURROGATES = regenerate().addRange(0xDC00, 0xDFFF);
|
||||||
|
|
||||||
|
/**
 * Build the pretty-printed test data for a character class escape.
 *
 * Looks up the escape's set in `ESCAPE_SETS` (the `UNICODE` table when
 * `flags` contains `u`, otherwise `REGULAR`). If the set overlaps
 * `LOW_SURROGATES`, the overlapping entries are emitted ahead of the rest.
 *
 * @param {string} escapeChar - Key passed to the escape-set table's `get`.
 * @param {string} flags - Regular expression flags.
 * @returns {string} Output of `prettyPrint` for the resulting test data.
 */
function buildString(escapeChar, flags) {
  const tableName = flags.includes('u') ? 'UNICODE' : 'REGULAR';
  const escapeSet = ESCAPE_SETS[tableName].get(escapeChar);

  // Work on clones so the shared escape set is never modified.
  const lowPart = escapeSet.clone().intersection(LOW_SURROGATES);
  if (lowPart.data.length === 0) {
    return prettyPrint(toTestData(escapeSet));
  }

  const remainder = escapeSet.clone().remove(LOW_SURROGATES);
  const [ lowCodePoints, lowRangeList ] = toTestData(lowPart);
  const [ otherCodePoints, otherRangeList ] = toTestData(remainder);

  // Low-surrogate entries come first, followed by everything else.
  return prettyPrint([
    [ ...lowCodePoints, ...otherCodePoints ],
    [ ...lowRangeList, ...otherRangeList ],
  ]);
}
|
||||||
|
|
||||||
function buildContent(desc, pattern, range, max, flags, skip180e) {
|
function buildContent(desc, pattern, range, max, flags, skip180e) {
|
||||||
|
let string = buildString(pattern[1], flags);
|
||||||
let method;
|
let method;
|
||||||
let features = [];
|
let features = [];
|
||||||
|
|
||||||
let content = header(`Compare range for ${desc} ${pattern} with flags ${flags}`);
|
let content = header(`Compare range for ${desc} ${pattern} with flags ${flags}`);
|
||||||
|
|
||||||
content += `
|
content += `
|
||||||
const str = buildString({ loneCodePoints: [], ranges: [[0, ${
|
const str = buildString(${string});
|
||||||
jsesc(max, { numbers: 'hexadecimal' })
|
|
||||||
}]] });
|
|
||||||
|
|
||||||
const re = /${pattern}/${flags};
|
const re = /${pattern}/${flags};
|
||||||
const matchingRange = /${range}/${flags};
|
|
||||||
|
|
||||||
const errors = [];
|
const errors = [];
|
||||||
|
|
||||||
function matching(str) {
|
if (!re.test(str)) {
|
||||||
return str.replace(re, '') === str.replace(matchingRange, '');
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!matching(str)) {
|
|
||||||
// Error, let's find out where
|
// Error, let's find out where
|
||||||
for (const char of str) {
|
for (const char of str) {
|
||||||
if (!matching(char)) {
|
if (!re.test(char)) {
|
||||||
errors.push('0x' + char.codePointAt(0).toString(16));
|
errors.push('0x' + char.codePointAt(0).toString(16));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -13,8 +13,8 @@
|
|||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"filenamify": "^6.0.0",
|
"filenamify": "^6.0.0",
|
||||||
"jsesc": "^3.0.2",
|
|
||||||
"mkdirp": "^3.0.1",
|
"mkdirp": "^3.0.1",
|
||||||
|
"regenerate": "^1.4.2",
|
||||||
"regexpu-core": "^6.1.1",
|
"regexpu-core": "^6.1.1",
|
||||||
"rimraf": "^6.0.1",
|
"rimraf": "^6.0.1",
|
||||||
"slugify": "^1.6.6"
|
"slugify": "^1.6.6"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user