mirror of
https://github.com/tc39/test262.git
synced 2025-04-08 19:35:28 +02:00
regexp-generator: Implement downstream changes
The optimizations from commit e558b29b were never incorporated into the upstream test generator. This commit does so now. As far as I can tell, the changes to the Unicode ranges are purely cosmetic. Some are formatted as 6-digit hex numbers instead of 4-digit. Others move the low-surrogates range 0xDC00-0xDFFF to the beginning of the array, but the union of the ranges is still the same.
This commit is contained in:
parent
879326855b
commit
07ddc3b41b
@ -40,7 +40,7 @@ includes: [regExpUtils.js]
|
||||
const str = buildString({
|
||||
loneCodePoints: [],
|
||||
ranges: [
|
||||
[0x0030, 0x0039],
|
||||
[0x000030, 0x000039],
|
||||
],
|
||||
});
|
||||
|
||||
|
@ -40,7 +40,7 @@ includes: [regExpUtils.js]
|
||||
const str = buildString({
|
||||
loneCodePoints: [],
|
||||
ranges: [
|
||||
[0x0030, 0x0039],
|
||||
[0x000030, 0x000039],
|
||||
],
|
||||
});
|
||||
|
||||
|
@ -40,7 +40,7 @@ includes: [regExpUtils.js]
|
||||
const str = buildString({
|
||||
loneCodePoints: [],
|
||||
ranges: [
|
||||
[0x0030, 0x0039],
|
||||
[0x000030, 0x000039],
|
||||
],
|
||||
});
|
||||
|
||||
|
@ -40,7 +40,7 @@ includes: [regExpUtils.js]
|
||||
const str = buildString({
|
||||
loneCodePoints: [],
|
||||
ranges: [
|
||||
[0x0030, 0x0039],
|
||||
[0x000030, 0x000039],
|
||||
],
|
||||
});
|
||||
|
||||
|
@ -40,8 +40,10 @@ includes: [regExpUtils.js]
|
||||
const str = buildString({
|
||||
loneCodePoints: [],
|
||||
ranges: [
|
||||
[0x00DC00, 0x00DFFF],
|
||||
[0x000000, 0x00002F],
|
||||
[0x00003A, 0x10FFFF],
|
||||
[0x00003A, 0x00DBFF],
|
||||
[0x00E000, 0x10FFFF],
|
||||
],
|
||||
});
|
||||
|
||||
|
@ -38,11 +38,13 @@ includes: [regExpUtils.js]
|
||||
---*/
|
||||
|
||||
const str = buildString({
|
||||
loneCodePoints: [],
|
||||
ranges: [
|
||||
[0x000000, 0x00002F],
|
||||
[0x00003A, 0x10FFFF],
|
||||
],
|
||||
loneCodePoints: [],
|
||||
ranges: [
|
||||
[0x00DC00, 0x00DFFF],
|
||||
[0x000000, 0x00002F],
|
||||
[0x00003A, 0x00DBFF],
|
||||
[0x00E000, 0x10FFFF],
|
||||
],
|
||||
});
|
||||
|
||||
const re = /\D+/ug;
|
||||
|
@ -38,11 +38,13 @@ includes: [regExpUtils.js]
|
||||
---*/
|
||||
|
||||
const str = buildString({
|
||||
loneCodePoints: [],
|
||||
ranges: [
|
||||
[0x000000, 0x00002F],
|
||||
[0x00003A, 0x00FFFF],
|
||||
],
|
||||
loneCodePoints: [],
|
||||
ranges: [
|
||||
[0x00DC00, 0x00DFFF],
|
||||
[0x000000, 0x00002F],
|
||||
[0x00003A, 0x00DBFF],
|
||||
[0x00E000, 0x00FFFF],
|
||||
],
|
||||
});
|
||||
|
||||
const re = /\D+/g;
|
||||
|
@ -38,11 +38,13 @@ includes: [regExpUtils.js]
|
||||
---*/
|
||||
|
||||
const str = buildString({
|
||||
loneCodePoints: [],
|
||||
ranges: [
|
||||
[0x000000, 0x00002F],
|
||||
[0x00003A, 0x00FFFF],
|
||||
],
|
||||
loneCodePoints: [],
|
||||
ranges: [
|
||||
[0x00DC00, 0x00DFFF],
|
||||
[0x000000, 0x00002F],
|
||||
[0x00003A, 0x00DBFF],
|
||||
[0x00E000, 0x00FFFF],
|
||||
],
|
||||
});
|
||||
|
||||
const re = /\D/g;
|
||||
|
@ -1,7 +1,8 @@
|
||||
import filenamify from 'filenamify';
|
||||
import fs from 'node:fs';
|
||||
import jsesc from 'jsesc';
|
||||
import regenerate from 'regenerate';
|
||||
import rewritePattern from 'regexpu-core';
|
||||
import ESCAPE_SETS from 'regexpu-core/data/character-class-escape-sets.js';
|
||||
import slugify from 'slugify';
|
||||
|
||||
import header from './header.mjs';
|
||||
@ -15,30 +16,85 @@ const patterns = {
|
||||
'non-digit class escape': '\\D',
|
||||
};
|
||||
|
||||
// Pretty-printing code adapted from unicode-property-escapes-tests.
|
||||
// https://github.com/mathiasbynens/unicode-property-escapes-tests/blob/60f2dbec2b2a840ee67aa04dbd3449bb90fd2999/regenerate.js
|
||||
|
||||
/**
 * Formats a code point as an upper-case hexadecimal literal with six digits.
 *
 * @param {number} codePoint - Value to format.
 * @returns {string} `0x` followed by the last six upper-case hex digits of
 *   `codePoint`, left-padded with zeros (e.g. `0x000030`).
 */
function toHex(codePoint) {
  const digits = codePoint.toString(16).toUpperCase();
  // Pad to six digits; `slice(-6)` keeps only the last six if there are more.
  return `0x${digits.padStart(6, '0').slice(-6)}`;
}
|
||||
|
||||
/**
 * Splits a set's flat `data` array into single code points and ranges.
 *
 * `reg.data` is read two entries at a time as `(start, end)` pairs in which
 * the stored `end` is exclusive; it is converted to an inclusive bound here.
 *
 * @param {{ data: number[] }} reg - Object exposing the flat `data` array.
 * @returns {[number[], number[][]]} A tuple `[loneCodePoints, ranges]`:
 *   pairs covering exactly one code point go to `loneCodePoints`, all other
 *   pairs go to `ranges` as `[start, inclusiveEnd]`.
 */
function toTestData(reg) {
  const { data } = reg;
  const loneCodePoints = [];
  const ranges = [];
  for (let index = 0; index < data.length; index += 2) {
    const start = data[index];
    // The `- 1` makes `end` inclusive.
    const end = data[index + 1] - 1;
    if (start === end) {
      loneCodePoints.push(start);
    } else {
      ranges.push([start, end]);
    }
  }
  return [ loneCodePoints, ranges ];
}
|
||||
|
||||
/**
 * Renders test data as the source text of an object literal with
 * `loneCodePoints` and `ranges` properties.
 *
 * Every number is formatted with `toHex`. An empty list prints as `[]`, a
 * single lone code point prints inline, and anything else prints one entry
 * per line with a trailing comma.
 *
 * @param {[number[], number[][]]} testData - Tuple `[loneCodePoints, ranges]`.
 * @returns {string} The object-literal source text.
 */
function prettyPrint([ loneCodePoints, ranges ]) {
  const indent = ' ';
  const entryIndent = `${indent}${indent}`;
  // Multi-line list: one entry per line, trailing comma after the last one.
  const formatList = (entries) =>
    `[\n${entryIndent}${entries.join(`,\n${entryIndent}`)},\n${indent}]`;

  const hexCodePoints = loneCodePoints.map((codePoint) => toHex(codePoint));
  const hexRanges = ranges.map(
    (pair) => `[${toHex(pair[0])}, ${toHex(pair[1])}]`
  );

  let loneCodePointsOutput;
  if (hexCodePoints.length === 0) {
    loneCodePointsOutput = '[]';
  } else if (hexCodePoints.length === 1) {
    // Only lone code points get the compact single-entry form.
    loneCodePointsOutput = `[${hexCodePoints[0]}]`;
  } else {
    loneCodePointsOutput = formatList(hexCodePoints);
  }

  const rangesOutput = hexRanges.length === 0 ? '[]' : formatList(hexRanges);

  return [
    '{',
    `${indent}loneCodePoints: ${loneCodePointsOutput},`,
    `${indent}ranges: ${rangesOutput},`,
    '}',
  ].join('\n');
}
|
||||
|
||||
// Set covering code points 0xDC00 through 0xDFFF (the UTF-16 low-surrogate
// range); used to split those code points out of an escape set.
const LOW_SURROGATES = regenerate().addRange(0xDC00, 0xDFFF);
|
||||
|
||||
/**
 * Produces the pretty-printed `{ loneCodePoints, ranges }` literal for the
 * code points matched by a character class escape.
 *
 * The set is looked up in `ESCAPE_SETS` (`UNICODE` when `flags` contains `u`,
 * otherwise `REGULAR`). If it contains any code points from `LOW_SURROGATES`,
 * those are listed before everything else.
 * NOTE(review): presumably low surrogates go first so they never directly
 * follow a high surrogate in the generated string — confirm.
 *
 * @param {string} escapeChar - Escape letter used as the `ESCAPE_SETS` key.
 * @param {string} flags - RegExp flags of the generated test.
 * @returns {string} Output of `prettyPrint` for the (re)ordered test data.
 */
function buildString(escapeChar, flags) {
  const setName = flags.includes('u') ? 'UNICODE' : 'REGULAR';
  const escapeData = ESCAPE_SETS[setName].get(escapeChar);

  const lowSurrogates = escapeData.clone().intersection(LOW_SURROGATES);
  if (lowSurrogates.data.length === 0) {
    // No low surrogates in this set: print it as-is.
    return prettyPrint(toTestData(escapeData));
  }

  const withoutLowSurrogates = escapeData.clone().remove(LOW_SURROGATES);
  const [ lowLoneCodePoints, lowRanges ] = toTestData(lowSurrogates);
  const [ otherLoneCodePoints, otherRanges ] = toTestData(withoutLowSurrogates);

  return prettyPrint([
    [ ...lowLoneCodePoints, ...otherLoneCodePoints ],
    [ ...lowRanges, ...otherRanges ],
  ]);
}
|
||||
|
||||
function buildContent(desc, pattern, range, max, flags, skip180e) {
|
||||
let string = buildString(pattern[1], flags);
|
||||
let method;
|
||||
let features = [];
|
||||
|
||||
let content = header(`Compare range for ${desc} ${pattern} with flags ${flags}`);
|
||||
|
||||
content += `
|
||||
const str = buildString({ loneCodePoints: [], ranges: [[0, ${
|
||||
jsesc(max, { numbers: 'hexadecimal' })
|
||||
}]] });
|
||||
const str = buildString(${string});
|
||||
|
||||
const re = /${pattern}/${flags};
|
||||
const matchingRange = /${range}/${flags};
|
||||
|
||||
const errors = [];
|
||||
|
||||
function matching(str) {
|
||||
return str.replace(re, '') === str.replace(matchingRange, '');
|
||||
}
|
||||
|
||||
if (!matching(str)) {
|
||||
if (!re.test(str)) {
|
||||
// Error, let's find out where
|
||||
for (const char of str) {
|
||||
if (!matching(char)) {
|
||||
if (!re.test(char)) {
|
||||
errors.push('0x' + char.codePointAt(0).toString(16));
|
||||
}
|
||||
}
|
||||
|
@ -13,8 +13,8 @@
|
||||
"license": "MIT",
|
||||
"devDependencies": {
|
||||
"filenamify": "^6.0.0",
|
||||
"jsesc": "^3.0.2",
|
||||
"mkdirp": "^3.0.1",
|
||||
"regenerate": "^1.4.2",
|
||||
"regexpu-core": "^6.1.1",
|
||||
"rimraf": "^6.0.1",
|
||||
"slugify": "^1.6.6"
|
||||
|
Loading…
x
Reference in New Issue
Block a user