mirror of
https://github.com/tc39/test262.git
synced 2025-07-23 14:04:51 +02:00
Fix Regex Character Class Escape Tests
For each character class escape (\d, \D, \s, \S, \w, \W), check positive cases (the escape matches every character it is supposed to match) and negative cases (the escape matches none of the characters it is not supposed to match). Each of these checks is run without flags, in Unicode mode (the u flag), and with the v flag. This uses regenerate.js from the unicode-property-escapes-tests repo to generate strings that contain exactly the characters that each escape is supposed to match or not match. Comparison is done with the regex test method instead of a regex-based replace to optimize the tests. This is part of my work at the SYSTEMF lab at EPFL. This change also avoids modifying the prototype of the regenerate library object.
This commit is contained in:
parent
48bb262183
commit
05fbae4993
@ -1,5 +1,6 @@
|
|||||||
export default description => {
|
export default description => {
|
||||||
let header = `// Copyright (C) 2018 Leo Balter. All rights reserved.
|
let header = `// Copyright (C) 2018 Leo Balter. All rights reserved.
|
||||||
|
// Copyright (C) 2024 Aurèle Barrière. All rights reserved.
|
||||||
// This code is governed by the BSD license found in the LICENSE file.
|
// This code is governed by the BSD license found in the LICENSE file.
|
||||||
|
|
||||||
/*---
|
/*---
|
||||||
|
@ -7,15 +7,6 @@ import slugify from 'slugify';
|
|||||||
|
|
||||||
import header from './header.mjs';
|
import header from './header.mjs';
|
||||||
|
|
||||||
const patterns = {
|
|
||||||
'whitespace class escape': '\\s',
|
|
||||||
'non-whitespace class escape': '\\S',
|
|
||||||
'word class escape': '\\w',
|
|
||||||
'non-word class escape': '\\W',
|
|
||||||
'digit class escape': '\\d',
|
|
||||||
'non-digit class escape': '\\D',
|
|
||||||
};
|
|
||||||
|
|
||||||
// Pretty-printing code adapted from unicode-property-escapes-tests.
|
// Pretty-printing code adapted from unicode-property-escapes-tests.
|
||||||
// https://github.com/mathiasbynens/unicode-property-escapes-tests/blob/60f2dbec2b2a840ee67aa04dbd3449bb90fd2999/regenerate.js
|
// https://github.com/mathiasbynens/unicode-property-escapes-tests/blob/60f2dbec2b2a840ee67aa04dbd3449bb90fd2999/regenerate.js
|
||||||
|
|
||||||
@ -44,27 +35,23 @@ function toTestData(reg) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function prettyPrint([ loneCodePoints, ranges ]) {
|
function prettyPrint([ loneCodePoints, ranges ]) {
|
||||||
const indent = ' ';
|
const indent = ' '; // Test 262 uses two-space indents.
|
||||||
loneCodePoints = loneCodePoints.map((codePoint) => toHex(codePoint));
|
loneCodePoints = loneCodePoints.map((codePoint) => toHex(codePoint));
|
||||||
ranges = ranges.map(
|
ranges = ranges.map(
|
||||||
(range) => `[${ toHex(range[0]) }, ${ toHex(range[1]) }]`
|
(range) => `[${ toHex(range[0]) }, ${ toHex(range[1]) }]`
|
||||||
);
|
);
|
||||||
const loneCodePointsOutput = loneCodePoints.length ?
|
const loneCodePointsOutput = loneCodePoints.length ?
|
||||||
loneCodePoints.length === 1 ? `[${loneCodePoints[0]}]` :
|
`[\n${indent}${indent}${ loneCodePoints.join(`,\n${indent}${indent}`) }\n${indent}]` :
|
||||||
`[\n${indent}${indent}${ loneCodePoints.join(`,\n${indent}${indent}`) },\n${indent}]` :
|
|
||||||
`[]`;
|
`[]`;
|
||||||
const rangesOutput = ranges.length ?
|
const rangesOutput = ranges.length ?
|
||||||
`[\n${indent}${indent}${ ranges.join(`,\n${indent}${indent}`) },\n${indent}]` :
|
`[\n${indent}${indent}${ ranges.join(`,\n${indent}${indent}`) }\n${indent}]` :
|
||||||
`[]`;
|
`[]`;
|
||||||
return `{\n${indent}loneCodePoints: ${ loneCodePointsOutput },\n${indent}ranges: ${ rangesOutput },\n}`;
|
return `{\n${indent}loneCodePoints: ${ loneCodePointsOutput },\n${indent}ranges: ${ rangesOutput }\n}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
const LOW_SURROGATES = regenerate().addRange(0xDC00, 0xDFFF);
|
const LOW_SURROGATES = regenerate().addRange(0xDC00, 0xDFFF);
|
||||||
|
|
||||||
function buildString(escapeChar, flags) {
|
function toTestCode(escapeData) {
|
||||||
const isUnicode = flags.includes('u');
|
|
||||||
let escapeData = ESCAPE_SETS[isUnicode ? 'UNICODE' : 'REGULAR'].get(escapeChar);
|
|
||||||
|
|
||||||
const lowSurrogates = escapeData.clone().intersection(LOW_SURROGATES);
|
const lowSurrogates = escapeData.clone().intersection(LOW_SURROGATES);
|
||||||
if (lowSurrogates.data.length === 0) {
|
if (lowSurrogates.data.length === 0) {
|
||||||
return prettyPrint(toTestData(escapeData));
|
return prettyPrint(toTestData(escapeData));
|
||||||
@ -77,23 +64,94 @@ function buildString(escapeChar, flags) {
|
|||||||
return prettyPrint([ loneCodePoints, ranges ]);
|
return prettyPrint([ loneCodePoints, ranges ]);
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildContent(desc, pattern, flags) {
|
// The different character class escapes.
|
||||||
let string = buildString(pattern[1], flags);
|
const patterns = {
|
||||||
|
's': 'whitespace class escape',
|
||||||
|
'S': 'non-whitespace class escape',
|
||||||
|
'w': 'word class escape',
|
||||||
|
'W': 'non-word class escape',
|
||||||
|
'd': 'digit class escape',
|
||||||
|
'D': 'non-digit class escape',
|
||||||
|
};
|
||||||
|
|
||||||
let content = header(`Compare range for ${desc} ${pattern} with flags ${flags}`);
|
const negation = {
|
||||||
|
's': 'S',
|
||||||
|
'S': 's',
|
||||||
|
'w': 'W',
|
||||||
|
'W': 'w',
|
||||||
|
'd': 'D',
|
||||||
|
'D': 'd',
|
||||||
|
}
|
||||||
|
|
||||||
|
// In each test file, test all these flag configurations.
|
||||||
|
const flags_configs = {
|
||||||
|
'standard': '',
|
||||||
|
'unicode': 'u',
|
||||||
|
'vflag': 'v',
|
||||||
|
}
|
||||||
|
|
||||||
|
// For each character class escape, test positive and negative cases.
|
||||||
|
const test_cases = [
|
||||||
|
{ positivity: true,
|
||||||
|
suffix: '-positive-cases' },
|
||||||
|
{ positivity: false,
|
||||||
|
suffix: '-negative-cases' },
|
||||||
|
]
|
||||||
|
|
||||||
|
function buildRegex(pattern, positivity) {
|
||||||
|
return positivity ? `^\\${pattern}+$` : `\\${pattern}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildRegexes(pattern, positivity) {
|
||||||
|
const regex = buildRegex(pattern, positivity);
|
||||||
|
let regStr = '';
|
||||||
|
for (const [regexname, flags] of Object.entries(flags_configs)) {
|
||||||
|
regStr += `const ${regexname} = /${regex}/${flags};\n`;
|
||||||
|
}
|
||||||
|
const allRegexes = Object.keys(flags_configs).toString();
|
||||||
|
regStr += `const regexes = [${allRegexes}];`;
|
||||||
|
return regStr;
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildString(pattern, positivity) {
|
||||||
|
const escape = positivity ? pattern : negation[pattern];
|
||||||
|
const escapeData = ESCAPE_SETS.UNICODE.get(escape);
|
||||||
|
return toTestCode(escapeData);
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildDescr(pattern, positivity) {
|
||||||
|
let name = patterns[pattern];
|
||||||
|
let descr = positivity ? 'Check positive cases of' : 'Check negative cases of';
|
||||||
|
return `${descr} ${name} \\${pattern}.`;
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildContent(pattern, positivity) {
|
||||||
|
|
||||||
|
let regexes = buildRegexes(pattern, positivity);
|
||||||
|
let string = buildString(pattern, positivity);
|
||||||
|
let descr = buildDescr(pattern, positivity);
|
||||||
|
let testNegate = positivity ? '!' : '';
|
||||||
|
let errMsg = positivity ? 'Expected full match, but did not match: ' :
|
||||||
|
'Expected no match, but matched: ';
|
||||||
|
|
||||||
|
let content = header(`${descr}`);
|
||||||
|
|
||||||
content += `
|
content += `
|
||||||
const str = buildString(${string});
|
const str = buildString(
|
||||||
|
${string}
|
||||||
|
);
|
||||||
|
|
||||||
const re = /${pattern}/${flags};
|
${regexes}
|
||||||
|
|
||||||
const errors = [];
|
const errors = [];
|
||||||
|
|
||||||
if (!re.test(str)) {
|
for (const regex of regexes) {
|
||||||
// Error, let's find out where
|
if (${testNegate}regex.test(str)) {
|
||||||
for (const char of str) {
|
// Error, let's find out where
|
||||||
if (!re.test(char)) {
|
for (const char of str) {
|
||||||
errors.push('0x' + char.codePointAt(0).toString(16));
|
if (${testNegate}regex.test(char)) {
|
||||||
|
errors.push('0x' + char.codePointAt(0).toString(16));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -101,7 +159,7 @@ if (!re.test(str)) {
|
|||||||
assert.sameValue(
|
assert.sameValue(
|
||||||
errors.length,
|
errors.length,
|
||||||
0,
|
0,
|
||||||
'Expected matching code points, but received: ' + errors.join(',')
|
'${errMsg}' + errors.join(',')
|
||||||
);
|
);
|
||||||
`;
|
`;
|
||||||
|
|
||||||
@ -114,40 +172,9 @@ function writeFile(desc, content, suffix = '') {
|
|||||||
fs.writeFileSync(filename, content);
|
fs.writeFileSync(filename, content);
|
||||||
}
|
}
|
||||||
|
|
||||||
// No additions
|
for (const [pattern, desc] of Object.entries(patterns)) {
|
||||||
for (const [desc, escape] of Object.entries(patterns)) {
|
test_cases.forEach(({positivity, suffix}) => {
|
||||||
[
|
const content = buildContent(pattern, positivity);
|
||||||
{
|
|
||||||
quantifier: '',
|
|
||||||
flags: '',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
quantifier: '+',
|
|
||||||
flags: '',
|
|
||||||
suffix: '-plus-quantifier',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
quantifier: '',
|
|
||||||
flags: 'u',
|
|
||||||
suffix: '-flags-u',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
quantifier: '+',
|
|
||||||
flags: 'u',
|
|
||||||
suffix: '-plus-quantifier-flags-u',
|
|
||||||
},
|
|
||||||
].forEach(({quantifier, flags, suffix}) => {
|
|
||||||
flags += 'g';
|
|
||||||
|
|
||||||
const pattern = `${escape}${quantifier}`;
|
|
||||||
const range = rewritePattern(pattern, flags, {
|
|
||||||
unicodeFlag: flags.includes('u') ? 'transform' : false,
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log(`${pattern} => ${range}, flags: ${flags}`);
|
|
||||||
|
|
||||||
const content = buildContent(desc, pattern, flags);
|
|
||||||
|
|
||||||
writeFile(desc, content, suffix);
|
writeFile(desc, content, suffix);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user