mirror of
				https://github.com/tc39/test262.git
				synced 2025-10-25 09:43:57 +02:00 
			
		
		
		
	For each character class escape (\d, \D, \s, \S, \w, \W), check positive cases (the escape matches all characters it's supposed to match) and negative cases (the escape doesn't match any of the characters it should not match). Each of these checks is also done in Unicode mode and with the v flag. This uses regenerate.js from the unicode-property-escapes-tests repo to generate strings that contain exactly the characters that are supposed to be matched or not matched for each escape. Comparison is done with regex test instead of regex replace to optimize the tests. This is part of my work at the SYSTEMF lab at EPFL. Avoid modifying the regenerate library object prototype.
		
			
				
	
	
		
			181 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			181 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| import filenamify from 'filenamify';
 | |
| import fs from 'node:fs';
 | |
| import regenerate from 'regenerate';
 | |
| import rewritePattern from 'regexpu-core';
 | |
| import ESCAPE_SETS from 'regexpu-core/data/character-class-escape-sets.js';
 | |
| import slugify from 'slugify';
 | |
| 
 | |
| import header from './header.mjs';
 | |
| 
 | |
| // Pretty-printing code adapted from unicode-property-escapes-tests.
 | |
| // https://github.com/mathiasbynens/unicode-property-escapes-tests/blob/60f2dbec2b2a840ee67aa04dbd3449bb90fd2999/regenerate.js
 | |
| 
 | |
| function toHex(codePoint) {
 | |
|   return '0x' + ('00000' + codePoint.toString(16).toUpperCase()).slice(-6);
 | |
| };
 | |
| 
 | |
| function toTestData(reg) {
 | |
|   const data = reg.data;
 | |
|   // Iterate over the data per `(start, end)` pair.
 | |
|   let index = 0;
 | |
|   const length = data.length;
 | |
|   const loneCodePoints = [];
 | |
|   const ranges = [];
 | |
|   while (index < length) {
 | |
|     let start = data[index];
 | |
|     let end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
 | |
|     if (start == end) {
 | |
|       loneCodePoints.push(start);
 | |
|     } else {
 | |
|       ranges.push([start, end]);
 | |
|     }
 | |
|     index += 2;
 | |
|   }
 | |
|   return [ loneCodePoints, ranges ];
 | |
| }
 | |
| 
 | |
| function prettyPrint([ loneCodePoints, ranges ]) {
 | |
|   const indent = '  '; // Test 262 uses two-space indents.
 | |
|   loneCodePoints = loneCodePoints.map((codePoint) => toHex(codePoint));
 | |
|   ranges = ranges.map(
 | |
|     (range) => `[${ toHex(range[0]) }, ${ toHex(range[1]) }]`
 | |
|   );
 | |
|   const loneCodePointsOutput = loneCodePoints.length ?
 | |
|     `[\n${indent}${indent}${ loneCodePoints.join(`,\n${indent}${indent}`) }\n${indent}]` :
 | |
|     `[]`;
 | |
|   const rangesOutput = ranges.length ?
 | |
|     `[\n${indent}${indent}${ ranges.join(`,\n${indent}${indent}`) }\n${indent}]` :
 | |
|     `[]`;
 | |
|   return `{\n${indent}loneCodePoints: ${ loneCodePointsOutput },\n${indent}ranges: ${ rangesOutput }\n}`;
 | |
| }
 | |
| 
 | |
| const LOW_SURROGATES = regenerate().addRange(0xDC00, 0xDFFF);
 | |
| 
 | |
| function toTestCode(escapeData) {
 | |
|   const lowSurrogates = escapeData.clone().intersection(LOW_SURROGATES);
 | |
|   if (lowSurrogates.data.length === 0) {
 | |
|     return prettyPrint(toTestData(escapeData));
 | |
|   }
 | |
|   const rest = escapeData.clone().remove(LOW_SURROGATES);
 | |
|   const [ lowLoneCodePoints, lowRanges ] = toTestData(lowSurrogates);
 | |
|   const [ loneCodePoints, ranges ] = toTestData(rest);
 | |
|   loneCodePoints.unshift(...lowLoneCodePoints);
 | |
|   ranges.unshift(...lowRanges);
 | |
|   return prettyPrint([ loneCodePoints, ranges ]);
 | |
| }
 | |
| 
 | |
| // The different character class escapes.
 | |
| const patterns = {
 | |
|   's': 'whitespace class escape',
 | |
|   'S': 'non-whitespace class escape',
 | |
|   'w': 'word class escape',
 | |
|   'W': 'non-word class escape',
 | |
|   'd': 'digit class escape',
 | |
|   'D': 'non-digit class escape',
 | |
| };
 | |
| 
 | |
| const negation = {
 | |
|   's': 'S',
 | |
|   'S': 's',
 | |
|   'w': 'W',
 | |
|   'W': 'w',
 | |
|   'd': 'D',
 | |
|   'D': 'd',
 | |
| }
 | |
| 
 | |
| // In each test file, test all these flag configurations.
 | |
| const flags_configs = {
 | |
|   'standard': '',
 | |
|   'unicode': 'u',
 | |
|   'vflag': 'v',
 | |
| }
 | |
| 
 | |
| // For each character class escape, test positive and negative cases.
 | |
| const test_cases = [
 | |
|   { positivity: true,
 | |
|     suffix: '-positive-cases' },
 | |
|   { positivity: false,
 | |
|     suffix: '-negative-cases' },
 | |
| ]
 | |
| 
 | |
| function buildRegex(pattern, positivity) {
 | |
|   return positivity ? `^\\${pattern}+$` : `\\${pattern}`;
 | |
| }
 | |
| 
 | |
| function buildRegexes(pattern, positivity) {
 | |
|   const regex = buildRegex(pattern, positivity);
 | |
|   let regStr = '';
 | |
|   for (const [regexname, flags] of Object.entries(flags_configs)) {
 | |
|     regStr += `const ${regexname} = /${regex}/${flags};\n`;
 | |
|   }
 | |
|   const allRegexes = Object.keys(flags_configs).toString();
 | |
|   regStr += `const regexes = [${allRegexes}];`;
 | |
|   return regStr;
 | |
| }
 | |
| 
 | |
| function buildString(pattern, positivity) {
 | |
|   const escape = positivity ? pattern : negation[pattern];
 | |
|   const escapeData = ESCAPE_SETS.UNICODE.get(escape);
 | |
|   return toTestCode(escapeData);
 | |
| }
 | |
| 
 | |
| function buildDescr(pattern, positivity) {
 | |
|   let name = patterns[pattern];
 | |
|   let descr = positivity ? 'Check positive cases of' : 'Check negative cases of';
 | |
|   return `${descr} ${name} \\${pattern}.`;
 | |
| }
 | |
| 
 | |
| function buildContent(pattern, positivity) {
 | |
| 
 | |
|   let regexes = buildRegexes(pattern, positivity);
 | |
|   let string = buildString(pattern, positivity);
 | |
|   let descr = buildDescr(pattern, positivity);
 | |
|   let testNegate = positivity ? '!' : '';
 | |
|   let errMsg = positivity ? 'Expected full match, but did not match: ' :
 | |
|   'Expected no match, but matched: ';
 | |
| 
 | |
|   let content = header(`${descr}`);
 | |
| 
 | |
|   content += `
 | |
| const str = buildString(
 | |
| ${string}
 | |
| );
 | |
| 
 | |
| ${regexes}
 | |
| 
 | |
| const errors = [];
 | |
| 
 | |
| for (const regex of regexes) {
 | |
|   if (${testNegate}regex.test(str)) {
 | |
|     // Error, let's find out where
 | |
|     for (const char of str) {
 | |
|       if (${testNegate}regex.test(char)) {
 | |
|         errors.push('0x' + char.codePointAt(0).toString(16));
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| assert.sameValue(
 | |
|   errors.length,
 | |
|   0,
 | |
|   '${errMsg}' + errors.join(',')
 | |
| );
 | |
| `;
 | |
| 
 | |
|     return content;
 | |
| }
 | |
| 
 | |
| function writeFile(desc, content, suffix = '') {
 | |
|   const outPath = '../../test/built-ins/RegExp/CharacterClassEscapes';
 | |
|   const filename = `${outPath}/character-class-${slugify(filenamify(desc.toLowerCase()))}${suffix}.js`;
 | |
|   fs.writeFileSync(filename, content);
 | |
| }
 | |
| 
 | |
| for (const [pattern, desc] of Object.entries(patterns)) {
 | |
|   test_cases.forEach(({positivity, suffix}) => {
 | |
|     const content = buildContent(pattern, positivity);
 | |
|     writeFile(desc, content, suffix);
 | |
|   });
 | |
| }
 |