From 99be4228efcc0df79fef24621cdacb75853a75c1 Mon Sep 17 00:00:00 2001 From: Philip Chimento Date: Thu, 31 Oct 2024 17:49:21 -0700 Subject: [PATCH] Add files from bocoup/test262-regexp-generator Old upstream: https://github.com/bocoup/test262-regexp-generator --- tools/regexp-generator/LICENSE | 21 ++++++ tools/regexp-generator/README.md | 2 + tools/regexp-generator/header.js | 42 ++++++++++++ tools/regexp-generator/index.js | 102 ++++++++++++++++++++++++++++ tools/regexp-generator/package.json | 18 +++++ 5 files changed, 185 insertions(+) create mode 100644 tools/regexp-generator/LICENSE create mode 100644 tools/regexp-generator/README.md create mode 100644 tools/regexp-generator/header.js create mode 100644 tools/regexp-generator/index.js create mode 100644 tools/regexp-generator/package.json diff --git a/tools/regexp-generator/LICENSE b/tools/regexp-generator/LICENSE new file mode 100644 index 0000000000..6f3bc14776 --- /dev/null +++ b/tools/regexp-generator/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Bocoup + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/tools/regexp-generator/README.md b/tools/regexp-generator/README.md new file mode 100644 index 0000000000..ab8da0b0d5 --- /dev/null +++ b/tools/regexp-generator/README.md @@ -0,0 +1,2 @@ +# test262-regexp-generator +Generate tests for RegExp based on Unicode data diff --git a/tools/regexp-generator/header.js b/tools/regexp-generator/header.js new file mode 100644 index 0000000000..bedcf9d09a --- /dev/null +++ b/tools/regexp-generator/header.js @@ -0,0 +1,42 @@ +module.exports = description => { + let header = `// Copyright (C) 2018 Leo Balter. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: prod-CharacterClassEscape +description: > + ${description} +info: | + This is a generated test. Please check out + https://github.com/bocoup/test262-regexp-generator + for any changes. + + CharacterClassEscape[U] :: + d + D + s + S + w + W + + 21.2.2.12 CharacterClassEscape + + The production CharacterClassEscape :: d evaluates as follows: + Return the ten-element set of characters containing the characters 0 through 9 inclusive. + The production CharacterClassEscape :: D evaluates as follows: + Return the set of all characters not included in the set returned by CharacterClassEscape :: d. + The production CharacterClassEscape :: s evaluates as follows: + Return the set of characters containing the characters that are on the right-hand side of + the WhiteSpace or LineTerminator productions. + The production CharacterClassEscape :: S evaluates as follows: + Return the set of all characters not included in the set returned by CharacterClassEscape :: s. 
+ The production CharacterClassEscape :: w evaluates as follows: + Return the set of all characters returned by WordCharacters(). + The production CharacterClassEscape :: W evaluates as follows: + Return the set of all characters not included in the set returned by CharacterClassEscape :: w. +features: [String.fromCodePoint] +includes: [regExpUtils.js] +---*/\n`; + + return header; +}; diff --git a/tools/regexp-generator/index.js b/tools/regexp-generator/index.js new file mode 100644 index 0000000000..dd77b19135 --- /dev/null +++ b/tools/regexp-generator/index.js @@ -0,0 +1,102 @@ +const fs = require('fs'); +const rewritePattern = require('regexpu-core'); +const slugify = require('slugify'); +const filenamify = require('filenamify'); +const jsesc = require('jsesc'); +const header = require('./header'); + +const patterns = { + 'whitespace class escape': '\\s', + 'non-whitespace class escape': '\\S', + 'word class escape': '\\w', + 'non-word class escape': '\\W', + 'digit class escape': '\\d', + 'non-digit class escape': '\\D', +}; + +function buildContent(desc, pattern, range, max, flags, skip180e) { + let method; + let features = []; + + let content = header(`Compare range for ${desc} ${pattern} with flags ${flags}`); + + content += ` +const str = buildString({ loneCodePoints: [], ranges: [[0, ${ + jsesc(max, { numbers: 'hexadecimal' }) +}]] }); + +const re = /${pattern}/${flags}; +const matchingRange = /${range}/${flags}; + +const errors = []; + +function matching(str) { + return str.replace(re, '') === str.replace(matchingRange, ''); +} + +if (!matching(str)) { + // Error, let's find out where + for (const char of str) { + if (!matching(char)) { + errors.push('0x' + char.codePointAt(0).toString(16)); + } + } +} + +assert.sameValue( + errors.length, + 0, + 'Expected matching code points, but received: ' + errors.join(',') +); +`; + + return content; +} + +function writeFile(desc, content, suffix = '') { + const filename = 
`output/character-class-${slugify(filenamify(desc.toLowerCase()))}${suffix}.js`; + fs.writeFileSync(filename, content); +} + +// No additions +for (const [desc, escape] of Object.entries(patterns)) { + const skip180e = escape.toLowerCase().includes('s'); + [ + { + quantifier: '', + flags: '', + }, + { + quantifier: '+', + flags: '', + posCb(u) { return [u, u+u]}, + suffix: '-plus-quantifier', + }, + { + quantifier: '', + flags: 'u', + max: 0x10FFFF, + suffix: '-flags-u', + }, + { + quantifier: '+', + flags: 'u', + posCb(u) { return [u, u+u]}, + suffix: '-plus-quantifier-flags-u', + max: 0x10FFFF, + }, + ].forEach(({quantifier, max = 0xFFFF, flags, suffix, posCb = u => [u], negCb = u => [u]}) => { + flags += 'g'; + + const pattern = `${escape}${quantifier}`; + const range = rewritePattern(pattern, flags, { + useUnicodeFlag: flags.includes('u') + }); + + console.log(`${pattern} => ${range}, flags: ${flags}`); + + const content = buildContent(desc, pattern, range, max, flags, skip180e); + + writeFile(desc, content, suffix); + }); +} diff --git a/tools/regexp-generator/package.json b/tools/regexp-generator/package.json new file mode 100644 index 0000000000..57b661b8c0 --- /dev/null +++ b/tools/regexp-generator/package.json @@ -0,0 +1,18 @@ +{ + "name": "test262-regexp-class-escapes", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1", + "build": "node index.js" + }, + "author": "", + "license": "MIT", + "dependencies": { + "filenamify": "^2.1.0", + "jsesc": "^2.5.1", + "regexpu-core": "^4.2.0", + "slugify": "^1.3.0" + } +}