Extract list of structurally invalid tags from invalid-tags.js.

This commit is contained in:
Ms2ger 2018-04-27 11:49:37 +02:00 committed by Rick Waldron
parent 3b7a456ddb
commit 190cf5c730
2 changed files with 61 additions and 52 deletions

View File

@ -162,6 +162,66 @@ function getLocaleSupportInfo(Constructor) {
} }
/**
 * @description Provides the language tags used as negative test data: strings for
 * which IsStructurallyValidLanguageTag() returns false.
 *
 * Before the list is handed out, every entry is passed to
 * isCanonicalizedStructurallyValidLanguageTag(); an assertion fails for any entry
 * that this check accepts.
 *
 * @return {Array} a newly created array of invalid language tag strings.
 */
function getInvalidLanguageTags() {
  var tags = [
    // tags that are malformed from the very first subtag
    "", // empty tag
    "i", // singleton alone
    "x", // private use without subtag
    "u", // extension singleton in first place
    "419", // region code in first place
    "u-nu-latn-cu-bob", // extension sequence without language
    "hans-cmn-cn", // "hans" could theoretically be a 4-letter language code,
                   // but those can't be followed by extlang codes.

    // repeated subtags
    "cmn-hans-cn-u-u", // duplicate singleton
    "cmn-hans-cn-t-u-ca-u", // duplicate singleton
    "de-gregory-gregory", // duplicate variant

    // language ranges are not language tags
    "*", // language range
    "de-*", // language range

    // characters outside the ASCII letters
    "中文", // non-ASCII letters
    "en-ß", // non-ASCII letters
    "ıd", // non-ASCII letters

    // underscores in different parts of the language tag
    "de_DE",
    "DE_de",
    "cmn_Hans",
    "cmn-hans_cn",
    "es_419",
    "es-419-u-nu-latn-cu_bob",
    "i_klingon",
    "cmn-hans-cn-t-ca-u-ca-x_t-u",
    "enochian_enochian",
    "de-gregory_u-ca-gregory",

    // stray characters around an otherwise plain tag
    "en\u0000", // null-terminator sequence
    " en", // leading whitespace
    "en ", // trailing whitespace

    "it-IT-Latn", // country before script tag

    // incomplete Unicode extension sequences
    "de-u",
    "de-u-",
    "de-u-ca-",
    "de-u-ca-gregory-",

    // incomplete private-use tags
    "si-x",
    "x-",
    "x-y-",
  ];

  // Sanity-check the test data itself before any test relies on it.
  tags.forEach(function (tag) {
    var acceptedByValidator = isCanonicalizedStructurallyValidLanguageTag(tag);
    assert(
      !acceptedByValidator,
      "Test data \"" + tag + "\" is a canonicalized and structurally valid language tag."
    );
  });

  return tags;
}
/**
* @description Tests whether locale is a String value representing a
* structurally valid and canonicalized BCP 47 language tag, as defined in

View File

@ -21,58 +21,7 @@ info: |
includes: [testIntl.js]
---*/
var invalidLanguageTags = [ var invalidLanguageTags = getInvalidLanguageTags();
"", // empty tag
"i", // singleton alone
"x", // private use without subtag
"u", // extension singleton in first place
"419", // region code in first place
"u-nu-latn-cu-bob", // extension sequence without language
"hans-cmn-cn", // "hans" could theoretically be a 4-letter language code,
// but those can't be followed by extlang codes.
"cmn-hans-cn-u-u", // duplicate singleton
"cmn-hans-cn-t-u-ca-u", // duplicate singleton
"de-gregory-gregory", // duplicate variant
"*", // language range
"de-*", // language range
"中文", // non-ASCII letters
"en-ß", // non-ASCII letters
"ıd", // non-ASCII letters
// underscores in different parts of the language tag
"de_DE",
"DE_de",
"cmn_Hans",
"cmn-hans_cn",
"es_419",
"es-419-u-nu-latn-cu_bob",
"i_klingon",
"cmn-hans-cn-t-ca-u-ca-x_t-u",
"enochian_enochian",
"de-gregory_u-ca-gregory",
"en\u0000", // null-terminator sequence
" en", // leading whitespace
"en ", // trailing whitespace
"it-IT-Latn", // country before script tag
"de-u", // incomplete Unicode extension sequences
"de-u-",
"de-u-ca-",
"de-u-ca-gregory-",
"si-x", // incomplete private-use tags
"x-",
"x-y-",
];
// make sure the data above is correct
for (var i = 0; i < invalidLanguageTags.length; ++i) {
var invalidTag = invalidLanguageTags[i];
assert(
!isCanonicalizedStructurallyValidLanguageTag(invalidTag),
"Test data \"" + invalidTag + "\" is a canonicalized and structurally valid language tag."
);
}
for (var i = 0; i < invalidLanguageTags.length; ++i) { for (var i = 0; i < invalidLanguageTags.length; ++i) {
var invalidTag = invalidLanguageTags[i]; var invalidTag = invalidLanguageTags[i];
assert.throws(RangeError, function() { assert.throws(RangeError, function() {