mirror of https://github.com/tc39/test262.git
Sort variants added to grandfathered tag
And add an explanation for a previously removed and now re-added assertion.
This commit is contained in:
parent
c596d9674e
commit
589ef945fa
|
@ -108,7 +108,7 @@ for (const tag of regularGrandfatheredWithExtLang) {
|
|||
assert.throws(RangeError, () => new Intl.Locale(tag));
|
||||
}
|
||||
|
||||
// Add constiants, extensions, and privateuse subtags to regular grandfathered
|
||||
// Add variants, extensions, and privateuse subtags to regular grandfathered
|
||||
// language tags and ensure it produces the "expected" result.
|
||||
const extras = [
|
||||
"fonipa",
|
||||
|
@ -127,14 +127,71 @@ for (const {tag} of regularGrandfathered) {
|
|||
for (const extra of extras) {
|
||||
const loc = new Intl.Locale(tag + "-" + extra);
|
||||
|
||||
assert.sameValue(loc.maximize().toString(), tagMax + "-" + extra);
|
||||
assert.sameValue(loc.maximize().maximize().toString(), tagMax + "-" + extra);
|
||||
let canonical = tag + "-" + extra;
|
||||
let canonicalMax = tagMax + "-" + extra;
|
||||
let canonicalMin = tagMin + "-" + extra;
|
||||
|
||||
assert.sameValue(loc.minimize().toString(), tagMin + "-" + extra);
|
||||
assert.sameValue(loc.minimize().minimize().toString(), tagMin + "-" + extra);
|
||||
// Ensure the added variant subtag is correctly sorted in the canonical tag.
|
||||
if (/^[a-z0-9]{5,8}|[0-9][a-z0-9]{3}$/i.test(extra)) {
|
||||
const sorted = s => s.replace(/(-([a-z0-9]{5,8}|[0-9][a-z0-9]{3}))+$/i,
|
||||
m => m.split("-").sort().join("-"));
|
||||
canonical = sorted(canonical);
|
||||
canonicalMax = sorted(canonicalMax);
|
||||
canonicalMin = sorted(canonicalMin);
|
||||
}
|
||||
|
||||
assert.sameValue(loc.maximize().minimize().toString(), tagMin + "-" + extra);
|
||||
assert.sameValue(loc.minimize().maximize().toString(), tagMax + "-" + extra);
|
||||
// Adding extra subtags to grandfathered tags can have "interesting" results. Take for
|
||||
// example "art-lojban" when "fonipa" is added, so we get "art-lojban-fonipa". The first
|
||||
// step when canonicalising the language tag is to bring it into 'canonical syntax', which
|
||||
// means among other things sorting variants in alphabetical order. So "art-lojban-fonipa"
|
||||
// is transformed to "art-fonipa-lojban", because "fonipa" is sorted before "lojban". And
|
||||
// only after that has happened, we replace aliases with their preferred form.
|
||||
//
|
||||
// Now the usual problems arise when doing silly things like adding subtags to
|
||||
// grandfathered tags: nobody, neither RFC 5646 nor UTS 35, provides a clear description
|
||||
// of what needs to happen next.
|
||||
//
|
||||
// From <http://unicode.org/reports/tr35/#Language_Tag_to_Locale_Identifier>:
|
||||
//
|
||||
// > If the BCP 47 primary language subtag matches the type attribute of a languageAlias
|
||||
// > element in Supplemental Data, replace the language subtag with the replacement value.
|
||||
// > 1. ...
|
||||
// > 2. Five special deprecated grandfathered codes (such as i-default) are in type
|
||||
// attributes, and are also replaced.
|
||||
// > 3. ...
|
||||
//
|
||||
// So let's assume grandfathered tags are treated as 'primary language subtag' if and only
|
||||
// if no additional subtags are present. Because in all other cases, we don't really have a
|
||||
// grandfathered tag, but only some arbitrary combination of random subtags.
|
||||
//
|
||||
// Basically what we expect here is that only grandfathered tags without any additional subtags
|
||||
// are canonicalised to their modern form and in all other cases they're left as is.
|
||||
//
|
||||
// Not all language tag processors will pass this test, for example because they don't order
|
||||
// variant subtags in alphabetical order or they're too eager when detecting grandfathered
|
||||
// tags. For example "zh-hakka-hakka" is accepted in some language tag processors, because
|
||||
// the language tag starts with a prefix which matches a grandfathered tag, and that prefix
|
||||
// is then canonicalised to "hak" and the second "hakka" is simply appended to it, so the
|
||||
// resulting tag is "hak-hakka". This is clearly wrong as far as ECMA-402 compliance is
|
||||
// concerned, because language tags are parsed and validated before any canonicalisation
|
||||
// happens. And during the validation step an error should be emitted, because the input
|
||||
// "zh-hakka-hakka" contains two identical variant subtags.
|
||||
//
|
||||
// From <https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag>:
|
||||
//
|
||||
// > does not include duplicate variant subtags
|
||||
//
|
||||
// So, if your implementation fails this assertion, but you'd still like to test the rest of
|
||||
// this file, a pull request to split this file seems the way to go!
|
||||
assert.sameValue(loc.toString(), canonical);
|
||||
|
||||
assert.sameValue(loc.maximize().toString(), canonicalMax);
|
||||
assert.sameValue(loc.maximize().maximize().toString(), canonicalMax);
|
||||
|
||||
assert.sameValue(loc.minimize().toString(), canonicalMin);
|
||||
assert.sameValue(loc.minimize().minimize().toString(), canonicalMin);
|
||||
|
||||
assert.sameValue(loc.maximize().minimize().toString(), canonicalMin);
|
||||
assert.sameValue(loc.minimize().maximize().toString(), canonicalMax);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue