diff --git a/test/intl402/Locale/likely-subtags-grandfathered.js b/test/intl402/Locale/likely-subtags-grandfathered.js index ebcd548ef2..8df1d2fb9d 100644 --- a/test/intl402/Locale/likely-subtags-grandfathered.js +++ b/test/intl402/Locale/likely-subtags-grandfathered.js @@ -108,7 +108,7 @@ for (const tag of regularGrandfatheredWithExtLang) { assert.throws(RangeError, () => new Intl.Locale(tag)); } -// Add constiants, extensions, and privateuse subtags to regular grandfathered +// Add variants, extensions, and privateuse subtags to regular grandfathered // language tags and ensure it produces the "expected" result. const extras = [ "fonipa", @@ -127,14 +127,71 @@ for (const {tag} of regularGrandfathered) { for (const extra of extras) { const loc = new Intl.Locale(tag + "-" + extra); - assert.sameValue(loc.maximize().toString(), tagMax + "-" + extra); - assert.sameValue(loc.maximize().maximize().toString(), tagMax + "-" + extra); + let canonical = tag + "-" + extra; + let canonicalMax = tagMax + "-" + extra; + let canonicalMin = tagMin + "-" + extra; - assert.sameValue(loc.minimize().toString(), tagMin + "-" + extra); - assert.sameValue(loc.minimize().minimize().toString(), tagMin + "-" + extra); + // Ensure the added variant subtag is correctly sorted in the canonical tag. + if (/^[a-z0-9]{5,8}|[0-9][a-z0-9]{3}$/i.test(extra)) { + const sorted = s => s.replace(/(-([a-z0-9]{5,8}|[0-9][a-z0-9]{3}))+$/i, + m => m.split("-").sort().join("-")); + canonical = sorted(canonical); + canonicalMax = sorted(canonicalMax); + canonicalMin = sorted(canonicalMin); + } - assert.sameValue(loc.maximize().minimize().toString(), tagMin + "-" + extra); - assert.sameValue(loc.minimize().maximize().toString(), tagMax + "-" + extra); + // Adding extra subtags to grandfathered tags can have "interesting" results. Take for + // example "art-lojban" when "fonipa" is added, so we get "art-lojban-fonipa". 
The first + // step when canonicalising the language tag is to bring it into 'canonical syntax', which + // means, among other things, sorting variants in alphabetical order. So "art-lojban-fonipa" + // is transformed to "art-fonipa-lojban", because "fonipa" is sorted before "lojban". And + // only after that has happened, we replace aliases with their preferred form. + // + // Now the usual problems arise when doing silly things like adding subtags to + // grandfathered tags: neither RFC 5646 nor UTS 35 provides a clear description of + // what needs to happen next. + // + // From UTS 35 (Unicode Locale Data Markup Language): + // + // > If the BCP 47 primary language subtag matches the type attribute of a languageAlias + // > element in Supplemental Data, replace the language subtag with the replacement value. + // > 1. ... + // > 2. Five special deprecated grandfathered codes (such as i-default) are in type + // > attributes, and are also replaced. + // > 3. ... + // + // So let's assume grandfathered tags are treated as 'primary language subtag' if and only + // if no additional subtags are present. Because in all other cases, we don't really have a + // grandfathered tag, but only some arbitrary combination of random subtags. + // + // Basically what we expect here is that only grandfathered tags without any additional subtags + // are canonicalised to their modern form and in all other cases they're left as is. + // + // Not all language tag processors will pass this test, for example because they don't order + // variant subtags in alphabetical order or they're too eager when detecting grandfathered + // tags. For example "zh-hakka-hakka" is accepted in some language tag processors, because + // the language tag starts with a prefix which matches a grandfathered tag, and that prefix + // is then canonicalised to "hak" and the second "hakka" is simply appended to it, so the + // resulting tag is "hak-hakka". 
This is clearly wrong as far as ECMA-402 compliance is + // concerned, because language tags are parsed and validated before any canonicalisation + // happens. And during the validation step an error should be emitted, because the input + // "zh-hakka-hakka" contains two identical variant subtags. + // + // From ECMA-402 (IsStructurallyValidLanguageTag): + // + // > does not include duplicate variant subtags + // + // So, if your implementation fails this assertion, but you'd still like to test the rest of + // this file, a pull request to split this file seems the way to go! + assert.sameValue(loc.toString(), canonical); + + assert.sameValue(loc.maximize().toString(), canonicalMax); + assert.sameValue(loc.maximize().maximize().toString(), canonicalMax); + + assert.sameValue(loc.minimize().toString(), canonicalMin); + assert.sameValue(loc.minimize().minimize().toString(), canonicalMin); + + assert.sameValue(loc.maximize().minimize().toString(), canonicalMin); + assert.sameValue(loc.minimize().maximize().toString(), canonicalMax); } } -