Sort variants added to grandfathered tag

And add an explanation for a previously removed and now re-added assertion.
2025-07-25 15:04:43 +02:00 · 2019-07-22 02:12:21 -07:00 · 2019-07-22 02:12:21 -07:00 · 589ef945fa
commit 589ef945fa
parent c596d9674e
1 changed files with 65 additions and 8 deletions
--- a/test/intl402/Locale/likely-subtags-grandfathered.js
+++ b/test/intl402/Locale/likely-subtags-grandfathered.js
@ -108,7 +108,7 @@ for (const tag of regularGrandfatheredWithExtLang) {
    assert.throws(RangeError, () => new Intl.Locale(tag));
 }
-// Add constiants, extensions, and privateuse subtags to regular grandfathered
+// Add variants, extensions, and privateuse subtags to regular grandfathered
 // language tags and ensure it produces the "expected" result.
 const extras = [
    "fonipa",
@ -127,14 +127,71 @@ for (const {tag} of regularGrandfathered) {
    for (const extra of extras) {
        const loc = new Intl.Locale(tag + "-" + extra);
-        assert.sameValue(loc.maximize().toString(), tagMax + "-" + extra);
+        let canonical = tag + "-" + extra;
-        assert.sameValue(loc.maximize().maximize().toString(), tagMax + "-" + extra);
+        let canonicalMax = tagMax + "-" + extra;
        let canonicalMin = tagMin + "-" + extra;
-        assert.sameValue(loc.minimize().toString(), tagMin + "-" + extra);
+        // Ensure the added variant subtag is correctly sorted in the canonical tag.
-        assert.sameValue(loc.minimize().minimize().toString(), tagMin + "-" + extra);
+        if (/^[a-z0-9]{5,8}|[0-9][a-z0-9]{3}$/i.test(extra)) {
-
+            const sorted = s => s.replace(/(-([a-z0-9]{5,8}|[0-9][a-z0-9]{3}))+$/i,
-        assert.sameValue(loc.maximize().minimize().toString(), tagMin + "-" + extra);
+                                          m => m.split("-").sort().join("-"));
-        assert.sameValue(loc.minimize().maximize().toString(), tagMax + "-" + extra);
+            canonical = sorted(canonical);
-    }
+            canonicalMax = sorted(canonicalMax);
            canonicalMin = sorted(canonicalMin);
        }
        // Adding extra subtags to grandfathered tags can have "interesting" results. Take for
        // example "art-lojban" when "fonipa" is added, so we get "art-lojban-fonipa". The first
        // step when canonicalising the language tag is to bring it in 'canonical syntax', that
        // means among other things sorting variants in alphabetical order. So "art-lojban-fonipa"
        // is transformed to "art-fonipa-lojban", because "fonipa" is sorted before "lojban". And
        // only after that has happened, we replace aliases with their preferred form.
        //
        // Now the usual problems arise when doing silly things like adding subtags to
        // grandfathered tags: nobody, neither RFC 5646 nor UTS 35, provides a clear description
        // of what needs to happen next.
        //
        // From <http://unicode.org/reports/tr35/#Language_Tag_to_Locale_Identifier>:
        //
        // > If the BCP 47 primary language subtag matches the type attribute of a languageAlias
        // > element in Supplemental Data, replace the language subtag with the replacement value.
        // >  1. ...
        // >  2. Five special deprecated grandfathered codes (such as i-default) are in type
        // >     attributes, and are also replaced.
        // >  3. ...
        //
        // So let's assume grandfathered tags are treated as 'primary language subtag' if and only
        // if no additional subtags are present. Because in all other cases, we don't really have a
        // grandfathered tag, but only some arbitrary combination of random subtags.
        //
        // Basically what we expect here is that only grandfathered tags without any additional
        // subtags are canonicalised to their modern form and in all other cases they're left as is.
        //
        // Not all language tag processors will pass this test, for example because they don't order
        // variant subtags in alphabetical order or they're too eager when detecting grandfathered
        // tags. For example "zh-hakka-hakka" is accepted in some language tag processors, because
        // the language tag starts with a prefix which matches a grandfathered tag, and that prefix
        // is then canonicalised to "hak" and the second "hakka" is simply appended to it, so the
        // resulting tag is "hak-hakka". This is clearly wrong as far as ECMA-402 compliance is
        // concerned, because language tags are parsed and validated before any canonicalisation
        // happens. And during the validation step an error should be emitted, because the input
        // "zh-hakka-hakka" contains two identical variant subtags.
        //
        // From <https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag>:
        //
        // > does not include duplicate variant subtags
        //
        // So, if your implementation fails this assertion, but you would still like to test the rest
        // of this file, a pull request to split this file seems the way to go!
        assert.sameValue(loc.toString(), canonical);
        assert.sameValue(loc.maximize().toString(), canonicalMax);
        assert.sameValue(loc.maximize().maximize().toString(), canonicalMax);
        assert.sameValue(loc.minimize().toString(), canonicalMin);
        assert.sameValue(loc.minimize().minimize().toString(), canonicalMin);
        assert.sameValue(loc.maximize().minimize().toString(), canonicalMin);
        assert.sameValue(loc.minimize().maximize().toString(), canonicalMax);
    }
 }