diff --git a/test/intl402/Intl/getCanonicalLocales/complex-language-subtag-replacement.js b/test/intl402/Intl/getCanonicalLocales/complex-language-subtag-replacement.js
new file mode 100644
index 0000000000..7392d3bc16
--- /dev/null
+++ b/test/intl402/Intl/getCanonicalLocales/complex-language-subtag-replacement.js
@@ -0,0 +1,58 @@
+// Copyright (C) 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-intl.getcanonicallocales
+description: >
+ Assert non-simple language subtag replacements work as expected.
+info: |
+ 8.2.1 Intl.getCanonicalLocales (locales)
+ 1. Let ll be ? CanonicalizeLocaleList(locales).
+ 2. Return CreateArrayFromList(ll).
+
+ 9.2.1 CanonicalizeLocaleList (locales)
+ ...
+ 7. Repeat, while k < len
+ ...
+ c. If kPresent is true, then
+ ...
+ vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
+ ...
+
+ UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
+
+ - Replace aliases in the unicode_language_id and tlang (if any) using the following process:
+ - If the language subtag matches the type attribute of a languageAlias element in
+ Supplemental Data, replace the language subtag with the replacement value.
+ 1. If there are additional subtags in the replacement value, add them to the result,
+ but only if there is no corresponding subtag already in the tag.
+
+includes: [testIntl.js]
+---*/
+
+// CLDR contains language mappings where in addition to the language subtag also
+// the script or region subtag is modified, unless they're already present.
+
+// Maps each input tag to the canonical form Intl.getCanonicalLocales must return.
+const testData = {
+  // "sh" adds "Latn", unless a script subtag is already present.
+  "sh": "sr-Latn",
+  "sh-Cyrl": "sr-Cyrl",
+
+  // "cnr" adds "ME", unless a region subtag is already present.
+  "cnr": "sr-ME",
+  "cnr-BA": "sr-BA",
+};
+
+for (const [input, expected] of Object.entries(testData)) {
+  // Guard against typos in the table: the expectation must itself be canonical.
+  const expectedIsCanonical = isCanonicalizedStructurallyValidLanguageTag(expected);
+  assert(
+    expectedIsCanonical,
+    "\"" + expected + "\" is a canonicalized and structurally valid language tag."
+  );
+  // A single input tag must yield exactly one canonicalized entry.
+  const canonicalized = Intl.getCanonicalLocales(input);
+  assert.sameValue(canonicalized.length, 1);
+  assert.sameValue(canonicalized[0], expected);
+}
diff --git a/test/intl402/Intl/getCanonicalLocales/complex-region-subtag-replacement.js b/test/intl402/Intl/getCanonicalLocales/complex-region-subtag-replacement.js
new file mode 100644
index 0000000000..0f10355bc8
--- /dev/null
+++ b/test/intl402/Intl/getCanonicalLocales/complex-region-subtag-replacement.js
@@ -0,0 +1,108 @@
+// Copyright (C) 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-intl.getcanonicallocales
+description: >
+ Assert non-simple region subtag replacements work as expected.
+info: |
+ 8.2.1 Intl.getCanonicalLocales (locales)
+ 1. Let ll be ? CanonicalizeLocaleList(locales).
+ 2. Return CreateArrayFromList(ll).
+
+ 9.2.1 CanonicalizeLocaleList (locales)
+ ...
+ 7. Repeat, while k < len
+ ...
+ c. If kPresent is true, then
+ ...
+ vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
+ ...
+
+ UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
+
+ - Replace aliases in the unicode_language_id and tlang (if any) using the following process:
+ - If the region subtag matches the type attribute of a territoryAlias element in
+ Supplemental Data, replace the region subtag with the replacement value, as follows:
+ 1. If there is a single territory in the replacement, use it.
+ 2. If there are multiple territories:
+ 1. Look up the most likely territory for the base language code (and script, if there is one).
+ 2. If that likely territory is in the list, use it.
+ 3. Otherwise, use the first territory in the list.
+
+includes: [testIntl.js]
+---*/
+
+// CLDR contains region mappings where the replacement region depends on the
+// likely subtags from the language and script subtags.
+
+const testData = {
+  // For example, the breakup of the Soviet Union ("SU") means that the region of
+  // the Soviet Union ("SU") is replaced by Russia ("RU"), Armenia ("AM"), or
+  // many others -- depending on the specified (or merely likely) language and
+  // script subtags:
+  //
+  // Every case is listed twice: once with the alpha-2 code "SU" and once with
+  // the numeric code "810".
+  "ru-SU": "ru-RU",
+  "ru-810": "ru-RU",
+  "en-SU": "en-RU",
+  "en-810": "en-RU",
+  "und-SU": "und-RU",
+  "und-810": "und-RU",
+  "und-Latn-SU": "und-Latn-RU",
+  "und-Latn-810": "und-Latn-RU",
+
+  // Armenia can be the preferred region when the language is "hy" (Armenian) or
+  // the script is "Armn" (Armenian).
+  //
+  //
+  //
+  "hy-SU": "hy-AM",
+  "hy-810": "hy-AM",
+  "und-Armn-SU": "und-Armn-AM",
+  "und-Armn-810": "und-Armn-AM",
+
+  // The region "CS" has more than one possible replacement: "RS" or "ME".
+  //
+  // The following likely-subtags entries contain "RS" and "ME":
+  //
+  //
+  //
+  //
+  //
+  //
+  // In this case there is no language/script combination (without a region
+  // subtag) where "ME" is ever chosen, so the replacement is always "RS".
+  "sr-CS": "sr-RS",
+  "sr-Latn-CS": "sr-Latn-RS",
+  "sr-Cyrl-CS": "sr-Cyrl-RS",
+
+  // The existing region in the source locale identifier is ignored when selecting
+  // the likely replacement region. For example take "az-NT", which is Azerbaijani
+  // spoken in the Neutral Zone. The replacement region for "NT" is either
+  // "SA" (Saudi-Arabia) or "IQ" (Iraq), and there is also a likely subtags entry
+  // for "az-IQ". But when only looking at the language subtag in "az-NT", "az" is
+  // always resolved to "az-Latn-AZ", and because "AZ" is not in the list ["SA",
+  // "IQ"], the final replacement region is the default for "NT", namely "SA".
+  // That means "az-NT" will be canonicalised to "az-SA" and not "az-IQ", even
+  // though the latter may be a more sensible candidate based on the actual usage
+  // of the target locales.
+  //
+  //
+  //
+  //
+  "az-NT": "az-SA",
+};
+
+for (const [input, expected] of Object.entries(testData)) {
+  // Guard against typos in the table: the expectation must itself be canonical.
+  const expectedIsCanonical = isCanonicalizedStructurallyValidLanguageTag(expected);
+  assert(
+    expectedIsCanonical,
+    "\"" + expected + "\" is a canonicalized and structurally valid language tag."
+  );
+  const canonicalized = Intl.getCanonicalLocales(input);
+  assert.sameValue(canonicalized.length, 1);
+  assert.sameValue(canonicalized[0], expected);
+}
diff --git a/test/intl402/Intl/getCanonicalLocales/transformed-ext-canonical.js b/test/intl402/Intl/getCanonicalLocales/transformed-ext-canonical.js
new file mode 100644
index 0000000000..c1e8e4f6ad
--- /dev/null
+++ b/test/intl402/Intl/getCanonicalLocales/transformed-ext-canonical.js
@@ -0,0 +1,54 @@
+// Copyright (C) 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-intl.getcanonicallocales
+description: >
+ Test canonicalisation within transformed extension subtags.
+info: |
+ 8.2.1 Intl.getCanonicalLocales (locales)
+ 1. Let ll be ? CanonicalizeLocaleList(locales).
+ 2. Return CreateArrayFromList(ll).
+
+ 9.2.1 CanonicalizeLocaleList (locales)
+ ...
+ 7. Repeat, while k < len
+ ...
+ c. If kPresent is true, then
+ ...
+ v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
+ vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
+ ...
+
+includes: [testIntl.js]
+---*/
+
+const testData = {
+  // Variant subtags are alphabetically ordered.
+  "sl-t-sl-rozaj-biske-1994": "sl-t-sl-1994-biske-rozaj",
+
+  // tfield subtags are alphabetically ordered.
+  // (Also tests subtag case normalisation.)
+  "DE-T-M0-DIN-K0-QWERTZ": "de-t-k0-qwertz-m0-din",
+
+  // "true" tvalue subtags aren't removed.
+  // (UTS 35 version 36, §3.2.1 claims otherwise, but tkey must be followed by
+  // tvalue, so that's likely a spec bug in UTS 35.)
+  "en-t-m0-true": "en-t-m0-true",
+
+  // tlang subtags are canonicalised.
+  "en-t-iw": "en-t-he",
+
+  // Deprecated tvalue subtags are replaced by their preferred value.
+  "und-Latn-t-und-hani-m0-names": "und-Latn-t-und-hani-m0-prprname",
+};
+
+for (const [input, expected] of Object.entries(testData)) {
+  // The expected value must itself be canonical, otherwise the table is wrong.
+  const expectedIsCanonical = isCanonicalizedStructurallyValidLanguageTag(expected);
+  assert(expectedIsCanonical,
+         "\"" + expected + "\" is a canonical and structurally valid language tag.");
+  const canonicalized = Intl.getCanonicalLocales(input);
+  assert.sameValue(canonicalized.length, 1);
+  assert.sameValue(canonicalized[0], expected);
+}
diff --git a/test/intl402/Intl/getCanonicalLocales/transformed-ext-invalid.js b/test/intl402/Intl/getCanonicalLocales/transformed-ext-invalid.js
new file mode 100644
index 0000000000..207ef8141c
--- /dev/null
+++ b/test/intl402/Intl/getCanonicalLocales/transformed-ext-invalid.js
@@ -0,0 +1,78 @@
+// Copyright (C) 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-intl.getcanonicallocales
+description: >
+ A RangeError is thrown when a language tag includes an invalid transformed extension subtag.
+info: |
+ 8.2.1 Intl.getCanonicalLocales (locales)
+ 1. Let ll be ? CanonicalizeLocaleList(locales).
+ 2. Return CreateArrayFromList(ll).
+
+ 9.2.1 CanonicalizeLocaleList (locales)
+ ...
+ 7. Repeat, while k < len
+ ...
+ c. If kPresent is true, then
+ ...
+ v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
+ ...
+
+includes: [testIntl.js]
+---*/
+
+const invalid = [
+  // empty
+  "en-t",
+  "en-t-a",
+  "en-t-x",
+  "en-t-0",
+
+  // incomplete
+  "en-t-",
+  "en-t-en-",
+  "en-t-0x-",
+
+  // tlang: unicode_language_subtag must be 2-3 or 5-8 characters and mustn't
+  // contain extlang subtags.
+  "en-t-root",
+  "en-t-abcdefghi",
+  "en-t-ar-aao",
+
+  // tlang: unicode_script_subtag must be 4 alphabetical characters, can't
+  // be repeated.
+  "en-t-en-lat0",
+  "en-t-en-latn-latn",
+
+  // tlang: unicode_region_subtag must either be 2 alpha characters or a three
+  // digit code.
+  "en-t-en-0",
+  "en-t-en-00",
+  "en-t-en-0x",
+  "en-t-en-x0",
+  "en-t-en-latn-0",
+  "en-t-en-latn-00",
+  "en-t-en-latn-xyz",
+
+  // tlang: unicode_variant_subtag is either 5-8 alphanum characters or 4
+  // characters starting with a digit.
+  "en-t-en-abcdefghi",
+  "en-t-en-latn-gb-ab",
+  "en-t-en-latn-gb-abc",
+  "en-t-en-latn-gb-abcd",
+  "en-t-en-latn-gb-abcdefghi",
+
+  // tkey must be followed by tvalue.
+  "en-t-d0",
+  "en-t-d0-m0",
+  "en-t-d0-x-private",
+];
+
+for (const candidate of invalid) {
+  // Confirm the harness helper rejects the tag too, so a bad table entry is caught.
+  const harnessAccepts = isCanonicalizedStructurallyValidLanguageTag(candidate);
+  assert.sameValue(harnessAccepts, false,
+                   "\"" + candidate + "\" isn't a structurally valid language tag.");
+  assert.throws(RangeError, () => Intl.getCanonicalLocales(candidate), `${candidate}`);
+}
diff --git a/test/intl402/Intl/getCanonicalLocales/transformed-ext-valid.js b/test/intl402/Intl/getCanonicalLocales/transformed-ext-valid.js
new file mode 100644
index 0000000000..b4ae24087b
--- /dev/null
+++ b/test/intl402/Intl/getCanonicalLocales/transformed-ext-valid.js
@@ -0,0 +1,78 @@
+// Copyright (C) 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-intl.getcanonicallocales
+description: >
+ No RangeError is thrown when a language tag includes a valid transformed extension subtag.
+info: |
+ 8.2.1 Intl.getCanonicalLocales (locales)
+ 1. Let ll be ? CanonicalizeLocaleList(locales).
+ 2. Return CreateArrayFromList(ll).
+
+ 9.2.1 CanonicalizeLocaleList (locales)
+ ...
+ 7. Repeat, while k < len
+ ...
+ c. If kPresent is true, then
+ ...
+ v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
+ vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
+ ...
+
+includes: [testIntl.js]
+---*/
+
+const valid = [
+  // tlang with unicode_language_subtag.
+  "en-t-en",
+
+  // tlang with unicode_script_subtag.
+  "en-t-en-latn",
+
+  // tlang with unicode_region_subtag.
+  "en-t-en-ca",
+
+  // tlang with unicode_script_subtag and unicode_region_subtag.
+  "en-t-en-latn-ca",
+
+  // tlang with unicode_variant_subtag.
+  "en-t-en-emodeng",
+
+  // tlang with unicode_script_subtag and unicode_variant_subtag.
+  "en-t-en-latn-emodeng",
+
+  // tlang with unicode_region_subtag and unicode_variant_subtag.
+  "en-t-en-ca-emodeng",
+
+  // tlang with unicode_script_subtag, unicode_region_subtag, and unicode_variant_subtag.
+  "en-t-en-latn-ca-emodeng",
+
+  // No tlang. (Must contain at least one tfield.)
+  "en-t-d0-ascii",
+];
+
+// Suffixes appended to every entry of |valid| to cover additional tfields.
+const extraFields = [
+  "",
+
+  // tfield with a tvalue consisting of a single subtag.
+  "-i0-handwrit",
+
+  // tfield with a tvalue consisting of two subtags.
+  "-s0-accents-publish",
+];
+
+for (const base of valid) {
+  for (const suffix of extraFields) {
+    const actualTag = base + suffix;
+
+    // Make sure the test data is correct.
+    assert(isCanonicalizedStructurallyValidLanguageTag(actualTag),
+      "\"" + actualTag + "\" is a canonical and structurally valid language tag.");
+    // Tags that are already canonical must be returned unchanged.
+    const result = Intl.getCanonicalLocales(actualTag);
+    assert.sameValue(result.length, 1);
+    assert.sameValue(result[0], actualTag);
+  }
+}
diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-calendar.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-calendar.js
new file mode 100644
index 0000000000..7ac2bd338c
--- /dev/null
+++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-calendar.js
@@ -0,0 +1,58 @@
+// Copyright (C) 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-intl.getcanonicallocales
+description: >
+ Test Unicode extension subtag canonicalisation for the "ca" extension key.
+info: |
+ 8.2.1 Intl.getCanonicalLocales (locales)
+ 1. Let ll be ? CanonicalizeLocaleList(locales).
+ 2. Return CreateArrayFromList(ll).
+
+ 9.2.1 CanonicalizeLocaleList (locales)
+ ...
+ 7. Repeat, while k < len
+ ...
+ c. If kPresent is true, then
+ ...
+ v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
+ vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
+ ...
+
+ UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
+ Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
+ See Section 3.6.4 (U Extension Data Files) and Section 3.7.1 (T Extension Data Files). The
+ aliases are in the alias attribute value, while the canonical is in the name attribute value.
+includes: [testIntl.js]
+---*/
+
+//
+const testData = {
+  // Keys are non-canonical "ca" types, values the canonical type they map to.
+  "ethiopic-amete-alem": "ethioaa",
+
+  // The "name" and "alias" attributes for "islamic-civil" don't quite match what
+  // UTS 35, §3.2.1 specifies. Following §3.2.1 to the letter, "islamicc" would be
+  // the canonical value and "islamic-civil" an alias value. Assume the definitions
+  // in https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
+  // overrule UTS 35, §3.2.1.
+  //
+  // Hence "islamicc" is expected to canonicalise to "islamic-civil" here.
+  "islamicc": "islamic-civil",
+};
+
+for (const [aliasType, canonicalType] of Object.entries(testData)) {
+  const prefix = "und-u-ca-";
+  const input = prefix + aliasType;
+  const expected = prefix + canonicalType;
+
+  // Sanity-check the table: the input must not be canonical, the expectation must be.
+  assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(input), false,
+                   "\"" + input + "\" isn't a canonical language tag.");
+  assert(isCanonicalizedStructurallyValidLanguageTag(expected),
+         "\"" + expected + "\" is a canonical and structurally valid language tag.");
+  const canonicalized = Intl.getCanonicalLocales(input);
+  assert.sameValue(canonicalized.length, 1);
+  assert.sameValue(canonicalized[0], expected);
+}
diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-col-strength.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-col-strength.js
new file mode 100644
index 0000000000..d28ad11ece
--- /dev/null
+++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-col-strength.js
@@ -0,0 +1,65 @@
+// Copyright (C) 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-intl.getcanonicallocales
+description: >
+ Test Unicode extension subtag canonicalisation for the "ks" extension key.
+info: |
+ 8.2.1 Intl.getCanonicalLocales (locales)
+ 1. Let ll be ? CanonicalizeLocaleList(locales).
+ 2. Return CreateArrayFromList(ll).
+
+ 9.2.1 CanonicalizeLocaleList (locales)
+ ...
+ 7. Repeat, while k < len
+ ...
+ c. If kPresent is true, then
+ ...
+ v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
+ vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
+ ...
+
+ UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
+ Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
+ See Section 3.6.4 (U Extension Data Files) and Section 3.7.1 (T Extension Data Files). The
+ aliases are in the alias attribute value, while the canonical is in the name attribute value.
+includes: [testIntl.js]
+---*/
+
+// /
+const testData = {
+  // Keys are "ks" aliases, values the canonical type they are replaced with.
+  "primary": "level1",
+
+  // "secondary" doesn't match |uvalue|, so we can skip it.
+  // (A |uvalue| subtag is 3-8 alphanumeric characters; "secondary" has nine.)
+  // "secondary": "level2",
+
+  // "tertiary" has exactly eight characters and therefore still matches.
+  "tertiary": "level3",
+
+  // Neither "quaternary" nor "quarternary" match |uvalue|, so we can skip them.
+  //
+  // "quaternary": "level4",
+  // "quarternary": "level4",
+
+  // "identical" doesn't match |uvalue|, so we can skip it.
+  //
+  // "identical": "identic",
+};
+
+for (const [aliasType, canonicalType] of Object.entries(testData)) {
+  const input = `und-u-ks-${aliasType}`;
+  const expected = `und-u-ks-${canonicalType}`;
+
+  // Sanity-check the table: the input must not be canonical, the expectation must be.
+  const inputIsCanonical = isCanonicalizedStructurallyValidLanguageTag(input);
+  assert.sameValue(inputIsCanonical, false, "\"" + input + "\" isn't a canonical language tag.");
+  const expectedIsCanonical = isCanonicalizedStructurallyValidLanguageTag(expected);
+  assert(expectedIsCanonical,
+         "\"" + expected + "\" is a canonical and structurally valid language tag.");
+  const canonicalized = Intl.getCanonicalLocales(input);
+  assert.sameValue(canonicalized.length, 1);
+  assert.sameValue(canonicalized[0], expected);
+}
diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-measurement-system.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-measurement-system.js
new file mode 100644
index 0000000000..013bc94242
--- /dev/null
+++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-measurement-system.js
@@ -0,0 +1,49 @@
+// Copyright (C) 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-intl.getcanonicallocales
+description: >
+ Test Unicode extension subtag canonicalisation for the "ms" extension key.
+info: |
+ 8.2.1 Intl.getCanonicalLocales (locales)
+ 1. Let ll be ? CanonicalizeLocaleList(locales).
+ 2. Return CreateArrayFromList(ll).
+
+ 9.2.1 CanonicalizeLocaleList (locales)
+ ...
+ 7. Repeat, while k < len
+ ...
+ c. If kPresent is true, then
+ ...
+ v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
+ vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
+ ...
+
+ UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
+ Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
+ See Section 3.6.4 (U Extension Data Files) and Section 3.7.1 (T Extension Data Files). The
+ aliases are in the alias attribute value, while the canonical is in the name attribute value.
+includes: [testIntl.js]
+---*/
+
+//
+const testData = {
+  // The alias "imperial" is expected to be replaced by "uksystem".
+  "imperial": "uksystem",
+};
+
+for (const [aliasType, canonicalType] of Object.entries(testData)) {
+  const prefix = "und-u-ms-";
+  const input = prefix + aliasType;
+  const expected = prefix + canonicalType;
+
+  // Sanity-check the table: the input must not be canonical, the expectation must be.
+  assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(input), false,
+                   "\"" + input + "\" isn't a canonical language tag.");
+  assert(isCanonicalizedStructurallyValidLanguageTag(expected),
+         "\"" + expected + "\" is a canonical and structurally valid language tag.");
+  const canonicalized = Intl.getCanonicalLocales(input);
+  assert.sameValue(canonicalized.length, 1);
+  assert.sameValue(canonicalized[0], expected);
+}
diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-region.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-region.js
new file mode 100644
index 0000000000..197b6868c0
--- /dev/null
+++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-region.js
@@ -0,0 +1,67 @@
+// Copyright (C) 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-intl.getcanonicallocales
+description: >
+ Test Unicode extension subtag canonicalisation for the "rg" extension key.
+info: |
+ 8.2.1 Intl.getCanonicalLocales (locales)
+ 1. Let ll be ? CanonicalizeLocaleList(locales).
+ 2. Return CreateArrayFromList(ll).
+
+ 9.2.1 CanonicalizeLocaleList (locales)
+ ...
+ 7. Repeat, while k < len
+ ...
+ c. If kPresent is true, then
+ ...
+ v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
+ vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
+ ...
+
+ UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
+ Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
+ See Section 3.6.4 (U Extension Data Files) and Section 3.7.1 (T Extension Data Files). The
+ aliases are in the alias attribute value, while the canonical is in the name attribute value.
+
+ Replace aliases in special key values:
+ If there is an 'sd' or 'rg' key, replace any subdivision alias in its value in the same way,
+ using subdivisionAlias data.
+includes: [testIntl.js]
+---*/
+
+const testData = {
+  // Keys are subdivision aliases, values the replacement expected after canonicalisation.
+  "no23": "no50",
+
+  //
+  "cn11": "cnbj",
+
+  //
+  "cz10a": "cz110",
+
+  // "fra" and "frg" below share the same replacement value.
+  "fra": "frges",
+
+  //
+  "frg": "frges",
+
+  //
+  "lud": "lucl",
+};
+
+for (const [aliasValue, replacement] of Object.entries(testData)) {
+  const input = `und-u-rg-${aliasValue}`;
+  const expected = `und-u-rg-${replacement}`;
+
+  // Sanity-check the table: the input must not be canonical, the expectation must be.
+  const inputIsCanonical = isCanonicalizedStructurallyValidLanguageTag(input);
+  assert.sameValue(inputIsCanonical, false, "\"" + input + "\" isn't a canonical language tag.");
+  const expectedIsCanonical = isCanonicalizedStructurallyValidLanguageTag(expected);
+  assert(expectedIsCanonical,
+         "\"" + expected + "\" is a canonical and structurally valid language tag.");
+  const canonicalized = Intl.getCanonicalLocales(input);
+  assert.sameValue(canonicalized.length, 1);
+  assert.sameValue(canonicalized[0], expected);
+}
diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-subdivision.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-subdivision.js
new file mode 100644
index 0000000000..3bbc70fb43
--- /dev/null
+++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-subdivision.js
@@ -0,0 +1,72 @@
+// Copyright (C) 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-intl.getcanonicallocales
+description: >
+ Test Unicode extension subtag canonicalisation for the "sd" extension key.
+info: |
+ 8.2.1 Intl.getCanonicalLocales (locales)
+ 1. Let ll be ? CanonicalizeLocaleList(locales).
+ 2. Return CreateArrayFromList(ll).
+
+ 9.2.1 CanonicalizeLocaleList (locales)
+ ...
+ 7. Repeat, while k < len
+ ...
+ c. If kPresent is true, then
+ ...
+ v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
+ vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
+ ...
+
+ UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
+ Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
+ See Section 3.6.4 (U Extension Data Files) and Section 3.7.1 (T Extension Data Files). The
+ aliases are in the alias attribute value, while the canonical is in the name attribute value.
+
+ Replace aliases in special key values:
+ If there is an 'sd' or 'rg' key, replace any subdivision alias in its value in the same way,
+ using subdivisionAlias data.
+includes: [testIntl.js]
+---*/
+
+const testData = {
+  // Keys are subdivision aliases, values the replacement expected after canonicalisation.
+  "no23": "no50",
+
+  //
+  "cn11": "cnbj",
+
+  //
+  "cz10a": "cz110",
+
+  // "fra" and "frg" below share the same replacement value.
+  "fra": "frges",
+
+  //
+  "frg": "frges",
+
+  //
+  "lud": "lucl",
+};
+
+for (const [aliasValue, replacement] of Object.entries(testData)) {
+  // Subdivision codes should always have a matching region subtag. This
+  // shouldn't actually matter for canonicalisation, but let's not push our
+  // luck and instead keep the language tag 'valid' per UTS 35, §3.6.5.
+  // The region is taken from the first two characters of the replacement.
+  const region = replacement.slice(0, 2).toUpperCase();
+  const prefix = `und-${region}-u-sd-`;
+  const input = prefix + aliasValue;
+  const expected = prefix + replacement;
+
+  // Sanity-check the table: the input must not be canonical, the expectation must be.
+  assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(input), false,
+                   "\"" + input + "\" isn't a canonical language tag.");
+  assert(isCanonicalizedStructurallyValidLanguageTag(expected),
+         "\"" + expected + "\" is a canonical and structurally valid language tag.");
+  const canonicalized = Intl.getCanonicalLocales(input);
+  assert.sameValue(canonicalized.length, 1);
+  assert.sameValue(canonicalized[0], expected);
+}
diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-timezone.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-timezone.js
new file mode 100644
index 0000000000..4de6458639
--- /dev/null
+++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-timezone.js
@@ -0,0 +1,72 @@
+// Copyright (C) 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-intl.getcanonicallocales
+description: >
+ Test Unicode extension subtag canonicalisation for the "tz" extension key.
+info: |
+ 8.2.1 Intl.getCanonicalLocales (locales)
+ 1. Let ll be ? CanonicalizeLocaleList(locales).
+ 2. Return CreateArrayFromList(ll).
+
+ 9.2.1 CanonicalizeLocaleList (locales)
+ ...
+ 7. Repeat, while k < len
+ ...
+ c. If kPresent is true, then
+ ...
+ v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
+ vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
+ ...
+
+ UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
+ Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
+ See Section 3.6.4 (U Extension Data Files) and Section 3.7.1 (T Extension Data Files). The
+ aliases are in the alias attribute value, while the canonical is in the name attribute value.
+includes: [testIntl.js]
+---*/
+
+//
+const testData = {
+  // Similar to the "ca" extension key, assume "preferred" holds the canonical
+  // value and "name" the alias value.
+
+  //
+  "cnckg": "cnsha",
+
+  // NB: "Eire" matches the |uvalue| production.
+  //
+  "eire": "iedub",
+
+  // NB: "EST" matches the |uvalue| production.
+  //
+  "est": "utcw05",
+
+  // NB: "GMT0" matches the |uvalue| production.
+  //
+  "gmt0": "gmt",
+
+  // NB: "UCT" matches the |uvalue| production.
+  //
+  "uct": "utc",
+
+  // NB: "Zulu" matches the |uvalue| production.
+  //
+  "zulu": "utc",
+};
+
+for (const [aliasType, canonicalType] of Object.entries(testData)) {
+  const prefix = "und-u-tz-";
+  const input = prefix + aliasType;
+  const expected = prefix + canonicalType;
+
+  // Sanity-check the table: the input must not be canonical, the expectation must be.
+  assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(input), false,
+                   "\"" + input + "\" isn't a canonical language tag.");
+  assert(isCanonicalizedStructurallyValidLanguageTag(expected),
+         "\"" + expected + "\" is a canonical and structurally valid language tag.");
+  const canonicalized = Intl.getCanonicalLocales(input);
+  assert.sameValue(canonicalized.length, 1);
+  assert.sameValue(canonicalized[0], expected);
+}
diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-yes-to-true.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-yes-to-true.js
new file mode 100644
index 0000000000..296877bb6a
--- /dev/null
+++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-yes-to-true.js
@@ -0,0 +1,86 @@
+// Copyright (C) 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-intl.getcanonicallocales
+description: >
+ "kb", "kc", "kh", "kk", and "kn" Unicode extension keys canonicalise "yes" to "true".
+info: |
+ 8.2.1 Intl.getCanonicalLocales (locales)
+ 1. Let ll be ? CanonicalizeLocaleList(locales).
+ 2. Return CreateArrayFromList(ll).
+
+ 9.2.1 CanonicalizeLocaleList (locales)
+ ...
+ 7. Repeat, while k < len
+ ...
+ c. If kPresent is true, then
+ ...
+ v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
+ vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
+ ...
+
+ UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
+ Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
+ See Section 3.6.4 (U Extension Data Files) and Section 3.7.1 (T Extension Data Files). The
+ aliases are in the alias attribute value, while the canonical is in the name attribute value.
+
+ UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
+ Any type or tfield value "true" is removed.
+includes: [testIntl.js]
+---*/
+
+const unicodeKeys = [
+  // For each key listed here, the value "yes" is canonicalised to "true",
+  // and a "true" value is then removed from the tag.
+  "kb",
+
+  //
+  //
+  "kc",
+
+  //
+  //
+  "kh",
+
+  //
+  //
+  "kk",
+
+  //
+  //
+  "kn",
+];
+
+for (const ukey of unicodeKeys) {
+  const expected = `und-u-${ukey}`;
+  const input = `${expected}-yes`;
+
+  // Sanity-check the table: the input must not be canonical, the expectation must be.
+  const inputIsCanonical = isCanonicalizedStructurallyValidLanguageTag(input);
+  assert.sameValue(inputIsCanonical, false, "\"" + input + "\" isn't a canonical language tag.");
+  const expectedIsCanonical = isCanonicalizedStructurallyValidLanguageTag(expected);
+  assert(expectedIsCanonical,
+         "\"" + expected + "\" is a canonical and structurally valid language tag.");
+  const canonicalized = Intl.getCanonicalLocales(input);
+  assert.sameValue(canonicalized.length, 1);
+  assert.sameValue(canonicalized[0], expected);
+}
+
+// Test some other Unicode extension keys which don't contain an alias entry to
+// canonicalise "yes" to "true".
+const otherUnicodeKeys = [
+  "ka", "kf", "kr", "ks", "kv",
+];
+
+for (const ukey of otherUnicodeKeys) {
+  const input = `und-u-${ukey}-yes`;
+
+  // Here the "yes" value is already canonical, so the tag must round-trip unchanged.
+  const inputIsCanonical = isCanonicalizedStructurallyValidLanguageTag(input);
+  assert(inputIsCanonical,
+         "\"" + input + "\" is a canonical and structurally valid language tag.");
+  const canonicalized = Intl.getCanonicalLocales(input);
+  assert.sameValue(canonicalized.length, 1);
+  assert.sameValue(canonicalized[0], input);
+}
diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-key-with-digit.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-key-with-digit.js
new file mode 100644
index 0000000000..aa31187b1b
--- /dev/null
+++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-key-with-digit.js
@@ -0,0 +1,54 @@
+// Copyright (C) 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-intl.getcanonicallocales
+description: >
+ Test Unicode extension subtags where the ukey subtag contains a digit.
+info: |
+ 8.2.1 Intl.getCanonicalLocales (locales)
+ 1. Let ll be ? CanonicalizeLocaleList(locales).
+ 2. Return CreateArrayFromList(ll).
+
+ 9.2.1 CanonicalizeLocaleList (locales)
+ ...
+ 7. Repeat, while k < len
+ ...
+ c. If kPresent is true, then
+ ...
+ v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
+ vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
+ ...
+
+includes: [testIntl.js]
+---*/
+
+// Unicode locale extension sequences don't allow keys with a digit as their
+// second character.
+const invalidCases = [
+  "en-u-c0",
+  "en-u-00",
+];
+
+// The first character is allowed to be a digit.
+const validCases = [
+  "en-u-0c",
+];
+
+for (const invalid of invalidCases) {
+  // Make sure the test data is correct.
+  assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(invalid), false,
+    "\"" + invalid + "\" isn't a structurally valid language tag.");
+  // Pass the tag as the message so a failure names the offending input.
+  assert.throws(RangeError, () => Intl.getCanonicalLocales(invalid), invalid);
+}
+
+for (const valid of validCases) {
+  // Make sure the test data is correct.
+  assert(isCanonicalizedStructurallyValidLanguageTag(valid),
+    "\"" + valid + "\" is a canonical and structurally valid language tag.");
+
+  const result = Intl.getCanonicalLocales(valid);
+  assert.sameValue(result.length, 1);
+  assert.sameValue(result[0], valid);
+}
diff --git a/test/intl402/Locale/constructor-apply-options-canonicalizes-twice.js b/test/intl402/Locale/constructor-apply-options-canonicalizes-twice.js
new file mode 100644
index 0000000000..a0ec1c83e8
--- /dev/null
+++ b/test/intl402/Locale/constructor-apply-options-canonicalizes-twice.js
@@ -0,0 +1,26 @@
+// Copyright 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-apply-options-to-tag
+description: >
+ ApplyOptionsToTag canonicalises the language tag two times.
+info: |
+ 10.1.1 ApplyOptionsToTag( tag, options )
+
+ ...
+ 9. Set tag to CanonicalizeUnicodeLocaleId(tag).
+ 10. If language is not undefined,
+ ...
+ b. Set tag to tag with the substring corresponding to the unicode_language_subtag
+ production of the unicode_language_id replaced by the string language.
+ ...
+ 13. Return CanonicalizeUnicodeLocaleId(tag).
+features: [Intl.Locale]
+---*/
+
+// The locale identifier is canonicalised before the options are applied, so
+// "und-Armn-SU" first becomes "und-Armn-AM" and only then is the language set
+// to "ru". Applying "ru" first would give "ru-Armn-RU" instead.
+const locale = new Intl.Locale("und-Armn-SU", {language: "ru"});
+assert.sameValue(locale.toString(), "ru-Armn-AM");
diff --git a/test/intl402/Locale/likely-subtags.js b/test/intl402/Locale/likely-subtags.js
index 42d56a7217..44761f34a1 100644
--- a/test/intl402/Locale/likely-subtags.js
+++ b/test/intl402/Locale/likely-subtags.js
@@ -37,6 +37,10 @@ const testDataMaximal = {
"und-419": "es-Latn-419",
"und-150": "ru-Cyrl-RU",
"und-AT": "de-Latn-AT",
+ "und-Cyrl-RO": "bg-Cyrl-RO",
+
+ // Undefined primary language not required to change in all cases.
+ "und-AQ": "und-Latn-AQ",
};
const testDataMinimal = {
@@ -62,6 +66,8 @@ const testDataMinimal = {
"es-Latn-419": "es-419",
"ru-Cyrl-RU": "ru",
"de-Latn-AT": "de-AT",
+ "bg-Cyrl-RO": "bg-RO",
+ "und-Latn-AQ": "und-AQ",
};
// Add variants, extensions, and privateuse subtags and ensure they don't
diff --git a/test/intl402/Locale/prototype/minimize/removing-likely-subtags-first-adds-likely-subtags.js b/test/intl402/Locale/prototype/minimize/removing-likely-subtags-first-adds-likely-subtags.js
new file mode 100644
index 0000000000..2b2368b383
--- /dev/null
+++ b/test/intl402/Locale/prototype/minimize/removing-likely-subtags-first-adds-likely-subtags.js
@@ -0,0 +1,49 @@
+// Copyright 2020 André Bargull. All rights reserved.
+// This code is governed by the BSD license found in the LICENSE file.
+
+/*---
+esid: sec-Intl.Locale.prototype.minimize
+description: >
+ The "Remove Likely Subtags" algorithm adds likely subtags before processing the locale.
+info: |
+ Intl.Locale.prototype.minimize ()
+ 3. Let minimal be the result of the Remove Likely Subtags algorithm applied to loc.[[Locale]].
+ If an error is signaled, set minimal to loc.[[Locale]].
+
+ UTS 35, §4.3 Likely Subtags
+ Remove Likely Subtags
+
+ 1. First get max = AddLikelySubtags(inputLocale). If an error is signaled, return it.
+ 2. ...
+features: [Intl.Locale]
+---*/
+
+const testDataMinimal = {
+  // Undefined primary language.
+  "und": "en",
+  "und-Thai": "th",
+  "und-419": "es-419",
+  "und-150": "ru",
+  "und-AT": "de-AT",
+
+  // https://unicode-org.atlassian.net/browse/ICU-13786
+  "aae-Latn-IT": "aae-Latn-IT",
+  "aae-Thai-CO": "aae-Thai-CO",
+
+  // https://unicode-org.atlassian.net/browse/ICU-10220
+  // https://unicode-org.atlassian.net/browse/ICU-12345
+  "und-CW": "pap-CW",
+  "und-US": "en",
+  "zh-Hant": "zh-TW",
+  "zh-Hani": "zh-Hani",
+};
+
+for (const [tag, minimal] of Object.entries(testDataMinimal)) {
+  // The expected value must be a fixed point of minimize().
+  const reMinimized = new Intl.Locale(minimal).minimize().toString();
+  assert.sameValue(reMinimized, minimal, `"${minimal}" should be minimal`);
+
+  // Minimizing the input must produce the expected minimal form.
+  const minimized = new Intl.Locale(tag).minimize().toString();
+  assert.sameValue(minimized, minimal, `"${tag}".minimize() should be "${minimal}"`);
+}