From 18bb955771669541c56c28748603f6afdb2e25ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Bargull?= Date: Mon, 30 Mar 2020 04:55:25 -0700 Subject: [PATCH] Upstream additional language tag canonicalisation and Intl.Locale tests from SpiderMonkey Adds more canonicalisation and Intl.Locale tests from: - https://hg.mozilla.org/mozilla-central/file/tip/js/src/tests/non262/Intl/Locale - https://hg.mozilla.org/mozilla-central/file/tip/js/src/tests/non262/Intl/ --- .../complex-language-subtag-replacement.js | 58 ++++++++++ .../complex-region-subtag-replacement.js | 108 ++++++++++++++++++ .../transformed-ext-canonical.js | 54 +++++++++ .../transformed-ext-invalid.js | 78 +++++++++++++ .../transformed-ext-valid.js | 78 +++++++++++++ .../unicode-ext-canonicalize-calendar.js | 58 ++++++++++ .../unicode-ext-canonicalize-col-strength.js | 65 +++++++++++ ...ode-ext-canonicalize-measurement-system.js | 49 ++++++++ .../unicode-ext-canonicalize-region.js | 67 +++++++++++ .../unicode-ext-canonicalize-subdivision.js | 72 ++++++++++++ .../unicode-ext-canonicalize-timezone.js | 72 ++++++++++++ .../unicode-ext-canonicalize-yes-to-true.js | 86 ++++++++++++++ .../unicode-ext-key-with-digit.js | 54 +++++++++ ...uctor-apply-options-canonicalizes-twice.js | 26 +++++ test/intl402/Locale/likely-subtags.js | 6 + ...ikely-subtags-first-adds-likely-subtags.js | 49 ++++++++ 16 files changed, 980 insertions(+) create mode 100644 test/intl402/Intl/getCanonicalLocales/complex-language-subtag-replacement.js create mode 100644 test/intl402/Intl/getCanonicalLocales/complex-region-subtag-replacement.js create mode 100644 test/intl402/Intl/getCanonicalLocales/transformed-ext-canonical.js create mode 100644 test/intl402/Intl/getCanonicalLocales/transformed-ext-invalid.js create mode 100644 test/intl402/Intl/getCanonicalLocales/transformed-ext-valid.js create mode 100644 test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-calendar.js create mode 100644 
test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-col-strength.js create mode 100644 test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-measurement-system.js create mode 100644 test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-region.js create mode 100644 test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-subdivision.js create mode 100644 test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-timezone.js create mode 100644 test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-yes-to-true.js create mode 100644 test/intl402/Intl/getCanonicalLocales/unicode-ext-key-with-digit.js create mode 100644 test/intl402/Locale/constructor-apply-options-canonicalizes-twice.js create mode 100644 test/intl402/Locale/prototype/minimize/removing-likely-subtags-first-adds-likely-subtags.js diff --git a/test/intl402/Intl/getCanonicalLocales/complex-language-subtag-replacement.js b/test/intl402/Intl/getCanonicalLocales/complex-language-subtag-replacement.js new file mode 100644 index 0000000000..7392d3bc16 --- /dev/null +++ b/test/intl402/Intl/getCanonicalLocales/complex-language-subtag-replacement.js @@ -0,0 +1,58 @@ +// Copyright (C) 2020 André Bargull. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-intl.getcanonicallocales +description: > + Assert non-simple language subtag replacements work as expected. +info: | + 8.2.1 Intl.getCanonicalLocales (locales) + 1. Let ll be ? CanonicalizeLocaleList(locales). + 2. Return CreateArrayFromList(ll). + + 9.2.1 CanonicalizeLocaleList (locales) + ... + 7. Repeat, while k < len + ... + c. If kPresent is true, then + ... + vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). + ... 
+ + UTS 35, §3.2.1 Canonical Unicode Locale Identifiers + + - Replace aliases in the unicode_language_id and tlang (if any) using the following process: + - If the language subtag matches the type attribute of a languageAlias element in + Supplemental Data, replace the language subtag with the replacement value. + 1. If there are additional subtags in the replacement value, add them to the result, + but only if there is no corresponding subtag already in the tag. + +includes: [testIntl.js] +---*/ + +// CLDR contains language mappings where in addition to the language subtag also +// the script or region subtag is modified, unless they're already present. + +const testData = { + // "sh" adds "Latn", unless a script subtag is already present. + // + "sh": "sr-Latn", + "sh-Cyrl": "sr-Cyrl", + + // "cnr" adds "ME", unless a region subtag is already present. + // + "cnr": "sr-ME", + "cnr-BA": "sr-BA", +}; + +for (let [tag, canonical] of Object.entries(testData)) { + // Make sure the test data is correct. + assert( + isCanonicalizedStructurallyValidLanguageTag(canonical), + "\"" + canonical + "\" is a canonicalized and structurally valid language tag." + ); + + let result = Intl.getCanonicalLocales(tag); + assert.sameValue(result.length, 1); + assert.sameValue(result[0], canonical); +} diff --git a/test/intl402/Intl/getCanonicalLocales/complex-region-subtag-replacement.js b/test/intl402/Intl/getCanonicalLocales/complex-region-subtag-replacement.js new file mode 100644 index 0000000000..0f10355bc8 --- /dev/null +++ b/test/intl402/Intl/getCanonicalLocales/complex-region-subtag-replacement.js @@ -0,0 +1,108 @@ +// Copyright (C) 2020 André Bargull. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-intl.getcanonicallocales +description: > + Assert non-simple region subtag replacements work as expected. +info: | + 8.2.1 Intl.getCanonicalLocales (locales) + 1. Let ll be ? CanonicalizeLocaleList(locales). + 2. 
Return CreateArrayFromList(ll). + + 9.2.1 CanonicalizeLocaleList (locales) + ... + 7. Repeat, while k < len + ... + c. If kPresent is true, then + ... + vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). + ... + + UTS 35, §3.2.1 Canonical Unicode Locale Identifiers + + - Replace aliases in the unicode_language_id and tlang (if any) using the following process: + - If the region subtag matches the type attribute of a territoryAlias element in + Supplemental Data, replace the language subtag with the replacement value, as follows: + 1. If there is a single territory in the replacement, use it. + 2. If there are multiple territories: + 1. Look up the most likely territory for the base language code (and script, if there is one). + 2. If that likely territory is in the list, use it. + 3. Otherwise, use the first territory in the list. + +includes: [testIntl.js] +---*/ + +// CLDR contains region mappings where the replacement region depends on the +// likely subtags from the language and script subtags. + +const testData = { + // For example, the breakup of the Soviet Union ("SU") means that the region of + // the Soviet Union ("SU") is replaced by Russia ("RU"), Armenia ("AM"), or + // many others -- depending on the specified (or merely likely) language and + // script subtags: + // + // + // + "ru-SU": "ru-RU", + "ru-810": "ru-RU", + "en-SU": "en-RU", + "en-810": "en-RU", + "und-SU": "und-RU", + "und-810": "und-RU", + "und-Latn-SU": "und-Latn-RU", + "und-Latn-810": "und-Latn-RU", + + // Armenia can be the preferred region when the language is "hy" (Armenian) or + // the script is "Armn" (Armenian). 
+ // + // + // + "hy-SU": "hy-AM", + "hy-810": "hy-AM", + "und-Armn-SU": "und-Armn-AM", + "und-Armn-810": "und-Armn-AM", + + // + // + // The following likely-subtags entries contain "RS" and "ME": + // + // + // + // + // + // + // In this case there is no language/script combination (without a region + // subtag) where "ME" is ever chosen, so the replacement is always "RS". + "sr-CS": "sr-RS", + "sr-Latn-CS": "sr-Latn-RS", + "sr-Cyrl-CS": "sr-Cyrl-RS", + + // The existing region in the source locale identifier is ignored when selecting + // the likely replacement region. For example take "az-NT", which is Azerbaijani + // spoken in the Neutral Zone. The replacement region for "NT" is either + // "SA" (Saudi-Arabia) or "IQ" (Iraq), and there is also a likely subtags entry + // for "az-IQ". But when only looking at the language subtag in "az-NT", "az" is + // always resolved to "az-Latn-AZ", and because "AZ" is not in the list ["SA", + // "IQ"], the final replacement region is the default for "NT", namely "SA". + // That means "az-NT" will be canonicalised to "az-SA" and not "az-IQ", even + // though the latter may be a more sensible candidate based on the actual usage + // of the target locales. + // + // + // + // + "az-NT": "az-SA", +}; + +for (let [tag, canonical] of Object.entries(testData)) { + // Make sure the test data is correct. + assert( + isCanonicalizedStructurallyValidLanguageTag(canonical), + "\"" + canonical + "\" is a canonicalized and structurally valid language tag." + ); + + let result = Intl.getCanonicalLocales(tag); + assert.sameValue(result.length, 1); + assert.sameValue(result[0], canonical); +} diff --git a/test/intl402/Intl/getCanonicalLocales/transformed-ext-canonical.js b/test/intl402/Intl/getCanonicalLocales/transformed-ext-canonical.js new file mode 100644 index 0000000000..c1e8e4f6ad --- /dev/null +++ b/test/intl402/Intl/getCanonicalLocales/transformed-ext-canonical.js @@ -0,0 +1,54 @@ +// Copyright (C) 2020 André Bargull. 
All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-intl.getcanonicallocales +description: > + Test canonicalisation within transformed extension subtags. +info: | + 8.2.1 Intl.getCanonicalLocales (locales) + 1. Let ll be ? CanonicalizeLocaleList(locales). + 2. Return CreateArrayFromList(ll). + + 9.2.1 CanonicalizeLocaleList (locales) + ... + 7. Repeat, while k < len + ... + c. If kPresent is true, then + ... + v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). + ... + +includes: [testIntl.js] +---*/ + +const testData = { + // Variant subtags are alphabetically ordered. + "sl-t-sl-rozaj-biske-1994": "sl-t-sl-1994-biske-rozaj", + + // tfield subtags are alphabetically ordered. + // (Also tests subtag case normalisation.) + "DE-T-M0-DIN-K0-QWERTZ": "de-t-k0-qwertz-m0-din", + + // "true" tvalue subtags aren't removed. + // (UTS 35 version 36, §3.2.1 claims otherwise, but tkey must be followed by + // tvalue, so that's likely a spec bug in UTS 35.) + "en-t-m0-true": "en-t-m0-true", + + // tlang subtags are canonicalised. + "en-t-iw": "en-t-he", + + // Deprecated tvalue subtags are replaced by their preferred value. + "und-Latn-t-und-hani-m0-names": "und-Latn-t-und-hani-m0-prprname", +}; + +for (let [tag, canonical] of Object.entries(testData)) { + // Make sure the test data is correct. 
+ assert(isCanonicalizedStructurallyValidLanguageTag(canonical), + "\"" + canonical + "\" is a canonical and structurally valid language tag."); + + let result = Intl.getCanonicalLocales(tag); + assert.sameValue(result.length, 1); + assert.sameValue(result[0], canonical); +} diff --git a/test/intl402/Intl/getCanonicalLocales/transformed-ext-invalid.js b/test/intl402/Intl/getCanonicalLocales/transformed-ext-invalid.js new file mode 100644 index 0000000000..207ef8141c --- /dev/null +++ b/test/intl402/Intl/getCanonicalLocales/transformed-ext-invalid.js @@ -0,0 +1,78 @@ +// Copyright (C) 2020 André Bargull. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-intl.getcanonicallocales +description: > + A RangeError is thrown when a language tag includes an invalid transformed extension subtag. +info: | + 8.2.1 Intl.getCanonicalLocales (locales) + 1. Let ll be ? CanonicalizeLocaleList(locales). + 2. Return CreateArrayFromList(ll). + + 9.2.1 CanonicalizeLocaleList (locales) + ... + 7. Repeat, while k < len + ... + c. If kPresent is true, then + ... + v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + ... + +includes: [testIntl.js] +---*/ + +const invalid = [ + // empty + "en-t", + "en-t-a", + "en-t-x", + "en-t-0", + + // incomplete + "en-t-", + "en-t-en-", + "en-t-0x-", + + // tlang: unicode_language_subtag must be 2-3 or 5-8 characters and mustn't + // contain extlang subtags. + "en-t-root", + "en-t-abcdefghi", + "en-t-ar-aao", + + // tlang: unicode_script_subtag must be 4 alphabetical characters, can't + // be repeated. + "en-t-en-lat0", + "en-t-en-latn-latn", + + // tlang: unicode_region_subtag must either be 2 alpha characters or a three + // digit code. 
+ "en-t-en-0", + "en-t-en-00", + "en-t-en-0x", + "en-t-en-x0", + "en-t-en-latn-0", + "en-t-en-latn-00", + "en-t-en-latn-xyz", + + // tlang: unicode_variant_subtag is either 5-8 alphanum characters or 4 + // characters starting with a digit. + "en-t-en-abcdefghi", + "en-t-en-latn-gb-ab", + "en-t-en-latn-gb-abc", + "en-t-en-latn-gb-abcd", + "en-t-en-latn-gb-abcdefghi", + + // tkey must be followed by tvalue. + "en-t-d0", + "en-t-d0-m0", + "en-t-d0-x-private", +]; + +for (let tag of invalid) { + // Make sure the test data is correct. + assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(tag), false, + "\"" + tag + "\" isn't a structurally valid language tag."); + + assert.throws(RangeError, () => Intl.getCanonicalLocales(tag), `${tag}`); +} diff --git a/test/intl402/Intl/getCanonicalLocales/transformed-ext-valid.js b/test/intl402/Intl/getCanonicalLocales/transformed-ext-valid.js new file mode 100644 index 0000000000..b4ae24087b --- /dev/null +++ b/test/intl402/Intl/getCanonicalLocales/transformed-ext-valid.js @@ -0,0 +1,78 @@ +// Copyright (C) 2020 André Bargull. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-intl.getcanonicallocales +description: > + No RangeError is thrown when a language tag includes a valid transformed extension subtag. +info: | + 8.2.1 Intl.getCanonicalLocales (locales) + 1. Let ll be ? CanonicalizeLocaleList(locales). + 2. Return CreateArrayFromList(ll). + + 9.2.1 CanonicalizeLocaleList (locales) + ... + 7. Repeat, while k < len + ... + c. If kPresent is true, then + ... + v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). + ... + +includes: [testIntl.js] +---*/ + +const valid = [ + // tlang with unicode_language_subtag. + "en-t-en", + + // tlang with unicode_script_subtag. + "en-t-en-latn", + + // tlang with unicode_region_subtag. 
+ "en-t-en-ca", + + // tlang with unicode_script_subtag and unicode_region_subtag. + "en-t-en-latn-ca", + + // tlang with unicode_variant_subtag. + "en-t-en-emodeng", + + // tlang with unicode_script_subtag and unicode_variant_subtag. + "en-t-en-latn-emodeng", + + // tlang with unicode_region_subtag and unicode_variant_subtag. + "en-t-en-ca-emodeng", + + // tlang with unicode_script_subtag, unicode_region_subtag, and unicode_variant_subtag. + "en-t-en-latn-ca-emodeng", + + // No tlang. (Must contain at least one tfield.) + "en-t-d0-ascii", +]; + +const extraFields = [ + // No extra tfield + "", + + // tfield with a tvalue consisting of a single subtag. + "-i0-handwrit", + + // tfield with a tvalue consisting of two subtags. + "-s0-accents-publish", +]; + +for (let tag of valid) { + for (let extra of extraFields) { + let actualTag = tag + extra; + + // Make sure the test data is correct. + assert(isCanonicalizedStructurallyValidLanguageTag(actualTag), + "\"" + actualTag + "\" is a canonical and structurally valid language tag."); + + let result = Intl.getCanonicalLocales(actualTag); + assert.sameValue(result.length, 1); + assert.sameValue(result[0], actualTag); + } +} diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-calendar.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-calendar.js new file mode 100644 index 0000000000..7ac2bd338c --- /dev/null +++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-calendar.js @@ -0,0 +1,58 @@ +// Copyright (C) 2020 André Bargull. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-intl.getcanonicallocales +description: > + Test Unicode extension subtag canonicalisation for the "ca" extension key. +info: | + 8.2.1 Intl.getCanonicalLocales (locales) + 1. Let ll be ? CanonicalizeLocaleList(locales). + 2. Return CreateArrayFromList(ll). + + 9.2.1 CanonicalizeLocaleList (locales) + ... + 7. 
Repeat, while k < len + ... + c. If kPresent is true, then + ... + v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). + ... + + UTS 35, §3.2.1 Canonical Unicode Locale Identifiers + Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms. + See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The + aliases are in the alias attribute value, while the canonical is in the name attribute value. +includes: [testIntl.js] +---*/ + +// +const testData = { + // + "ethiopic-amete-alem": "ethioaa", + + // + // + // + // "name" and "alias" for "islamic-civil" don't quite match what's specified in UTS 35, §3.2.1. + // Specifically following §3.2.1 to the letter means "islamicc" is the canonical value whereas + // "islamic-civil" is an alias value. Assume the definitions in + // https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files overrule UTS 35, §3.2.1. + "islamicc": "islamic-civil", +}; + +for (let [alias, name] of Object.entries(testData)) { + let tag = "und-u-ca-" + alias; + let canonical = "und-u-ca-" + name; + + // Make sure the test data is correct. 
+ assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(tag), false, + "\"" + tag + "\" isn't a canonical language tag."); + assert(isCanonicalizedStructurallyValidLanguageTag(canonical), + "\"" + canonical + "\" is a canonical and structurally valid language tag."); + + let result = Intl.getCanonicalLocales(tag); + assert.sameValue(result.length, 1); + assert.sameValue(result[0], canonical); +} diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-col-strength.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-col-strength.js new file mode 100644 index 0000000000..d28ad11ece --- /dev/null +++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-col-strength.js @@ -0,0 +1,65 @@ +// Copyright (C) 2020 André Bargull. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-intl.getcanonicallocales +description: > + Test Unicode extension subtag canonicalisation for the "ks" extension key. +info: | + 8.2.1 Intl.getCanonicalLocales (locales) + 1. Let ll be ? CanonicalizeLocaleList(locales). + 2. Return CreateArrayFromList(ll). + + 9.2.1 CanonicalizeLocaleList (locales) + ... + 7. Repeat, while k < len + ... + c. If kPresent is true, then + ... + v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). + ... + + UTS 35, §3.2.1 Canonical Unicode Locale Identifiers + Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms. + See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The + aliases are in the alias attribute value, while the canonical is in the name attribute value. +includes: [testIntl.js] +---*/ + +// / +const testData = { + // + "primary": "level1", + + // "secondary" doesn't match |uvalue|, so we can skip it. 
+ // + // "secondary": "level2", + + // + "tertiary": "level3", + + // Neither "quaternary" nor "quarternary" match |uvalue|, so we can skip them. + // + // "quaternary": "level4", + // "quarternary": "level4", + + // "identical" doesn't match |uvalue|, so we can skip it. + // + // "identical": "identic", +}; + +for (let [alias, name] of Object.entries(testData)) { + let tag = "und-u-ks-" + alias; + let canonical = "und-u-ks-" + name; + + // Make sure the test data is correct. + assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(tag), false, + "\"" + tag + "\" isn't a canonical language tag."); + assert(isCanonicalizedStructurallyValidLanguageTag(canonical), + "\"" + canonical + "\" is a canonical and structurally valid language tag."); + + let result = Intl.getCanonicalLocales(tag); + assert.sameValue(result.length, 1); + assert.sameValue(result[0], canonical); +} diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-measurement-system.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-measurement-system.js new file mode 100644 index 0000000000..013bc94242 --- /dev/null +++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-measurement-system.js @@ -0,0 +1,49 @@ +// Copyright (C) 2020 André Bargull. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-intl.getcanonicallocales +description: > + Test Unicode extension subtag canonicalisation for the "ms" extension key. +info: | + 8.2.1 Intl.getCanonicalLocales (locales) + 1. Let ll be ? CanonicalizeLocaleList(locales). + 2. Return CreateArrayFromList(ll). + + 9.2.1 CanonicalizeLocaleList (locales) + ... + 7. Repeat, while k < len + ... + c. If kPresent is true, then + ... + v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). + ... 
+ + UTS 35, §3.2.1 Canonical Unicode Locale Identifiers + Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms. + See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The + aliases are in the alias attribute value, while the canonical is in the name attribute value. +includes: [testIntl.js] +---*/ + +// +const testData = { + // + "imperial": "uksystem", +}; + +for (let [alias, name] of Object.entries(testData)) { + let tag = "und-u-ms-" + alias; + let canonical = "und-u-ms-" + name; + + // Make sure the test data is correct. + assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(tag), false, + "\"" + tag + "\" isn't a canonical language tag."); + assert(isCanonicalizedStructurallyValidLanguageTag(canonical), + "\"" + canonical + "\" is a canonical and structurally valid language tag."); + + let result = Intl.getCanonicalLocales(tag); + assert.sameValue(result.length, 1); + assert.sameValue(result[0], canonical); +} diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-region.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-region.js new file mode 100644 index 0000000000..197b6868c0 --- /dev/null +++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-region.js @@ -0,0 +1,67 @@ +// Copyright (C) 2020 André Bargull. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-intl.getcanonicallocales +description: > + Test Unicode extension subtag canonicalisation for the "rg" extension key. +info: | + 8.2.1 Intl.getCanonicalLocales (locales) + 1. Let ll be ? CanonicalizeLocaleList(locales). + 2. Return CreateArrayFromList(ll). + + 9.2.1 CanonicalizeLocaleList (locales) + ... + 7. Repeat, while k < len + ... + c. If kPresent is true, then + ... + v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). 
+ ... + + UTS 35, §3.2.1 Canonical Unicode Locale Identifiers + Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms. + See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The + aliases are in the alias attribute value, while the canonical is in the name attribute value. + + Replace aliases in special key values: + If there is an 'sd' or 'rg' key, replace any subdivision alias in its value in the same way, + using subdivisionAlias data. +includes: [testIntl.js] +---*/ + +const testData = { + // + "no23": "no50", + + // + "cn11": "cnbj", + + // + "cz10a": "cz110", + + // + "fra": "frges", + + // + "frg": "frges", + + // + "lud": "lucl", +}; + +for (let [alias, name] of Object.entries(testData)) { + let tag = "und-u-rg-" + alias; + let canonical = "und-u-rg-" + name; + + // Make sure the test data is correct. + assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(tag), false, + "\"" + tag + "\" isn't a canonical language tag."); + assert(isCanonicalizedStructurallyValidLanguageTag(canonical), + "\"" + canonical + "\" is a canonical and structurally valid language tag."); + + let result = Intl.getCanonicalLocales(tag); + assert.sameValue(result.length, 1); + assert.sameValue(result[0], canonical); +} diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-subdivision.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-subdivision.js new file mode 100644 index 0000000000..3bbc70fb43 --- /dev/null +++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-subdivision.js @@ -0,0 +1,72 @@ +// Copyright (C) 2020 André Bargull. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-intl.getcanonicallocales +description: > + Test Unicode extension subtag canonicalisation for the "sd" extension key. +info: | + 8.2.1 Intl.getCanonicalLocales (locales) + 1. Let ll be ? CanonicalizeLocaleList(locales). 
+ 2. Return CreateArrayFromList(ll). + + 9.2.1 CanonicalizeLocaleList (locales) + ... + 7. Repeat, while k < len + ... + c. If kPresent is true, then + ... + v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). + ... + + UTS 35, §3.2.1 Canonical Unicode Locale Identifiers + Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms. + See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The + aliases are in the alias attribute value, while the canonical is in the name attribute value. + + Replace aliases in special key values: + If there is an 'sd' or 'rg' key, replace any subdivision alias in its value in the same way, + using subdivisionAlias data. +includes: [testIntl.js] +---*/ + +const testData = { + // + "no23": "no50", + + // + "cn11": "cnbj", + + // + "cz10a": "cz110", + + // + "fra": "frges", + + // + "frg": "frges", + + // + "lud": "lucl", +}; + +for (let [alias, name] of Object.entries(testData)) { + // Subdivision codes should always have a matching region subtag. This + // shouldn't actually matter for canonicalisation, but let's not push our + // luck and instead keep the language tag 'valid' per UTS 35, §3.6.5. + let region = name.substring(0, 2).toUpperCase(); + + let tag = `und-${region}-u-sd-${alias}`; + let canonical = `und-${region}-u-sd-${name}`; + + // Make sure the test data is correct. 
+ assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(tag), false, + "\"" + tag + "\" isn't a canonical language tag."); + assert(isCanonicalizedStructurallyValidLanguageTag(canonical), + "\"" + canonical + "\" is a canonical and structurally valid language tag."); + + let result = Intl.getCanonicalLocales(tag); + assert.sameValue(result.length, 1); + assert.sameValue(result[0], canonical); +} diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-timezone.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-timezone.js new file mode 100644 index 0000000000..4de6458639 --- /dev/null +++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-timezone.js @@ -0,0 +1,72 @@ +// Copyright (C) 2020 André Bargull. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-intl.getcanonicallocales +description: > + Test Unicode extension subtag canonicalisation for the "tz" extension key. +info: | + 8.2.1 Intl.getCanonicalLocales (locales) + 1. Let ll be ? CanonicalizeLocaleList(locales). + 2. Return CreateArrayFromList(ll). + + 9.2.1 CanonicalizeLocaleList (locales) + ... + 7. Repeat, while k < len + ... + c. If kPresent is true, then + ... + v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). + ... + + UTS 35, §3.2.1 Canonical Unicode Locale Identifiers + Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms. + See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The + aliases are in the alias attribute value, while the canonical is in the name attribute value. +includes: [testIntl.js] +---*/ + +// +const testData = { + // Similar to the "ca" extension key, assume "preferred" holds the canonical + // value and "name" the alias value. 
+ + // + "cnckg": "cnsha", + + // NB: "Eire" matches the |uvalue| production. + // + "eire": "iedub", + + // NB: "EST" matches the |uvalue| production. + // + "est": "utcw05", + + // NB: "GMT0" matches the |uvalue| production. + // + "gmt0": "gmt", + + // NB: "UCT" matches the |uvalue| production. + // + "uct": "utc", + + // NB: "Zulu" matches the |uvalue| production. + // + "zulu": "utc", +}; + +for (let [alias, name] of Object.entries(testData)) { + let tag = "und-u-tz-" + alias; + let canonical = "und-u-tz-" + name; + + // Make sure the test data is correct. + assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(tag), false, + "\"" + tag + "\" isn't a canonical language tag."); + assert(isCanonicalizedStructurallyValidLanguageTag(canonical), + "\"" + canonical + "\" is a canonical and structurally valid language tag."); + + let result = Intl.getCanonicalLocales(tag); + assert.sameValue(result.length, 1); + assert.sameValue(result[0], canonical); +} diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-yes-to-true.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-yes-to-true.js new file mode 100644 index 0000000000..296877bb6a --- /dev/null +++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-yes-to-true.js @@ -0,0 +1,86 @@ +// Copyright (C) 2020 André Bargull. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-intl.getcanonicallocales +description: > + "kb", "kc", "kh", "kk", and "kn" Unicode extension keys canonicalise "yes" to "true". +info: | + 8.2.1 Intl.getCanonicalLocales (locales) + 1. Let ll be ? CanonicalizeLocaleList(locales). + 2. Return CreateArrayFromList(ll). + + 9.2.1 CanonicalizeLocaleList (locales) + ... + 7. Repeat, while k < len + ... + c. If kPresent is true, then + ... + v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). 
+ ... + + UTS 35, §3.2.1 Canonical Unicode Locale Identifiers + Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms. + See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The + aliases are in the alias attribute value, while the canonical is in the name attribute value. + + UTS 35, §3.2.1 Canonical Unicode Locale Identifiers + Any type or tfield value "true" is removed. +includes: [testIntl.js] +---*/ + +const unicodeKeys = [ + // + // + "kb", + + // + // + "kc", + + // + // + "kh", + + // + // + "kk", + + // + // + "kn", +]; + +for (let key of unicodeKeys) { + let tag = `und-u-${key}-yes`; + let canonical = `und-u-${key}`; + + // Make sure the test data is correct. + assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(tag), false, + "\"" + tag + "\" isn't a canonical language tag."); + assert(isCanonicalizedStructurallyValidLanguageTag(canonical), + "\"" + canonical + "\" is a canonical and structurally valid language tag."); + + let result = Intl.getCanonicalLocales(tag); + assert.sameValue(result.length, 1); + assert.sameValue(result[0], canonical); +} + +// Test some other Unicode extension keys which don't contain an alias entry to +// canonicalise "yes" to "true". +const otherUnicodeKeys = [ + "ka", "kf", "kr", "ks", "kv", +]; + +for (let key of otherUnicodeKeys) { + let tag = `und-u-${key}-yes`; + + // Make sure the test data is correct. 
+ assert(isCanonicalizedStructurallyValidLanguageTag(tag), + "\"" + tag + "\" is a canonical and structurally valid language tag."); + + let result = Intl.getCanonicalLocales(tag); + assert.sameValue(result.length, 1); + assert.sameValue(result[0], tag); +} diff --git a/test/intl402/Intl/getCanonicalLocales/unicode-ext-key-with-digit.js b/test/intl402/Intl/getCanonicalLocales/unicode-ext-key-with-digit.js new file mode 100644 index 0000000000..aa31187b1b --- /dev/null +++ b/test/intl402/Intl/getCanonicalLocales/unicode-ext-key-with-digit.js @@ -0,0 +1,54 @@ +// Copyright (C) 2020 André Bargull. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-intl.getcanonicallocales +description: > + Test Unicode extension subtags where the ukey subtag contains a digit. +info: | + 8.2.1 Intl.getCanonicalLocales (locales) + 1. Let ll be ? CanonicalizeLocaleList(locales). + 2. Return CreateArrayFromList(ll). + + 9.2.1 CanonicalizeLocaleList (locales) + ... + 7. Repeat, while k < len + ... + c. If kPresent is true, then + ... + v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). + ... + +includes: [testIntl.js] +---*/ + +// Unicode locale extension sequences don't allow keys with a digit as their +// second character. +const invalidCases = [ + "en-u-c0", + "en-u-00", +]; + +// The first character is allowed to be a digit. +const validCases = [ + "en-u-0c", +]; + +for (let invalid of invalidCases) { + // Make sure the test data is correct. + assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(invalid), false, + "\"" + invalid + "\" isn't a structurally valid language tag."); + + assert.throws(RangeError, () => Intl.getCanonicalLocales(invalid)); +} + +for (let valid of validCases) { + // Make sure the test data is correct. 
+ assert(isCanonicalizedStructurallyValidLanguageTag(valid), + "\"" + valid + "\" is a canonical and structurally valid language tag."); + + let result = Intl.getCanonicalLocales(valid); + assert.sameValue(result.length, 1); + assert.sameValue(result[0], valid); +} diff --git a/test/intl402/Locale/constructor-apply-options-canonicalizes-twice.js b/test/intl402/Locale/constructor-apply-options-canonicalizes-twice.js new file mode 100644 index 0000000000..a0ec1c83e8 --- /dev/null +++ b/test/intl402/Locale/constructor-apply-options-canonicalizes-twice.js @@ -0,0 +1,26 @@ +// Copyright 2020 André Bargull. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-apply-options-to-tag +description: > + ApplyOptionsToTag canonicalises the language tag two times. +info: | + 10.1.1 ApplyOptionsToTag( tag, options ) + + ... + 9. Set tag to CanonicalizeUnicodeLocaleId(tag). + 10. If language is not undefined, + ... + b. Set tag to tag with the substring corresponding to the unicode_language_subtag + production of the unicode_language_id replaced by the string language. + ... + 13. Return CanonicalizeUnicodeLocaleId(tag). +features: [Intl.Locale] +---*/ + +// ApplyOptionsToTag canonicalises the locale identifier before applying the +// options. That means "und-Armn-SU" is first canonicalised to "und-Armn-AM", +// then the language is changed to "ru". If "ru" were applied first, the result +// would be "ru-Armn-RU" instead. 
+assert.sameValue(new Intl.Locale("und-Armn-SU", {language: "ru"}).toString(), "ru-Armn-AM"); diff --git a/test/intl402/Locale/likely-subtags.js b/test/intl402/Locale/likely-subtags.js index 42d56a7217..44761f34a1 100644 --- a/test/intl402/Locale/likely-subtags.js +++ b/test/intl402/Locale/likely-subtags.js @@ -37,6 +37,10 @@ const testDataMaximal = { "und-419": "es-Latn-419", "und-150": "ru-Cyrl-RU", "und-AT": "de-Latn-AT", + "und-Cyrl-RO": "bg-Cyrl-RO", + + // Undefined primary language not required to change in all cases. + "und-AQ": "und-Latn-AQ", }; const testDataMinimal = { @@ -62,6 +66,8 @@ const testDataMinimal = { "es-Latn-419": "es-419", "ru-Cyrl-RU": "ru", "de-Latn-AT": "de-AT", + "bg-Cyrl-RO": "bg-RO", + "und-Latn-AQ": "und-AQ", }; // Add variants, extensions, and privateuse subtags and ensure they don't diff --git a/test/intl402/Locale/prototype/minimize/removing-likely-subtags-first-adds-likely-subtags.js b/test/intl402/Locale/prototype/minimize/removing-likely-subtags-first-adds-likely-subtags.js new file mode 100644 index 0000000000..2b2368b383 --- /dev/null +++ b/test/intl402/Locale/prototype/minimize/removing-likely-subtags-first-adds-likely-subtags.js @@ -0,0 +1,49 @@ +// Copyright 2020 André Bargull. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: sec-Intl.Locale.prototype.minimize +description: > + The "Remove Likely Subtags" algorithm adds likely subtags before processing the locale. +info: | + Intl.Locale.prototype.minimize () + 3. Let minimal be the result of the Remove Likely Subtags algorithm applied to loc.[[Locale]]. + If an error is signaled, set minimal to loc.[[Locale]]. + + UTS 35, §4.3 Likely Subtags + Remove Likely Subtags + + 1. First get max = AddLikelySubtags(inputLocale). If an error is signaled, return it. + 2. ... +features: [Intl.Locale] +---*/ + +var testDataMinimal = { + // Undefined primary language. 
+ "und": "en", + "und-Thai": "th", + "und-419": "es-419", + "und-150": "ru", + "und-AT": "de-AT", + + // https://unicode-org.atlassian.net/browse/ICU-13786 + "aae-Latn-IT": "aae-Latn-IT", + "aae-Thai-CO": "aae-Thai-CO", + + // https://unicode-org.atlassian.net/browse/ICU-10220 + // https://unicode-org.atlassian.net/browse/ICU-12345 + "und-CW": "pap-CW", + "und-US": "en", + "zh-Hant": "zh-TW", + "zh-Hani": "zh-Hani", +}; + +for (const [tag, minimal] of Object.entries(testDataMinimal)) { + // Assert the |minimal| tag is indeed minimal. + assert.sameValue(new Intl.Locale(minimal).minimize().toString(), minimal, + `"${minimal}" should be minimal`); + + // Assert RemoveLikelySubtags(tag) returns |minimal|. + assert.sameValue(new Intl.Locale(tag).minimize().toString(), minimal, + `"${tag}".minimize() should be "${minimal}"`); +}