Upstream additional language tag canonicalisation and Intl.Locale tests from SpiderMonkey

Adds more canonicalisation and Intl.Locale tests from:
- https://hg.mozilla.org/mozilla-central/file/tip/js/src/tests/non262/Intl/Locale
- https://hg.mozilla.org/mozilla-central/file/tip/js/src/tests/non262/Intl/
This commit is contained in:
André Bargull 2020-03-30 04:55:25 -07:00 committed by Rick Waldron
parent 5d8e6c8e85
commit 18bb955771
16 changed files with 980 additions and 0 deletions

View File

@ -0,0 +1,58 @@
// Copyright (C) 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-intl.getcanonicallocales
description: >
Assert non-simple language subtag replacements work as expected.
info: |
8.2.1 Intl.getCanonicalLocales (locales)
1. Let ll be ? CanonicalizeLocaleList(locales).
2. Return CreateArrayFromList(ll).
9.2.1 CanonicalizeLocaleList (locales)
...
7. Repeat, while k < len
...
c. If kPresent is true, then
...
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
...
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
- Replace aliases in the unicode_language_id and tlang (if any) using the following process:
- If the language subtag matches the type attribute of a languageAlias element in
Supplemental Data, replace the language subtag with the replacement value.
1. If there are additional subtags in the replacement value, add them to the result,
but only if there is no corresponding subtag already in the tag.
includes: [testIntl.js]
---*/
// Some CLDR language aliases replace more than the bare language subtag: the
// replacement value also carries a script or region subtag, which is only
// applied when the input has no subtag of that kind already.
const testData = {
  // <languageAlias type="sh" replacement="sr_Latn" reason="legacy"/>
  // "Latn" is only added for "sh" when no script subtag was supplied.
  "sh": "sr-Latn",
  "sh-Cyrl": "sr-Cyrl",

  // <languageAlias type="cnr" replacement="sr_ME" reason="legacy"/>
  // "ME" is only added for "cnr" when no region subtag was supplied.
  "cnr": "sr-ME",
  "cnr-BA": "sr-BA",
};

Object.keys(testData).forEach(tag => {
  const canonical = testData[tag];

  // Sanity-check the expected value before exercising the API.
  assert(
    isCanonicalizedStructurallyValidLanguageTag(canonical),
    `"${canonical}" is a canonicalized and structurally valid language tag.`
  );

  const locales = Intl.getCanonicalLocales(tag);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], canonical);
});

View File

@ -0,0 +1,108 @@
// Copyright (C) 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-intl.getcanonicallocales
description: >
Assert non-simple region subtag replacements work as expected.
info: |
8.2.1 Intl.getCanonicalLocales (locales)
1. Let ll be ? CanonicalizeLocaleList(locales).
2. Return CreateArrayFromList(ll).
9.2.1 CanonicalizeLocaleList (locales)
...
7. Repeat, while k < len
...
c. If kPresent is true, then
...
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
...
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
- Replace aliases in the unicode_language_id and tlang (if any) using the following process:
- If the region subtag matches the type attribute of a territoryAlias element in
Supplemental Data, replace the language subtag with the replacement value, as follows:
1. If there is a single territory in the replacement, use it.
2. If there are multiple territories:
1. Look up the most likely territory for the base language code (and script, if there is one).
2. If that likely territory is in the list, use it.
3. Otherwise, use the first territory in the list.
includes: [testIntl.js]
---*/
// Some CLDR territory aliases list several candidate replacement regions. The
// one that is picked depends on the likely subtags derived from the language
// and script subtags of the input.
const testData = {
  // The Soviet Union ("SU", numeric code "810") was broken up, so its region
  // code is replaced by Russia ("RU"), Armenia ("AM"), or one of many others,
  // based on the given (or merely likely) language and script subtags:
  //
  // <territoryAlias type="SU" replacement="RU AM AZ BY EE GE KZ KG LV LT MD TJ TM UA UZ" reason="deprecated"/>
  // <territoryAlias type="810" replacement="RU AM AZ BY EE GE KZ KG LV LT MD TJ TM UA UZ" reason="overlong"/>
  "ru-SU": "ru-RU",
  "ru-810": "ru-RU",
  "en-SU": "en-RU",
  "en-810": "en-RU",
  "und-SU": "und-RU",
  "und-810": "und-RU",
  "und-Latn-SU": "und-Latn-RU",
  "und-Latn-810": "und-Latn-RU",

  // Armenia becomes the preferred region when the language is "hy" (Armenian)
  // or the script is "Armn" (Armenian).
  //
  // <likelySubtag from="hy" to="hy_Armn_AM"/>
  // <likelySubtag from="und_Armn" to="hy_Armn_AM"/>
  "hy-SU": "hy-AM",
  "hy-810": "hy-AM",
  "und-Armn-SU": "und-Armn-AM",
  "und-Armn-810": "und-Armn-AM",

  // <territoryAlias type="CS" replacement="RS ME" reason="deprecated"/>
  //
  // Likely-subtags entries mentioning "RS" or "ME":
  //
  // <likelySubtag from="sr" to="sr_Cyrl_RS"/>
  // <likelySubtag from="sr_ME" to="sr_Latn_ME"/>
  // <likelySubtag from="und_RS" to="sr_Cyrl_RS"/>
  // <likelySubtag from="und_ME" to="sr_Latn_ME"/>
  //
  // No language/script combination without a region subtag ever resolves to
  // "ME", so "CS" is always replaced by "RS".
  "sr-CS": "sr-RS",
  "sr-Latn-CS": "sr-Latn-RS",
  "sr-Cyrl-CS": "sr-Cyrl-RS",

  // The region already present in the input is not consulted when choosing the
  // likely replacement region. Consider "az-NT", Azerbaijani as spoken in the
  // Neutral Zone. "NT" can be replaced by "SA" (Saudi-Arabia) or "IQ" (Iraq),
  // and a likely-subtags entry for "az-IQ" exists. However, looking at the
  // language subtag alone, "az" always resolves to "az-Latn-AZ". "AZ" isn't in
  // the list ["SA", "IQ"], so the default replacement for "NT", i.e. "SA", is
  // used. "az-NT" therefore canonicalises to "az-SA" rather than "az-IQ", even
  // though the latter might be the more sensible choice given real-world usage
  // of the target locales.
  //
  // <territoryAlias type="NT" replacement="SA IQ" reason="deprecated"/>
  // <likelySubtag from="az_IQ" to="az_Arab_IQ"/>
  // <likelySubtag from="az" to="az_Latn_AZ"/>
  "az-NT": "az-SA",
};

for (const tag of Object.keys(testData)) {
  const canonical = testData[tag];

  // Sanity-check the expected value before exercising the API.
  assert(
    isCanonicalizedStructurallyValidLanguageTag(canonical),
    `"${canonical}" is a canonicalized and structurally valid language tag.`
  );

  const locales = Intl.getCanonicalLocales(tag);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], canonical);
}

View File

@ -0,0 +1,54 @@
// Copyright (C) 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-intl.getcanonicallocales
description: >
Test canonicalisation within transformed extension subtags.
info: |
8.2.1 Intl.getCanonicalLocales (locales)
1. Let ll be ? CanonicalizeLocaleList(locales).
2. Return CreateArrayFromList(ll).
9.2.1 CanonicalizeLocaleList (locales)
...
7. Repeat, while k < len
...
c. If kPresent is true, then
...
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
...
includes: [testIntl.js]
---*/
// Maps each input tag to the canonical form expected for its transformed
// ("-t-") extension.
const testData = {
  // Variants inside tlang are sorted alphabetically.
  "sl-t-sl-rozaj-biske-1994": "sl-t-sl-1994-biske-rozaj",

  // tfields are sorted alphabetically by key; this entry also covers case
  // normalisation of subtags.
  "DE-T-M0-DIN-K0-QWERTZ": "de-t-k0-qwertz-m0-din",

  // A "true" tvalue is kept. UTS 35 version 36, §3.2.1 claims otherwise, but
  // a tkey must be followed by a tvalue, so that's likely a spec bug in
  // UTS 35.
  "en-t-m0-true": "en-t-m0-true",

  // The tlang part is canonicalised as well.
  "en-t-iw": "en-t-he",

  // A deprecated tvalue is replaced by its preferred value.
  "und-Latn-t-und-hani-m0-names": "und-Latn-t-und-hani-m0-prprname",
};

Object.entries(testData).forEach(([tag, canonical]) => {
  // Sanity-check the expected value before exercising the API.
  const expectationIsCanonical = isCanonicalizedStructurallyValidLanguageTag(canonical);
  assert(
    expectationIsCanonical,
    `"${canonical}" is a canonical and structurally valid language tag.`
  );

  const locales = Intl.getCanonicalLocales(tag);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], canonical);
});

View File

@ -0,0 +1,78 @@
// Copyright (C) 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-intl.getcanonicallocales
description: >
A RangeError is thrown when a language tag includes an invalid transformed extension subtag.
info: |
8.2.1 Intl.getCanonicalLocales (locales)
1. Let ll be ? CanonicalizeLocaleList(locales).
2. Return CreateArrayFromList(ll).
9.2.1 CanonicalizeLocaleList (locales)
...
7. Repeat, while k < len
...
c. If kPresent is true, then
...
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
...
includes: [testIntl.js]
---*/
// Language tags whose transformed ("-t-") extension is malformed. Each one
// must be rejected with a RangeError.
const invalid = [
  // Empty extension: nothing usable follows the "t" singleton.
  "en-t",
  "en-t-a",
  "en-t-x",
  "en-t-0",

  // Incomplete: trailing separator.
  "en-t-",
  "en-t-en-",
  "en-t-0x-",

  // tlang: unicode_language_subtag is 2-3 or 5-8 characters long and can't
  // include extlang subtags.
  "en-t-root",
  "en-t-abcdefghi",
  "en-t-ar-aao",

  // tlang: unicode_script_subtag is exactly 4 alphabetical characters and
  // can't appear twice.
  "en-t-en-lat0",
  "en-t-en-latn-latn",

  // tlang: unicode_region_subtag is either 2 alpha characters or a three
  // digit code.
  "en-t-en-0",
  "en-t-en-00",
  "en-t-en-0x",
  "en-t-en-x0",
  "en-t-en-latn-0",
  "en-t-en-latn-00",
  "en-t-en-latn-xyz",

  // tlang: unicode_variant_subtag is either 5-8 alphanum characters or 4
  // characters starting with a digit.
  "en-t-en-abcdefghi",
  "en-t-en-latn-gb-ab",
  "en-t-en-latn-gb-abc",
  "en-t-en-latn-gb-abcd",
  "en-t-en-latn-gb-abcdefghi",

  // A tkey needs a tvalue after it.
  "en-t-d0",
  "en-t-d0-m0",
  "en-t-d0-x-private",
];

invalid.forEach(tag => {
  // Sanity-check the test data: the harness helper must reject the tag too.
  const acceptedByHarness = isCanonicalizedStructurallyValidLanguageTag(tag);
  assert.sameValue(
    acceptedByHarness,
    false,
    `"${tag}" isn't a structurally valid language tag.`
  );

  assert.throws(RangeError, () => Intl.getCanonicalLocales(tag), `${tag}`);
});

View File

@ -0,0 +1,78 @@
// Copyright (C) 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-intl.getcanonicallocales
description: >
No RangeError is thrown when a language tag includes a valid transformed extension subtag.
info: |
8.2.1 Intl.getCanonicalLocales (locales)
1. Let ll be ? CanonicalizeLocaleList(locales).
2. Return CreateArrayFromList(ll).
9.2.1 CanonicalizeLocaleList (locales)
...
7. Repeat, while k < len
...
c. If kPresent is true, then
...
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
...
includes: [testIntl.js]
---*/
// Already-canonical language tags with a well-formed transformed ("-t-")
// extension. Each is expected to round-trip unchanged.
const valid = [
  // tlang with unicode_language_subtag.
  "en-t-en",

  // tlang with unicode_script_subtag.
  "en-t-en-latn",

  // tlang with unicode_region_subtag.
  "en-t-en-ca",

  // tlang with unicode_script_subtag and unicode_region_subtag.
  "en-t-en-latn-ca",

  // tlang with unicode_variant_subtag.
  "en-t-en-emodeng",

  // tlang with unicode_script_subtag and unicode_variant_subtag.
  "en-t-en-latn-emodeng",

  // tlang with unicode_region_subtag and unicode_variant_subtag.
  "en-t-en-ca-emodeng",

  // tlang with unicode_script_subtag, unicode_region_subtag, and unicode_variant_subtag.
  "en-t-en-latn-ca-emodeng",

  // No tlang. (Must contain at least one tfield.)
  "en-t-d0-ascii",
];

// Suffixes appended to every entry of |valid| to cover additional tfields.
const extraFields = [
  // No extra tfield
  "",

  // tfield with a tvalue consisting of a single subtag.
  "-i0-handwrit",

  // tfield with a tvalue consisting of two subtags.
  "-s0-accents-publish",
];

for (const tag of valid) {
  for (const extra of extraFields) {
    const actualTag = tag + extra;

    // Make sure the test data is correct.
    assert(
      isCanonicalizedStructurallyValidLanguageTag(actualTag),
      `"${actualTag}" is a canonical and structurally valid language tag.`
    );

    // The tag is already canonical, so it must be returned unchanged.
    const result = Intl.getCanonicalLocales(actualTag);
    assert.sameValue(result.length, 1);
    assert.sameValue(result[0], actualTag);
  }
}

View File

@ -0,0 +1,58 @@
// Copyright (C) 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-intl.getcanonicallocales
description: >
Test Unicode extension subtag canonicalisation for the "ca" extension key.
info: |
8.2.1 Intl.getCanonicalLocales (locales)
1. Let ll be ? CanonicalizeLocaleList(locales).
2. Return CreateArrayFromList(ll).
9.2.1 CanonicalizeLocaleList (locales)
...
7. Repeat, while k < len
...
c. If kPresent is true, then
...
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
...
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The
aliases are in the alias attribute value, while the canonical is in the name attribute value.
includes: [testIntl.js]
---*/
// <key name="ca" [...] alias="calendar">
//
// Maps a non-canonical "ca" type to the canonical type expected in its place.
const testData = {
  // <type name="ethioaa" [...] alias="ethiopic-amete-alem"/>
  "ethiopic-amete-alem": "ethioaa",

  // <type name="islamic-civil" [...] />
  // <type name="islamicc" [...] deprecated="true" preferred="islamic-civil" alias="islamic-civil"/>
  //
  // "name" and "alias" for "islamic-civil" don't quite match what's specified
  // in UTS 35, §3.2.1. Following §3.2.1 to the letter would make "islamicc"
  // the canonical value and "islamic-civil" an alias value. Assume the
  // definitions in
  // https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
  // overrule UTS 35, §3.2.1.
  "islamicc": "islamic-civil",
};

Object.entries(testData).forEach(([alias, name]) => {
  const prefix = "und-u-ca-";
  const tag = prefix + alias;
  const canonical = prefix + name;

  // Sanity-check the test data: the input is non-canonical, the expectation
  // is canonical.
  assert.sameValue(
    isCanonicalizedStructurallyValidLanguageTag(tag),
    false,
    `"${tag}" isn't a canonical language tag.`
  );
  assert(
    isCanonicalizedStructurallyValidLanguageTag(canonical),
    `"${canonical}" is a canonical and structurally valid language tag.`
  );

  const locales = Intl.getCanonicalLocales(tag);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], canonical);
});

View File

@ -0,0 +1,65 @@
// Copyright (C) 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-intl.getcanonicallocales
description: >
Test Unicode extension subtag canonicalisation for the "ks" extension key.
info: |
8.2.1 Intl.getCanonicalLocales (locales)
1. Let ll be ? CanonicalizeLocaleList(locales).
2. Return CreateArrayFromList(ll).
9.2.1 CanonicalizeLocaleList (locales)
...
7. Repeat, while k < len
...
c. If kPresent is true, then
...
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
...
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The
aliases are in the alias attribute value, while the canonical is in the name attribute value.
includes: [testIntl.js]
---*/
// <key name="ks" [...] alias="colStrength">
//
// Maps a non-canonical "ks" type to the canonical type expected in its place.
// Aliases that don't match the |uvalue| production can't appear in a language
// tag at all, so they are listed only as comments.
const testData = {
  // <type name="level1" [...] alias="primary"/>
  "primary": "level1",

  // <type name="level2" [...] alias="secondary"/>
  // Skipped, "secondary" doesn't match |uvalue|.
  // "secondary": "level2",

  // <type name="level3" [...] alias="tertiary"/>
  "tertiary": "level3",

  // <type name="level4" [...] alias="quaternary quarternary"/>
  // Skipped, neither "quaternary" nor "quarternary" match |uvalue|.
  // "quaternary": "level4",
  // "quarternary": "level4",

  // <type name="identic" [...] alias="identical"/>
  // Skipped, "identical" doesn't match |uvalue|.
  // "identical": "identic",
};

for (const alias of Object.keys(testData)) {
  const name = testData[alias];
  const tag = `und-u-ks-${alias}`;
  const canonical = `und-u-ks-${name}`;

  // Sanity-check the test data: the input is non-canonical, the expectation
  // is canonical.
  assert.sameValue(
    isCanonicalizedStructurallyValidLanguageTag(tag),
    false,
    `"${tag}" isn't a canonical language tag.`
  );
  assert(
    isCanonicalizedStructurallyValidLanguageTag(canonical),
    `"${canonical}" is a canonical and structurally valid language tag.`
  );

  const locales = Intl.getCanonicalLocales(tag);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], canonical);
}

View File

@ -0,0 +1,49 @@
// Copyright (C) 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-intl.getcanonicallocales
description: >
Test Unicode extension subtag canonicalisation for the "ms" extension key.
info: |
8.2.1 Intl.getCanonicalLocales (locales)
1. Let ll be ? CanonicalizeLocaleList(locales).
2. Return CreateArrayFromList(ll).
9.2.1 CanonicalizeLocaleList (locales)
...
7. Repeat, while k < len
...
c. If kPresent is true, then
...
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
...
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The
aliases are in the alias attribute value, while the canonical is in the name attribute value.
includes: [testIntl.js]
---*/
// <key name="ms" [...] alias="measure" since="28">
//
// Maps a non-canonical "ms" type to the canonical type expected in its place.
const testData = {
  // <type name="uksystem" [...] alias="imperial" since="28" />
  "imperial": "uksystem",
};

Object.keys(testData).forEach(alias => {
  const tag = `und-u-ms-${alias}`;
  const canonical = `und-u-ms-${testData[alias]}`;

  // Sanity-check the test data: the input is non-canonical, the expectation
  // is canonical.
  const tagIsCanonical = isCanonicalizedStructurallyValidLanguageTag(tag);
  assert.sameValue(tagIsCanonical, false, `"${tag}" isn't a canonical language tag.`);

  const expectationIsCanonical = isCanonicalizedStructurallyValidLanguageTag(canonical);
  assert(
    expectationIsCanonical,
    `"${canonical}" is a canonical and structurally valid language tag.`
  );

  const locales = Intl.getCanonicalLocales(tag);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], canonical);
});

View File

@ -0,0 +1,67 @@
// Copyright (C) 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-intl.getcanonicallocales
description: >
Test Unicode extension subtag canonicalisation for the "rg" extension key.
info: |
8.2.1 Intl.getCanonicalLocales (locales)
1. Let ll be ? CanonicalizeLocaleList(locales).
2. Return CreateArrayFromList(ll).
9.2.1 CanonicalizeLocaleList (locales)
...
7. Repeat, while k < len
...
c. If kPresent is true, then
...
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
...
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The
aliases are in the alias attribute value, while the canonical is in the name attribute value.
Replace aliases in special key values:
If there is an 'sd' or 'rg' key, replace any subdivision alias in its value in the same way,
using subdivisionAlias data.
includes: [testIntl.js]
---*/
// Maps a deprecated subdivision code to the replacement expected as the value
// of the "rg" key. When the alias lists several replacements, the first one
// is the expected result.
const testData = {
  // <subdivisionAlias type="no23" replacement="no50" reason="deprecated"/>
  "no23": "no50",

  // <subdivisionAlias type="cn11" replacement="cnbj" reason="deprecated"/>
  "cn11": "cnbj",

  // <subdivisionAlias type="cz10a" replacement="cz110" reason="deprecated"/>
  "cz10a": "cz110",

  // <subdivisionAlias type="fra" replacement="frges" reason="deprecated"/>
  "fra": "frges",

  // <subdivisionAlias type="frg" replacement="frges" reason="deprecated"/>
  "frg": "frges",

  // <subdivisionAlias type="lud" replacement="lucl ludi lurd luvd luwi" reason="deprecated"/>
  "lud": "lucl",
};

Object.entries(testData).forEach(([alias, name]) => {
  const tag = `und-u-rg-${alias}`;
  const canonical = `und-u-rg-${name}`;

  // Sanity-check the test data: the input is non-canonical, the expectation
  // is canonical.
  assert.sameValue(
    isCanonicalizedStructurallyValidLanguageTag(tag),
    false,
    `"${tag}" isn't a canonical language tag.`
  );
  assert(
    isCanonicalizedStructurallyValidLanguageTag(canonical),
    `"${canonical}" is a canonical and structurally valid language tag.`
  );

  const locales = Intl.getCanonicalLocales(tag);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], canonical);
});

View File

@ -0,0 +1,72 @@
// Copyright (C) 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-intl.getcanonicallocales
description: >
Test Unicode extension subtag canonicalisation for the "sd" extension key.
info: |
8.2.1 Intl.getCanonicalLocales (locales)
1. Let ll be ? CanonicalizeLocaleList(locales).
2. Return CreateArrayFromList(ll).
9.2.1 CanonicalizeLocaleList (locales)
...
7. Repeat, while k < len
...
c. If kPresent is true, then
...
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
...
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The
aliases are in the alias attribute value, while the canonical is in the name attribute value.
Replace aliases in special key values:
If there is an 'sd' or 'rg' key, replace any subdivision alias in its value in the same way,
using subdivisionAlias data.
includes: [testIntl.js]
---*/
// Maps a deprecated subdivision code to the replacement expected as the value
// of the "sd" key. When the alias lists several replacements, the first one
// is the expected result.
const testData = {
  // <subdivisionAlias type="no23" replacement="no50" reason="deprecated"/>
  "no23": "no50",

  // <subdivisionAlias type="cn11" replacement="cnbj" reason="deprecated"/>
  "cn11": "cnbj",

  // <subdivisionAlias type="cz10a" replacement="cz110" reason="deprecated"/>
  "cz10a": "cz110",

  // <subdivisionAlias type="fra" replacement="frges" reason="deprecated"/>
  "fra": "frges",

  // <subdivisionAlias type="frg" replacement="frges" reason="deprecated"/>
  "frg": "frges",

  // <subdivisionAlias type="lud" replacement="lucl ludi lurd luvd luwi" reason="deprecated"/>
  "lud": "lucl",
};

for (const alias of Object.keys(testData)) {
  const name = testData[alias];

  // A subdivision code is supposed to come with a matching region subtag.
  // That shouldn't affect canonicalisation, but rather than push our luck the
  // tag is kept 'valid' per UTS 35, §3.6.5 by deriving the region from the
  // first two characters of the replacement code.
  const region = name.substring(0, 2).toUpperCase();
  const prefix = "und-" + region + "-u-sd-";
  const tag = prefix + alias;
  const canonical = prefix + name;

  // Sanity-check the test data: the input is non-canonical, the expectation
  // is canonical.
  assert.sameValue(
    isCanonicalizedStructurallyValidLanguageTag(tag),
    false,
    `"${tag}" isn't a canonical language tag.`
  );
  assert(
    isCanonicalizedStructurallyValidLanguageTag(canonical),
    `"${canonical}" is a canonical and structurally valid language tag.`
  );

  const locales = Intl.getCanonicalLocales(tag);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], canonical);
}

View File

@ -0,0 +1,72 @@
// Copyright (C) 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-intl.getcanonicallocales
description: >
Test Unicode extension subtag canonicalisation for the "tz" extension key.
info: |
8.2.1 Intl.getCanonicalLocales (locales)
1. Let ll be ? CanonicalizeLocaleList(locales).
2. Return CreateArrayFromList(ll).
9.2.1 CanonicalizeLocaleList (locales)
...
7. Repeat, while k < len
...
c. If kPresent is true, then
...
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
...
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The
aliases are in the alias attribute value, while the canonical is in the name attribute value.
includes: [testIntl.js]
---*/
// <key name="tz" [...] alias="timezone">
//
// Maps a non-canonical "tz" type to the canonical type expected in its place.
const testData = {
  // As with the "ca" extension key, treat "preferred" as the canonical value
  // and "name" as the alias value.
  // <type name="cnckg" [...] deprecated="true" preferred="cnsha"/>
  "cnckg": "cnsha",

  // <type name="iedub" [...] alias="Europe/Dublin Eire"/>
  // NB: "Eire" matches the |uvalue| production.
  "eire": "iedub",

  // <type name="utcw05" [...] alias="Etc/GMT+5 EST"/>
  // NB: "EST" matches the |uvalue| production.
  "est": "utcw05",

  // <type name="gmt" [...] alias="Etc/GMT Etc/GMT+0 Etc/GMT-0 Etc/GMT0 Etc/Greenwich GMT GMT+0 GMT-0 GMT0 Greenwich"/>
  // NB: "GMT0" matches the |uvalue| production.
  "gmt0": "gmt",

  // <type name="utc" [...] alias="Etc/UTC Etc/UCT Etc/Universal Etc/Zulu UCT UTC Universal Zulu"/>
  // NB: "UCT" matches the |uvalue| production.
  "uct": "utc",

  // <type name="utc" [...] alias="Etc/UTC Etc/UCT Etc/Universal Etc/Zulu UCT UTC Universal Zulu"/>
  // NB: "Zulu" matches the |uvalue| production.
  "zulu": "utc",
};

Object.entries(testData).forEach(([alias, name]) => {
  const prefix = "und-u-tz-";
  const tag = prefix + alias;
  const canonical = prefix + name;

  // Sanity-check the test data: the input is non-canonical, the expectation
  // is canonical.
  assert.sameValue(
    isCanonicalizedStructurallyValidLanguageTag(tag),
    false,
    `"${tag}" isn't a canonical language tag.`
  );
  assert(
    isCanonicalizedStructurallyValidLanguageTag(canonical),
    `"${canonical}" is a canonical and structurally valid language tag.`
  );

  const locales = Intl.getCanonicalLocales(tag);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], canonical);
});

View File

@ -0,0 +1,86 @@
// Copyright (C) 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-intl.getcanonicallocales
description: >
"kb", "kc", "kh", "kk", and "kn" Unicode extension keys canonicalise "yes" to "true".
info: |
8.2.1 Intl.getCanonicalLocales (locales)
1. Let ll be ? CanonicalizeLocaleList(locales).
2. Return CreateArrayFromList(ll).
9.2.1 CanonicalizeLocaleList (locales)
...
7. Repeat, while k < len
...
c. If kPresent is true, then
...
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
...
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The
aliases are in the alias attribute value, while the canonical is in the name attribute value.
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
Any type or tfield value "true" is removed.
includes: [testIntl.js]
---*/
// Unicode extension keys whose CLDR data declares "yes" as an alias of the
// "true" type. Since a "true" type is removed during canonicalisation,
// "-<key>-yes" is expected to canonicalise to just "-<key>".
const unicodeKeys = [
  // <key name="kb" [...] alias="colBackwards">
  //   <type name="true" [...] alias="yes"/>
  "kb",

  // <key name="kc" [...] alias="colCaseLevel">
  //   <type name="true" [...] alias="yes"/>
  "kc",

  // <key name="kh" [...] alias="colHiraganaQuaternary">
  //   <type name="true" [...] alias="yes"/>
  "kh",

  // <key name="kk" [...] alias="colNormalization">
  //   <type name="true" [...] alias="yes"/>
  "kk",

  // <key name="kn" [...] alias="colNumeric">
  //   <type name="true" [...] alias="yes"/>
  "kn",
];

for (const key of unicodeKeys) {
  const tag = `und-u-${key}-yes`;
  const canonical = `und-u-${key}`;

  // Make sure the test data is correct: the input is non-canonical, the
  // expectation is canonical.
  assert.sameValue(
    isCanonicalizedStructurallyValidLanguageTag(tag),
    false,
    `"${tag}" isn't a canonical language tag.`
  );
  assert(
    isCanonicalizedStructurallyValidLanguageTag(canonical),
    `"${canonical}" is a canonical and structurally valid language tag.`
  );

  const result = Intl.getCanonicalLocales(tag);
  assert.sameValue(result.length, 1);
  assert.sameValue(result[0], canonical);
}

// Test some other Unicode extension keys which don't contain an alias entry to
// canonicalise "yes" to "true". For these, "yes" is expected to be preserved.
const otherUnicodeKeys = [
  "ka", "kf", "kr", "ks", "kv",
];

for (const key of otherUnicodeKeys) {
  const tag = `und-u-${key}-yes`;

  // Make sure the test data is correct.
  assert(
    isCanonicalizedStructurallyValidLanguageTag(tag),
    `"${tag}" is a canonical and structurally valid language tag.`
  );

  const result = Intl.getCanonicalLocales(tag);
  assert.sameValue(result.length, 1);
  assert.sameValue(result[0], tag);
}

View File

@ -0,0 +1,54 @@
// Copyright (C) 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-intl.getcanonicallocales
description: >
Test Unicode extension subtags where the ukey subtag contains a digit.
info: |
8.2.1 Intl.getCanonicalLocales (locales)
1. Let ll be ? CanonicalizeLocaleList(locales).
2. Return CreateArrayFromList(ll).
9.2.1 CanonicalizeLocaleList (locales)
...
7. Repeat, while k < len
...
c. If kPresent is true, then
...
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
...
includes: [testIntl.js]
---*/
// Unicode locale extension sequences don't allow keys with a digit as their
// second character.
const invalidCases = [
  "en-u-c0",
  "en-u-00",
];

// The first character is allowed to be a digit.
const validCases = [
  "en-u-0c",
];

for (const invalid of invalidCases) {
  // Make sure the test data is correct.
  assert.sameValue(
    isCanonicalizedStructurallyValidLanguageTag(invalid),
    false,
    `"${invalid}" isn't a structurally valid language tag.`
  );

  // Pass the tag as the message so a failure identifies the offending input.
  assert.throws(RangeError, () => Intl.getCanonicalLocales(invalid), `${invalid}`);
}

for (const valid of validCases) {
  // Make sure the test data is correct.
  assert(
    isCanonicalizedStructurallyValidLanguageTag(valid),
    `"${valid}" is a canonical and structurally valid language tag.`
  );

  // The tag is already canonical, so it must be returned unchanged.
  const result = Intl.getCanonicalLocales(valid);
  assert.sameValue(result.length, 1, `Intl.getCanonicalLocales("${valid}").length`);
  assert.sameValue(result[0], valid, `Intl.getCanonicalLocales("${valid}")[0]`);
}

View File

@ -0,0 +1,26 @@
// Copyright 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-apply-options-to-tag
description: >
ApplyOptionsToTag canonicalises the language tag two times.
info: |
10.1.1 ApplyOptionsToTag( tag, options )
...
9. Set tag to CanonicalizeUnicodeLocaleId(tag).
10. If language is not undefined,
...
b. Set tag to tag with the substring corresponding to the unicode_language_subtag
production of the unicode_language_id replaced by the string language.
...
13. Return CanonicalizeUnicodeLocaleId(tag).
features: [Intl.Locale]
---*/
// The locale identifier is canonicalised by ApplyOptionsToTag before the
// options are applied: "und-Armn-SU" first becomes "und-Armn-AM", and only
// then is the language replaced with "ru". Had "ru" been applied first, the
// outcome would have been "ru-Armn-RU".
const locale = new Intl.Locale("und-Armn-SU", {language: "ru"});
assert.sameValue(locale.toString(), "ru-Armn-AM");

View File

@ -37,6 +37,10 @@ const testDataMaximal = {
"und-419": "es-Latn-419",
"und-150": "ru-Cyrl-RU",
"und-AT": "de-Latn-AT",
"und-Cyrl-RO": "bg-Cyrl-RO",
// Undefined primary language not required to change in all cases.
"und-AQ": "und-Latn-AQ",
};
const testDataMinimal = {
@ -62,6 +66,8 @@ const testDataMinimal = {
"es-Latn-419": "es-419",
"ru-Cyrl-RU": "ru",
"de-Latn-AT": "de-AT",
"bg-Cyrl-RO": "bg-RO",
"und-Latn-AQ": "und-AQ",
};
// Add variants, extensions, and privateuse subtags and ensure they don't

View File

@ -0,0 +1,49 @@
// Copyright 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-Intl.Locale.prototype.minimize
description: >
The "Remove Likely Subtags" algorithm adds likely subtags before processing the locale.
info: |
Intl.Locale.prototype.minimize ()
3. Let minimal be the result of the Remove Likely Subtags algorithm applied to loc.[[Locale]].
If an error is signaled, set minimal to loc.[[Locale]].
UTS 35, §4.3 Likely Subtags
Remove Likely Subtags
1. First get max = AddLikelySubtags(inputLocale). If an error is signaled, return it.
2. ...
features: [Intl.Locale]
---*/
// Maps an input locale to the result expected from Intl.Locale.prototype.minimize().
const testDataMinimal = {
  // Undefined primary language.
  "und": "en",
  "und-Thai": "th",
  "und-419": "es-419",
  "und-150": "ru",
  "und-AT": "de-AT",

  // https://unicode-org.atlassian.net/browse/ICU-13786
  "aae-Latn-IT": "aae-Latn-IT",
  "aae-Thai-CO": "aae-Thai-CO",

  // https://unicode-org.atlassian.net/browse/ICU-10220
  // https://unicode-org.atlassian.net/browse/ICU-12345
  "und-CW": "pap-CW",
  "und-US": "en",
  "zh-Hant": "zh-TW",
  "zh-Hani": "zh-Hani",
};

for (const [tag, minimal] of Object.entries(testDataMinimal)) {
  // Assert the |minimal| tag is indeed minimal, i.e. minimizing it again
  // leaves it unchanged.
  assert.sameValue(
    new Intl.Locale(minimal).minimize().toString(),
    minimal,
    `"${minimal}" should be minimal`
  );

  // Assert RemoveLikelySubtags(tag) returns |minimal|.
  assert.sameValue(
    new Intl.Locale(tag).minimize().toString(),
    minimal,
    `"${tag}".minimize() should be "${minimal}"`
  );
}