mirror of https://github.com/tc39/test262.git
Upstream additional language tag canonicalisation and Intl.Locale tests from SpiderMonkey
Adds more canonicalisation and Intl.Locale tests from: - https://hg.mozilla.org/mozilla-central/file/tip/js/src/tests/non262/Intl/Locale - https://hg.mozilla.org/mozilla-central/file/tip/js/src/tests/non262/Intl/
This commit is contained in:
parent
5d8e6c8e85
commit
18bb955771
|
@ -0,0 +1,58 @@
|
|||
// Copyright (C) 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-intl.getcanonicallocales
|
||||
description: >
|
||||
Assert non-simple language subtag replacements work as expected.
|
||||
info: |
|
||||
8.2.1 Intl.getCanonicalLocales (locales)
|
||||
1. Let ll be ? CanonicalizeLocaleList(locales).
|
||||
2. Return CreateArrayFromList(ll).
|
||||
|
||||
9.2.1 CanonicalizeLocaleList (locales)
|
||||
...
|
||||
7. Repeat, while k < len
|
||||
...
|
||||
c. If kPresent is true, then
|
||||
...
|
||||
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
|
||||
...
|
||||
|
||||
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
|
||||
|
||||
- Replace aliases in the unicode_language_id and tlang (if any) using the following process:
|
||||
- If the language subtag matches the type attribute of a languageAlias element in
|
||||
Supplemental Data, replace the language subtag with the replacement value.
|
||||
1. If there are additional subtags in the replacement value, add them to the result,
|
||||
but only if there is no corresponding subtag already in the tag.
|
||||
|
||||
includes: [testIntl.js]
|
||||
---*/
|
||||
|
||||
// Some CLDR language aliases are "complex": besides replacing the language
// subtag they also supply a script or region subtag. The supplied subtag is
// only applied when the input tag doesn't already contain one of that kind.
const testData = {
  // <languageAlias type="sh" replacement="sr_Latn" reason="legacy"/>
  // "Latn" is added for "sh" only if no script subtag is present.
  "sh": "sr-Latn",
  "sh-Cyrl": "sr-Cyrl",

  // <languageAlias type="cnr" replacement="sr_ME" reason="legacy"/>
  // "ME" is added for "cnr" only if no region subtag is present.
  "cnr": "sr-ME",
  "cnr-BA": "sr-BA",
};

Object.keys(testData).forEach((input) => {
  const expected = testData[input];

  // Sanity-check the expectation itself before exercising the implementation.
  assert(
    isCanonicalizedStructurallyValidLanguageTag(expected),
    "\"" + expected + "\" is a canonicalized and structurally valid language tag."
  );

  const locales = Intl.getCanonicalLocales(input);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], expected);
});
|
|
@ -0,0 +1,108 @@
|
|||
// Copyright (C) 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-intl.getcanonicallocales
|
||||
description: >
|
||||
Assert non-simple region subtag replacements work as expected.
|
||||
info: |
|
||||
8.2.1 Intl.getCanonicalLocales (locales)
|
||||
1. Let ll be ? CanonicalizeLocaleList(locales).
|
||||
2. Return CreateArrayFromList(ll).
|
||||
|
||||
9.2.1 CanonicalizeLocaleList (locales)
|
||||
...
|
||||
7. Repeat, while k < len
|
||||
...
|
||||
c. If kPresent is true, then
|
||||
...
|
||||
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
|
||||
...
|
||||
|
||||
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
|
||||
|
||||
- Replace aliases in the unicode_language_id and tlang (if any) using the following process:
|
||||
- If the region subtag matches the type attribute of a territoryAlias element in
|
||||
Supplemental Data, replace the language subtag with the replacement value, as follows:
|
||||
1. If there is a single territory in the replacement, use it.
|
||||
2. If there are multiple territories:
|
||||
1. Look up the most likely territory for the base language code (and script, if there is one).
|
||||
2. If that likely territory is in the list, use it.
|
||||
3. Otherwise, use the first territory in the list.
|
||||
|
||||
includes: [testIntl.js]
|
||||
---*/
|
||||
|
||||
// Some deprecated CLDR region codes map to a list of replacement regions. The
// entry that gets picked depends on the likely subtags derived from the
// language and script subtags of the tag.
const testData = {
  // The Soviet Union ("SU", numeric "810") broke up into Russia ("RU"),
  // Armenia ("AM"), and many others. Which successor is used depends on the
  // specified (or merely likely) language and script subtags.
  //
  // <territoryAlias type="SU" replacement="RU AM AZ BY EE GE KZ KG LV LT MD TJ TM UA UZ" reason="deprecated"/>
  // <territoryAlias type="810" replacement="RU AM AZ BY EE GE KZ KG LV LT MD TJ TM UA UZ" reason="overlong"/>
  "ru-SU": "ru-RU",
  "ru-810": "ru-RU",
  "en-SU": "en-RU",
  "en-810": "en-RU",
  "und-SU": "und-RU",
  "und-810": "und-RU",
  "und-Latn-SU": "und-Latn-RU",
  "und-Latn-810": "und-Latn-RU",

  // With language "hy" (Armenian) or script "Armn" (Armenian) the likely
  // region is Armenia, which is in the replacement list and is therefore used.
  //
  // <likelySubtag from="hy" to="hy_Armn_AM"/>
  // <likelySubtag from="und_Armn" to="hy_Armn_AM"/>
  "hy-SU": "hy-AM",
  "hy-810": "hy-AM",
  "und-Armn-SU": "und-Armn-AM",
  "und-Armn-810": "und-Armn-AM",

  // <territoryAlias type="CS" replacement="RS ME" reason="deprecated"/>
  //
  // Likely-subtags entries mentioning "RS" or "ME":
  //
  // <likelySubtag from="sr" to="sr_Cyrl_RS"/>
  // <likelySubtag from="sr_ME" to="sr_Latn_ME"/>
  // <likelySubtag from="und_RS" to="sr_Cyrl_RS"/>
  // <likelySubtag from="und_ME" to="sr_Latn_ME"/>
  //
  // No language/script combination without a region subtag ever resolves to
  // "ME", hence "CS" is always replaced by "RS".
  "sr-CS": "sr-RS",
  "sr-Latn-CS": "sr-Latn-RS",
  "sr-Cyrl-CS": "sr-Cyrl-RS",

  // The region already present in the input is not taken into account when
  // selecting the likely replacement region. Consider "az-NT", Azerbaijani as
  // spoken in the Neutral Zone: "NT" is replaced by either "SA" (Saudi-Arabia)
  // or "IQ" (Iraq), and a likely-subtags entry for "az-IQ" exists. However,
  // looking only at the language subtag, "az" always resolves to "az-Latn-AZ".
  // Because "AZ" isn't part of the list ["SA", "IQ"], the default replacement
  // for "NT" is used, which is "SA". So "az-NT" is canonicalised to "az-SA"
  // rather than "az-IQ", even though the latter may be the more sensible
  // candidate based on the actual usage of the target locales.
  //
  // <territoryAlias type="NT" replacement="SA IQ" reason="deprecated"/>
  // <likelySubtag from="az_IQ" to="az_Arab_IQ"/>
  // <likelySubtag from="az" to="az_Latn_AZ"/>
  "az-NT": "az-SA",
};

Object.keys(testData).forEach((input) => {
  const expected = testData[input];

  // Sanity-check the expectation itself before exercising the implementation.
  assert(
    isCanonicalizedStructurallyValidLanguageTag(expected),
    "\"" + expected + "\" is a canonicalized and structurally valid language tag."
  );

  const locales = Intl.getCanonicalLocales(input);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], expected);
});
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright (C) 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-intl.getcanonicallocales
|
||||
description: >
|
||||
Test canonicalisation within transformed extension subtags.
|
||||
info: |
|
||||
8.2.1 Intl.getCanonicalLocales (locales)
|
||||
1. Let ll be ? CanonicalizeLocaleList(locales).
|
||||
2. Return CreateArrayFromList(ll).
|
||||
|
||||
9.2.1 CanonicalizeLocaleList (locales)
|
||||
...
|
||||
7. Repeat, while k < len
|
||||
...
|
||||
c. If kPresent is true, then
|
||||
...
|
||||
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
|
||||
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
|
||||
...
|
||||
|
||||
includes: [testIntl.js]
|
||||
---*/
|
||||
|
||||
// Maps an input tag with a transformed ("-t-") extension to its expected
// canonical form.
const testData = {
  // Variant subtags inside tlang are sorted alphabetically.
  "sl-t-sl-rozaj-biske-1994": "sl-t-sl-1994-biske-rozaj",

  // tfield subtags are sorted alphabetically; this entry also covers case
  // normalisation of the subtags.
  "DE-T-M0-DIN-K0-QWERTZ": "de-t-k0-qwertz-m0-din",

  // A "true" tvalue is kept. UTS 35 version 36, §3.2.1 claims otherwise, but
  // a tkey must be followed by a tvalue, so that's likely a spec bug in UTS 35.
  "en-t-m0-true": "en-t-m0-true",

  // The tlang part is canonicalised as well.
  "en-t-iw": "en-t-he",

  // A deprecated tvalue is replaced by its preferred value.
  "und-Latn-t-und-hani-m0-names": "und-Latn-t-und-hani-m0-prprname",
};

Object.keys(testData).forEach((input) => {
  const expected = testData[input];

  // Sanity-check the expectation itself before exercising the implementation.
  assert(
    isCanonicalizedStructurallyValidLanguageTag(expected),
    "\"" + expected + "\" is a canonical and structurally valid language tag."
  );

  const locales = Intl.getCanonicalLocales(input);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], expected);
});
|
|
@ -0,0 +1,78 @@
|
|||
// Copyright (C) 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-intl.getcanonicallocales
|
||||
description: >
|
||||
A RangeError is thrown when a language tag includes an invalid transformed extension subtag.
|
||||
info: |
|
||||
8.2.1 Intl.getCanonicalLocales (locales)
|
||||
1. Let ll be ? CanonicalizeLocaleList(locales).
|
||||
2. Return CreateArrayFromList(ll).
|
||||
|
||||
9.2.1 CanonicalizeLocaleList (locales)
|
||||
...
|
||||
7. Repeat, while k < len
|
||||
...
|
||||
c. If kPresent is true, then
|
||||
...
|
||||
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
|
||||
...
|
||||
|
||||
includes: [testIntl.js]
|
||||
---*/
|
||||
|
||||
// Language tags whose transformed ("-t-") extension is malformed. Each of them
// must be rejected with a RangeError.
const invalid = [
  // Nothing usable follows the "t" singleton.
  "en-t",
  "en-t-a",
  "en-t-x",
  "en-t-0",

  // Tag ends in a dangling separator.
  "en-t-",
  "en-t-en-",
  "en-t-0x-",

  // tlang: unicode_language_subtag has to be 2-3 or 5-8 characters long and
  // extlang subtags aren't allowed.
  "en-t-root",
  "en-t-abcdefghi",
  "en-t-ar-aao",

  // tlang: unicode_script_subtag has to be 4 alphabetical characters and may
  // appear only once.
  "en-t-en-lat0",
  "en-t-en-latn-latn",

  // tlang: unicode_region_subtag has to be 2 alpha characters or a three
  // digit code.
  "en-t-en-0",
  "en-t-en-00",
  "en-t-en-0x",
  "en-t-en-x0",
  "en-t-en-latn-0",
  "en-t-en-latn-00",
  "en-t-en-latn-xyz",

  // tlang: unicode_variant_subtag has to be 5-8 alphanum characters, or 4
  // characters with a leading digit.
  "en-t-en-abcdefghi",
  "en-t-en-latn-gb-ab",
  "en-t-en-latn-gb-abc",
  "en-t-en-latn-gb-abcd",
  "en-t-en-latn-gb-abcdefghi",

  // A tkey without a following tvalue.
  "en-t-d0",
  "en-t-d0-m0",
  "en-t-d0-x-private",
];

invalid.forEach((tag) => {
  // Sanity-check the test data: the harness validator must reject the tag too.
  const acceptedByHarness = isCanonicalizedStructurallyValidLanguageTag(tag);
  assert.sameValue(acceptedByHarness, false,
                   "\"" + tag + "\" isn't a structurally valid language tag.");

  assert.throws(RangeError, function() {
    Intl.getCanonicalLocales(tag);
  }, `${tag}`);
});
|
|
@ -0,0 +1,78 @@
|
|||
// Copyright (C) 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-intl.getcanonicallocales
|
||||
description: >
|
||||
No RangeError is thrown when a language tag includes a valid transformed extension subtag.
|
||||
info: |
|
||||
8.2.1 Intl.getCanonicalLocales (locales)
|
||||
1. Let ll be ? CanonicalizeLocaleList(locales).
|
||||
2. Return CreateArrayFromList(ll).
|
||||
|
||||
9.2.1 CanonicalizeLocaleList (locales)
|
||||
...
|
||||
7. Repeat, while k < len
|
||||
...
|
||||
c. If kPresent is true, then
|
||||
...
|
||||
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
|
||||
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
|
||||
...
|
||||
|
||||
includes: [testIntl.js]
|
||||
---*/
|
||||
|
||||
// Already-canonical language tags with a well-formed transformed ("-t-")
// extension. Intl.getCanonicalLocales must accept each one and return it
// unchanged.
const valid = [
  // tlang with unicode_language_subtag.
  "en-t-en",

  // tlang with unicode_script_subtag.
  "en-t-en-latn",

  // tlang with unicode_region_subtag.
  "en-t-en-ca",

  // tlang with unicode_script_subtag and unicode_region_subtag.
  "en-t-en-latn-ca",

  // tlang with unicode_variant_subtag.
  "en-t-en-emodeng",

  // tlang with unicode_script_subtag and unicode_variant_subtag.
  "en-t-en-latn-emodeng",

  // tlang with unicode_region_subtag and unicode_variant_subtag.
  "en-t-en-ca-emodeng",

  // tlang with unicode_script_subtag, unicode_region_subtag, and unicode_variant_subtag.
  "en-t-en-latn-ca-emodeng",

  // No tlang. (Must contain at least one tfield.)
  "en-t-d0-ascii",
];

// Suffixes appended to every entry of |valid| so each tag is also tested with
// an additional trailing tfield.
const extraFields = [
  // No extra tfield.
  "",

  // tfield with a tvalue consisting of a single subtag.
  "-i0-handwrit",

  // tfield with a tvalue consisting of two subtags.
  "-s0-accents-publish",
];

for (const tag of valid) {
  for (const extra of extraFields) {
    const actualTag = tag + extra;

    // Make sure the test data is correct.
    assert(isCanonicalizedStructurallyValidLanguageTag(actualTag),
           "\"" + actualTag + "\" is a canonical and structurally valid language tag.");

    // The input is already canonical, so it must round-trip unchanged.
    const result = Intl.getCanonicalLocales(actualTag);
    assert.sameValue(result.length, 1);
    assert.sameValue(result[0], actualTag);
  }
}
|
|
@ -0,0 +1,58 @@
|
|||
// Copyright (C) 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-intl.getcanonicallocales
|
||||
description: >
|
||||
Test Unicode extension subtag canonicalisation for the "ca" extension key.
|
||||
info: |
|
||||
8.2.1 Intl.getCanonicalLocales (locales)
|
||||
1. Let ll be ? CanonicalizeLocaleList(locales).
|
||||
2. Return CreateArrayFromList(ll).
|
||||
|
||||
9.2.1 CanonicalizeLocaleList (locales)
|
||||
...
|
||||
7. Repeat, while k < len
|
||||
...
|
||||
c. If kPresent is true, then
|
||||
...
|
||||
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
|
||||
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
|
||||
...
|
||||
|
||||
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
|
||||
Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
|
||||
See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The
|
||||
aliases are in the alias attribute value, while the canonical is in the name attribute value.
|
||||
includes: [testIntl.js]
|
||||
---*/
|
||||
|
||||
// Alias -> canonical type values for the Unicode extension key "ca":
// <key name="ca" [...] alias="calendar">
const testData = {
  // <type name="ethioaa" [...] alias="ethiopic-amete-alem"/>
  "ethiopic-amete-alem": "ethioaa",

  // <type name="islamic-civil" [...] />
  // <type name="islamicc" [...] deprecated="true" preferred="islamic-civil" alias="islamic-civil"/>
  //
  // The "name" and "alias" attributes for "islamic-civil" don't quite match
  // what's spec'ed in UTS 35, §3.2.1. Following §3.2.1 to the letter would make
  // "islamicc" the canonical value and "islamic-civil" an alias value. Assume
  // the definitions in
  // https://unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
  // overrule UTS 35, §3.2.1.
  "islamicc": "islamic-civil",
};

Object.keys(testData).forEach((alias) => {
  const input = "und-u-ca-" + alias;
  const expected = "und-u-ca-" + testData[alias];

  // Sanity-check the test data: the input must be non-canonical, the
  // expectation canonical.
  assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(input), false,
                   "\"" + input + "\" isn't a canonical language tag.");
  assert(isCanonicalizedStructurallyValidLanguageTag(expected),
         "\"" + expected + "\" is a canonical and structurally valid language tag.");

  const locales = Intl.getCanonicalLocales(input);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], expected);
});
|
|
@ -0,0 +1,65 @@
|
|||
// Copyright (C) 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-intl.getcanonicallocales
|
||||
description: >
|
||||
Test Unicode extension subtag canonicalisation for the "ks" extension key.
|
||||
info: |
|
||||
8.2.1 Intl.getCanonicalLocales (locales)
|
||||
1. Let ll be ? CanonicalizeLocaleList(locales).
|
||||
2. Return CreateArrayFromList(ll).
|
||||
|
||||
9.2.1 CanonicalizeLocaleList (locales)
|
||||
...
|
||||
7. Repeat, while k < len
|
||||
...
|
||||
c. If kPresent is true, then
|
||||
...
|
||||
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
|
||||
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
|
||||
...
|
||||
|
||||
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
|
||||
Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
|
||||
See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The
|
||||
aliases are in the alias attribute value, while the canonical is in the name attribute value.
|
||||
includes: [testIntl.js]
|
||||
---*/
|
||||
|
||||
// Alias -> canonical type values for the Unicode extension key "ks":
// <key name="ks" [...] alias="colStrength">
const testData = {
  // <type name="level1" [...] alias="primary"/>
  "primary": "level1",

  // <type name="level2" [...] alias="secondary"/>
  // Skipped: "secondary" doesn't match the |uvalue| production.

  // <type name="level3" [...] alias="tertiary"/>
  "tertiary": "level3",

  // <type name="level4" [...] alias="quaternary quarternary"/>
  // Skipped: neither "quaternary" nor "quarternary" matches |uvalue|.

  // <type name="identic" [...] alias="identical"/>
  // Skipped: "identical" doesn't match the |uvalue| production.
};

Object.keys(testData).forEach((alias) => {
  const input = "und-u-ks-" + alias;
  const expected = "und-u-ks-" + testData[alias];

  // Sanity-check the test data: the input must be non-canonical, the
  // expectation canonical.
  assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(input), false,
                   "\"" + input + "\" isn't a canonical language tag.");
  assert(isCanonicalizedStructurallyValidLanguageTag(expected),
         "\"" + expected + "\" is a canonical and structurally valid language tag.");

  const locales = Intl.getCanonicalLocales(input);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], expected);
});
|
|
@ -0,0 +1,49 @@
|
|||
// Copyright (C) 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-intl.getcanonicallocales
|
||||
description: >
|
||||
Test Unicode extension subtag canonicalisation for the "ms" extension key.
|
||||
info: |
|
||||
8.2.1 Intl.getCanonicalLocales (locales)
|
||||
1. Let ll be ? CanonicalizeLocaleList(locales).
|
||||
2. Return CreateArrayFromList(ll).
|
||||
|
||||
9.2.1 CanonicalizeLocaleList (locales)
|
||||
...
|
||||
7. Repeat, while k < len
|
||||
...
|
||||
c. If kPresent is true, then
|
||||
...
|
||||
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
|
||||
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
|
||||
...
|
||||
|
||||
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
|
||||
Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
|
||||
See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The
|
||||
aliases are in the alias attribute value, while the canonical is in the name attribute value.
|
||||
includes: [testIntl.js]
|
||||
---*/
|
||||
|
||||
// Alias -> canonical type values for the Unicode extension key "ms":
// <key name="ms" [...] alias="measure" since="28">
const testData = {
  // <type name="uksystem" [...] alias="imperial" since="28" />
  "imperial": "uksystem",
};

Object.keys(testData).forEach((alias) => {
  const input = "und-u-ms-" + alias;
  const expected = "und-u-ms-" + testData[alias];

  // Sanity-check the test data: the input must be non-canonical, the
  // expectation canonical.
  assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(input), false,
                   "\"" + input + "\" isn't a canonical language tag.");
  assert(isCanonicalizedStructurallyValidLanguageTag(expected),
         "\"" + expected + "\" is a canonical and structurally valid language tag.");

  const locales = Intl.getCanonicalLocales(input);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], expected);
});
|
|
@ -0,0 +1,67 @@
|
|||
// Copyright (C) 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-intl.getcanonicallocales
|
||||
description: >
|
||||
Test Unicode extension subtag canonicalisation for the "rg" extension key.
|
||||
info: |
|
||||
8.2.1 Intl.getCanonicalLocales (locales)
|
||||
1. Let ll be ? CanonicalizeLocaleList(locales).
|
||||
2. Return CreateArrayFromList(ll).
|
||||
|
||||
9.2.1 CanonicalizeLocaleList (locales)
|
||||
...
|
||||
7. Repeat, while k < len
|
||||
...
|
||||
c. If kPresent is true, then
|
||||
...
|
||||
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
|
||||
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
|
||||
...
|
||||
|
||||
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
|
||||
Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
|
||||
See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The
|
||||
aliases are in the alias attribute value, while the canonical is in the name attribute value.
|
||||
|
||||
Replace aliases in special key values:
|
||||
If there is an 'sd' or 'rg' key, replace any subdivision alias in its value in the same way,
|
||||
using subdivisionAlias data.
|
||||
includes: [testIntl.js]
|
||||
---*/
|
||||
|
||||
// Deprecated subdivision code -> replacement, taken from the CLDR
// subdivisionAlias data, exercised through the "rg" Unicode extension key.
const testData = {
  // <subdivisionAlias type="no23" replacement="no50" reason="deprecated"/>
  "no23": "no50",

  // <subdivisionAlias type="cn11" replacement="cnbj" reason="deprecated"/>
  "cn11": "cnbj",

  // <subdivisionAlias type="cz10a" replacement="cz110" reason="deprecated"/>
  "cz10a": "cz110",

  // <subdivisionAlias type="fra" replacement="frges" reason="deprecated"/>
  "fra": "frges",

  // <subdivisionAlias type="frg" replacement="frges" reason="deprecated"/>
  "frg": "frges",

  // <subdivisionAlias type="lud" replacement="lucl ludi lurd luvd luwi" reason="deprecated"/>
  "lud": "lucl",
};

Object.keys(testData).forEach((alias) => {
  const input = "und-u-rg-" + alias;
  const expected = "und-u-rg-" + testData[alias];

  // Sanity-check the test data: the input must be non-canonical, the
  // expectation canonical.
  assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(input), false,
                   "\"" + input + "\" isn't a canonical language tag.");
  assert(isCanonicalizedStructurallyValidLanguageTag(expected),
         "\"" + expected + "\" is a canonical and structurally valid language tag.");

  const locales = Intl.getCanonicalLocales(input);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], expected);
});
|
|
@ -0,0 +1,72 @@
|
|||
// Copyright (C) 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-intl.getcanonicallocales
|
||||
description: >
|
||||
Test Unicode extension subtag canonicalisation for the "sd" extension key.
|
||||
info: |
|
||||
8.2.1 Intl.getCanonicalLocales (locales)
|
||||
1. Let ll be ? CanonicalizeLocaleList(locales).
|
||||
2. Return CreateArrayFromList(ll).
|
||||
|
||||
9.2.1 CanonicalizeLocaleList (locales)
|
||||
...
|
||||
7. Repeat, while k < len
|
||||
...
|
||||
c. If kPresent is true, then
|
||||
...
|
||||
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
|
||||
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
|
||||
...
|
||||
|
||||
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
|
||||
Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
|
||||
See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The
|
||||
aliases are in the alias attribute value, while the canonical is in the name attribute value.
|
||||
|
||||
Replace aliases in special key values:
|
||||
If there is an 'sd' or 'rg' key, replace any subdivision alias in its value in the same way,
|
||||
using subdivisionAlias data.
|
||||
includes: [testIntl.js]
|
||||
---*/
|
||||
|
||||
// Deprecated subdivision code -> replacement, taken from the CLDR
// subdivisionAlias data, exercised through the "sd" Unicode extension key.
const testData = {
  // <subdivisionAlias type="no23" replacement="no50" reason="deprecated"/>
  "no23": "no50",

  // <subdivisionAlias type="cn11" replacement="cnbj" reason="deprecated"/>
  "cn11": "cnbj",

  // <subdivisionAlias type="cz10a" replacement="cz110" reason="deprecated"/>
  "cz10a": "cz110",

  // <subdivisionAlias type="fra" replacement="frges" reason="deprecated"/>
  "fra": "frges",

  // <subdivisionAlias type="frg" replacement="frges" reason="deprecated"/>
  "frg": "frges",

  // <subdivisionAlias type="lud" replacement="lucl ludi lurd luvd luwi" reason="deprecated"/>
  "lud": "lucl",
};

Object.keys(testData).forEach((alias) => {
  const name = testData[alias];

  // A subdivision code should always come with a matching region subtag. That
  // shouldn't matter for canonicalisation, but keep the language tag 'valid'
  // per UTS 35, §3.6.5 anyway: derive the region from the first two
  // characters of the replacement code.
  const region = name.substring(0, 2).toUpperCase();

  const input = `und-${region}-u-sd-${alias}`;
  const expected = `und-${region}-u-sd-${name}`;

  // Sanity-check the test data: the input must be non-canonical, the
  // expectation canonical.
  assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(input), false,
                   "\"" + input + "\" isn't a canonical language tag.");
  assert(isCanonicalizedStructurallyValidLanguageTag(expected),
         "\"" + expected + "\" is a canonical and structurally valid language tag.");

  const locales = Intl.getCanonicalLocales(input);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], expected);
});
|
|
@ -0,0 +1,72 @@
|
|||
// Copyright (C) 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-intl.getcanonicallocales
|
||||
description: >
|
||||
Test Unicode extension subtag canonicalisation for the "tz" extension key.
|
||||
info: |
|
||||
8.2.1 Intl.getCanonicalLocales (locales)
|
||||
1. Let ll be ? CanonicalizeLocaleList(locales).
|
||||
2. Return CreateArrayFromList(ll).
|
||||
|
||||
9.2.1 CanonicalizeLocaleList (locales)
|
||||
...
|
||||
7. Repeat, while k < len
|
||||
...
|
||||
c. If kPresent is true, then
|
||||
...
|
||||
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
|
||||
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
|
||||
...
|
||||
|
||||
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
|
||||
Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
|
||||
See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The
|
||||
aliases are in the alias attribute value, while the canonical is in the name attribute value.
|
||||
includes: [testIntl.js]
|
||||
---*/
|
||||
|
||||
// Alias -> canonical type values for the Unicode extension key "tz":
// <key name="tz" [...] alias="timezone">
const testData = {
  // As for the "ca" extension key, assume the "preferred" attribute holds the
  // canonical value and "name" the alias value.

  // <type name="cnckg" [...] deprecated="true" preferred="cnsha"/>
  "cnckg": "cnsha",

  // <type name="iedub" [...] alias="Europe/Dublin Eire"/>
  // NB: "Eire" matches the |uvalue| production.
  "eire": "iedub",

  // <type name="utcw05" [...] alias="Etc/GMT+5 EST"/>
  // NB: "EST" matches the |uvalue| production.
  "est": "utcw05",

  // <type name="gmt" [...] alias="Etc/GMT Etc/GMT+0 Etc/GMT-0 Etc/GMT0 Etc/Greenwich GMT GMT+0 GMT-0 GMT0 Greenwich"/>
  // NB: "GMT0" matches the |uvalue| production.
  "gmt0": "gmt",

  // <type name="utc" [...] alias="Etc/UTC Etc/UCT Etc/Universal Etc/Zulu UCT UTC Universal Zulu"/>
  // NB: "UCT" matches the |uvalue| production.
  "uct": "utc",

  // <type name="utc" [...] alias="Etc/UTC Etc/UCT Etc/Universal Etc/Zulu UCT UTC Universal Zulu"/>
  // NB: "Zulu" matches the |uvalue| production.
  "zulu": "utc",
};

Object.keys(testData).forEach((alias) => {
  const input = "und-u-tz-" + alias;
  const expected = "und-u-tz-" + testData[alias];

  // Sanity-check the test data: the input must be non-canonical, the
  // expectation canonical.
  assert.sameValue(isCanonicalizedStructurallyValidLanguageTag(input), false,
                   "\"" + input + "\" isn't a canonical language tag.");
  assert(isCanonicalizedStructurallyValidLanguageTag(expected),
         "\"" + expected + "\" is a canonical and structurally valid language tag.");

  const locales = Intl.getCanonicalLocales(input);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], expected);
});
|
|
@ -0,0 +1,86 @@
|
|||
// Copyright (C) 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-intl.getcanonicallocales
|
||||
description: >
|
||||
"kb", "kc", "kh", "kk", and "kn" Unicode extension keys canonicalise "yes" to "true".
|
||||
info: |
|
||||
8.2.1 Intl.getCanonicalLocales (locales)
|
||||
1. Let ll be ? CanonicalizeLocaleList(locales).
|
||||
2. Return CreateArrayFromList(ll).
|
||||
|
||||
9.2.1 CanonicalizeLocaleList (locales)
|
||||
...
|
||||
7. Repeat, while k < len
|
||||
...
|
||||
c. If kPresent is true, then
|
||||
...
|
||||
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
|
||||
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
|
||||
...
|
||||
|
||||
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
|
||||
Use the bcp47 data to replace keys, types, tfields, and tvalues by their canonical forms.
|
||||
See Section 3.6.4 U Extension Data Files) and Section 3.7.1 T Extension Data Files. The
|
||||
aliases are in the alias attribute value, while the canonical is in the name attribute value.
|
||||
|
||||
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
|
||||
Any type or tfield value "true" is removed.
|
||||
includes: [testIntl.js]
|
||||
---*/
|
||||
|
||||
// Unicode extension keys whose CLDR bcp47 data declares "yes" as an alias of
// the type "true". Each entry is preceded by the corresponding (abridged)
// <key>/<type> elements from CLDR's bcp47/collation.xml.
const unicodeKeys = [
  // <key name="kb" [...] alias="colBackwards">
  //   <type name="true" [...] alias="yes"/>
  "kb",

  // <key name="kc" [...] alias="colCaseLevel">
  //   <type name="true" [...] alias="yes"/>
  "kc",

  // <key name="kh" [...] alias="colHiraganaQuaternary">
  //   <type name="true" [...] alias="yes"/>
  "kh",

  // <key name="kk" [...] alias="colNormalization">
  //   <type name="true" [...] alias="yes"/>
  "kk",

  // <key name="kn" [...] alias="colNumeric">
  //   <type name="true" [...] alias="yes"/>
  "kn",
];
|
||||
|
||||
// For each key, "<key>-yes" must canonicalise to the bare key: "yes" is
// replaced by "true", and a "true" type is then removed.
for (const ukey of unicodeKeys) {
  const canonical = `und-u-${ukey}`;
  const tag = `${canonical}-yes`;

  // Make sure the test data is correct: the input is not yet canonical,
  // the expected result is.
  assert.sameValue(
    isCanonicalizedStructurallyValidLanguageTag(tag),
    false,
    `"${tag}" isn't a canonical language tag.`
  );
  assert(
    isCanonicalizedStructurallyValidLanguageTag(canonical),
    `"${canonical}" is a canonical and structurally valid language tag.`
  );

  const locales = Intl.getCanonicalLocales(tag);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], canonical);
}
|
||||
|
||||
// Counter-check: these Unicode extension keys have no alias entry mapping
// "yes" to "true", so "<key>-yes" must be returned unchanged.
const otherUnicodeKeys = [
  "ka",
  "kf",
  "kr",
  "ks",
  "kv",
];

otherUnicodeKeys.forEach((ukey) => {
  const tag = `und-u-${ukey}-yes`;

  // Make sure the test data is correct: the tag is already canonical.
  assert(
    isCanonicalizedStructurallyValidLanguageTag(tag),
    `"${tag}" is a canonical and structurally valid language tag.`
  );

  const locales = Intl.getCanonicalLocales(tag);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], tag);
});
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright (C) 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-intl.getcanonicallocales
|
||||
description: >
|
||||
Test Unicode extension subtags where the ukey subtag contains a digit.
|
||||
info: |
|
||||
8.2.1 Intl.getCanonicalLocales (locales)
|
||||
1. Let ll be ? CanonicalizeLocaleList(locales).
|
||||
2. Return CreateArrayFromList(ll).
|
||||
|
||||
9.2.1 CanonicalizeLocaleList (locales)
|
||||
...
|
||||
7. Repeat, while k < len
|
||||
...
|
||||
c. If kPresent is true, then
|
||||
...
|
||||
v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
|
||||
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
|
||||
...
|
||||
|
||||
includes: [testIntl.js]
|
||||
---*/
|
||||
|
||||
// A Unicode locale extension key may not have a digit as its second
// character, so these tags are rejected.
const invalidCases = ["en-u-c0", "en-u-00"];

// A digit in the first position of the key is allowed.
const validCases = ["en-u-0c"];

invalidCases.forEach((invalid) => {
  // Make sure the test data is correct.
  assert.sameValue(
    isCanonicalizedStructurallyValidLanguageTag(invalid),
    false,
    `"${invalid}" isn't a structurally valid language tag.`
  );

  // Structurally invalid tags must be rejected with a RangeError.
  assert.throws(RangeError, () => Intl.getCanonicalLocales(invalid));
});

validCases.forEach((valid) => {
  // Make sure the test data is correct.
  assert(
    isCanonicalizedStructurallyValidLanguageTag(valid),
    `"${valid}" is a canonical and structurally valid language tag.`
  );

  // An already-canonical tag is returned unchanged as the only element.
  const locales = Intl.getCanonicalLocales(valid);
  assert.sameValue(locales.length, 1);
  assert.sameValue(locales[0], valid);
});
|
|
@ -0,0 +1,26 @@
|
|||
// Copyright 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-apply-options-to-tag
|
||||
description: >
|
||||
ApplyOptionsToTag canonicalises the language tag two times.
|
||||
info: |
|
||||
10.1.1 ApplyOptionsToTag( tag, options )
|
||||
|
||||
...
|
||||
9. Set tag to CanonicalizeUnicodeLocaleId(tag).
|
||||
10. If language is not undefined,
|
||||
...
|
||||
b. Set tag to tag with the substring corresponding to the unicode_language_subtag
|
||||
production of the unicode_language_id replaced by the string language.
|
||||
...
|
||||
13. Return CanonicalizeUnicodeLocaleId(tag).
|
||||
features: [Intl.Locale]
|
||||
---*/
|
||||
|
||||
// The locale identifier is canonicalised *before* the options are applied:
// "und-Armn-SU" first becomes "und-Armn-AM", and only then is the language
// replaced by "ru". Had "ru" been applied first, the outcome would have been
// "ru-Armn-RU" instead.
const locale = new Intl.Locale("und-Armn-SU", { language: "ru" });
assert.sameValue(locale.toString(), "ru-Armn-AM");
|
|
@ -37,6 +37,10 @@ const testDataMaximal = {
|
|||
"und-419": "es-Latn-419",
|
||||
"und-150": "ru-Cyrl-RU",
|
||||
"und-AT": "de-Latn-AT",
|
||||
"und-Cyrl-RO": "bg-Cyrl-RO",
|
||||
|
||||
// Undefined primary language not required to change in all cases.
|
||||
"und-AQ": "und-Latn-AQ",
|
||||
};
|
||||
|
||||
const testDataMinimal = {
|
||||
|
@ -62,6 +66,8 @@ const testDataMinimal = {
|
|||
"es-Latn-419": "es-419",
|
||||
"ru-Cyrl-RU": "ru",
|
||||
"de-Latn-AT": "de-AT",
|
||||
"bg-Cyrl-RO": "bg-RO",
|
||||
"und-Latn-AQ": "und-AQ",
|
||||
};
|
||||
|
||||
// Add variants, extensions, and privateuse subtags and ensure they don't
|
||||
|
|
49
test/intl402/Locale/prototype/minimize/removing-likely-subtags-first-adds-likely-subtags.js
vendored
Normal file
49
test/intl402/Locale/prototype/minimize/removing-likely-subtags-first-adds-likely-subtags.js
vendored
Normal file
|
@ -0,0 +1,49 @@
|
|||
// Copyright 2020 André Bargull. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-Intl.Locale.prototype.minimize
|
||||
description: >
|
||||
The "Remove Likely Subtags" algorithm adds likely subtags before processing the locale.
|
||||
info: |
|
||||
Intl.Locale.prototype.minimize ()
|
||||
3. Let minimal be the result of the Remove Likely Subtags algorithm applied to loc.[[Locale]].
|
||||
If an error is signaled, set minimal to loc.[[Locale]].
|
||||
|
||||
UTS 35, §4.3 Likely Subtags
|
||||
Remove Likely Subtags
|
||||
|
||||
1. First get max = AddLikelySubtags(inputLocale). If an error is signaled, return it.
|
||||
2. ...
|
||||
features: [Intl.Locale]
|
||||
---*/
|
||||
|
||||
// Maps an input language tag to the result expected from calling
// Intl.Locale.prototype.minimize() on it. Declared with `const`, matching the
// fixture declarations in the sibling tests, so it cannot be reassigned.
const testDataMinimal = {
  // Undefined primary language.
  "und": "en",
  "und-Thai": "th",
  "und-419": "es-419",
  "und-150": "ru",
  "und-AT": "de-AT",

  // https://unicode-org.atlassian.net/browse/ICU-13786
  "aae-Latn-IT": "aae-Latn-IT",
  "aae-Thai-CO": "aae-Thai-CO",

  // https://unicode-org.atlassian.net/browse/ICU-10220
  // https://unicode-org.atlassian.net/browse/ICU-12345
  "und-CW": "pap-CW",
  "und-US": "en",
  "zh-Hant": "zh-TW",
  "zh-Hani": "zh-Hani",
};
|
||||
|
||||
// Check every input/expected pair of |testDataMinimal|.
Object.entries(testDataMinimal).forEach(([tag, minimal]) => {
  // The expected value must itself be a fixed point of minimize().
  const reminimized = new Intl.Locale(minimal).minimize().toString();
  assert.sameValue(reminimized, minimal, `"${minimal}" should be minimal`);

  // Minimizing the input tag must produce the expected value.
  const actual = new Intl.Locale(tag).minimize().toString();
  assert.sameValue(actual, minimal, `"${tag}".minimize() should be "${minimal}"`);
});
|
Loading…
Reference in New Issue