diff --git a/harness/testIntl.js b/harness/testIntl.js index 08008419be..a4d8b09e40 100644 --- a/harness/testIntl.js +++ b/harness/testIntl.js @@ -301,408 +301,713 @@ function isCanonicalizedStructurallyValidLanguageTag(locale) { /** * Mappings from complete tags to preferred values. * - * Spec: IANA Language Subtag Registry. + * Spec: http://unicode.org/reports/tr35/#Identifiers + * Version: CLDR, version 35 */ var __tagMappings = { // property names must be in lower case; values in canonical form - // grandfathered tags from IANA language subtag registry, file date 2018-04-23 "art-lojban": "jbo", - "cel-gaulish": "cel-gaulish", - "en-gb-oed": "en-GB-oxendict", - "i-ami": "ami", - "i-bnn": "bnn", - "i-default": "i-default", - "i-enochian": "i-enochian", - "i-hak": "hak", - "i-klingon": "tlh", - "i-lux": "lb", - "i-mingo": "i-mingo", - "i-navajo": "nv", - "i-pwn": "pwn", - "i-tao": "tao", - "i-tay": "tay", - "i-tsu": "tsu", - "no-bok": "nb", - "no-nyn": "nn", - "sgn-be-fr": "sfb", - "sgn-be-nl": "vgt", - "sgn-ch-de": "sgg", - "zh-guoyu": "cmn", + "cel-gaulish": "xtg-x-cel-gaulish", + "zh-guoyu": "zh", "zh-hakka": "hak", - "zh-min": "zh-min", - "zh-min-nan": "nan", "zh-xiang": "hsn", - // deprecated redundant tags from IANA language subtag registry, file date 2018-04-23 - "sgn-br": "bzs", - "sgn-co": "csn", - "sgn-de": "gsg", - "sgn-dk": "dsl", - "sgn-es": "ssp", - "sgn-fr": "fsl", - "sgn-gb": "bfi", - "sgn-gr": "gss", - "sgn-ie": "isg", - "sgn-it": "ise", - "sgn-jp": "jsl", - "sgn-mx": "mfs", - "sgn-ni": "ncs", - "sgn-nl": "dse", - "sgn-no": "nsl", - "sgn-pt": "psr", - "sgn-se": "swl", - "sgn-us": "ase", - "sgn-za": "sfs", - "zh-cmn": "cmn", - "zh-cmn-hans": "cmn-Hans", - "zh-cmn-hant": "cmn-Hant", - "zh-gan": "gan", - "zh-wuu": "wuu", - "zh-yue": "yue", - // deprecated variant with prefix from IANA language subtag registry, file date 2018-04-23 - "ja-latn-hepburn-heploc": "ja-Latn-alalc97" }; /** - * Mappings from non-extlang subtags to preferred values. + * Mappings from language subtags to preferred values. * - * Spec: IANA Language Subtag Registry. + * Spec: http://unicode.org/reports/tr35/#Identifiers + * Version: CLDR, version 35 */ - var __subtagMappings = { + var __languageMappings = { // property names and values must be in canonical case - // language subtags with Preferred-Value mappings from IANA language subtag registry, file date 2018-04-23 - "in": "id", - "iw": "he", - "ji": "yi", - "jw": "jv", - "mo": "ro", + "aam": "aas", + "aar": "aa", + "abk": "ab", "adp": "dz", + "afr": "af", + "aju": "jrb", + "aka": "ak", + "alb": "sq", + "als": "sq", + "amh": "am", + "ara": "ar", + "arb": "ar", + "arg": "an", + "arm": "hy", + "asm": "as", "aue": "ktz", + "ava": "av", + "ave": "ae", + "aym": "ay", + "ayr": "ay", "ayx": "nun", + "aze": "az", + "azj": "az", + "bak": "ba", + "bam": "bm", + "baq": "eu", + "bcc": "bal", + "bcl": "bik", + "bel": "be", + "ben": "bn", "bgm": "bcg", + "bh": "bho", + "bih": "bho", + "bis": "bi", "bjd": "drl", + "bod": "bo", + "bos": "bs", + "bre": "br", + "bul": "bg", + "bur": "my", + "bxk": "luy", + "bxr": "bua", + "cat": "ca", "ccq": "rki", + "ces": "cs", + "cha": "ch", + "che": "ce", + "chi": "zh", + "chu": "cu", + "chv": "cv", "cjr": "mom", "cka": "cmr", + "cld": "syr", "cmk": "xch", + "cmn": "zh", + "cor": "kw", + "cos": "co", "coy": "pij", "cqu": "quh", - "drh": "khk", - "drw": "prs", + "cre": "cr", + "cwd": "cr", + "cym": "cy", + "cze": "cs", + "dan": "da", + "deu": "de", + "dgo": "doi", + "dhd": "mwr", + "dik": "din", + "diq": "zza", + "div": "dv", + "drh": "mn", + "dut": "nl", + "dzo": "dz", + "ekk": "et", + "ell": "el", + "emk": "man", + "eng": "en", + "epo": "eo", + "esk": "ik", + "est": "et", + "eus": "eu", + "ewe": "ee", + "fao": "fo", + "fas": "fa", + "fat": "ak", + "fij": "fj", + "fin": "fi", + "fra": "fr", + "fre": "fr", + "fry": "fy", + "fuc": "ff", + "ful": "ff", "gav": "dev", + "gaz": "om", + "gbo": "grb", + "geo": "ka", + "ger": "de", "gfx": "vaj", "ggn": "gvr", + "gla": "gd", + "gle": "ga", + "glg": "gl", + "glv": "gv", + "gno": "gon", + "gre": "el", + "grn": "gn", "gti": "nyc", + "gug": "gn", + "guj": "gu", "guv": "duz", + "gya": "gba", + "hat": "ht", + "hau": "ha", + "hdn": "hai", + "hea": "hmn", + "heb": "he", + "her": "hz", + "him": "srx", + "hin": "hi", + "hmo": "ho", "hrr": "jal", + "hrv": "hr", + "hun": "hu", + "hye": "hy", "ibi": "opa", + "ibo": "ig", + "ice": "is", + "ido": "io", + "iii": "ii", + "ike": "iu", + "iku": "iu", + "ile": "ie", "ilw": "gal", + "in": "id", + "ina": "ia", + "ind": "id", + "ipk": "ik", + "isl": "is", + "ita": "it", + "iw": "he", + "jav": "jv", "jeg": "oyb", + "ji": "yi", + "jpn": "ja", + "jw": "jv", + "kal": "kl", + "kan": "kn", + "kas": "ks", + "kat": "ka", + "kau": "kr", + "kaz": "kk", "kgc": "tdf", "kgh": "kml", + "khk": "mn", + "khm": "km", + "kik": "ki", + "kin": "rw", + "kir": "ky", + "kmr": "ku", + "knc": "kr", + "kng": "kg", + "knn": "kok", "koj": "kwv", + "kom": "kv", + "kon": "kg", + "kor": "ko", + "kpv": "kv", "krm": "bmf", "ktr": "dtp", + "kua": "kj", + "kur": "ku", "kvs": "gdj", "kwq": "yam", "kxe": "tvd", "kzj": "dtp", "kzt": "dtp", + "lao": "lo", + "lat": "la", + "lav": "lv", + "lbk": "bnc", "lii": "raq", + "lim": "li", + "lin": "ln", + "lit": "lt", "lmm": "rmx", + "ltz": "lb", + "lub": "lu", + "lug": "lg", + "lvs": "lv", + "mac": "mk", + "mah": "mh", + "mal": "ml", + "mao": "mi", + "mar": "mr", + "may": "ms", "meg": "cir", + "mhr": "chm", + "mkd": "mk", + "mlg": "mg", + "mlt": "mt", + "mnk": "man", + "mo": "ro", + "mol": "ro", + "mon": "mn", + "mri": "mi", + "msa": "ms", "mst": "mry", + "mup": "raj", "mwj": "vaj", + "mya": "my", "myt": "mry", "nad": "xny", + "nau": "na", + "nav": "nv", + "nbl": "nr", "ncp": "kdz", + "nde": "nd", + "ndo": "ng", + "nep": "ne", + "nld": "nl", + "nno": "nn", "nnx": "ngv", + "no": "nb", + "nob": "nb", + "nor": "nb", + "npi": "ne", "nts": "pij", + "nya": "ny", + "oci": "oc", + "ojg": "oj", + "oji": "oj", + "ori": "or", + "orm": "om", + "ory": "or", + "oss": "os", "oun": "vaj", + "pan": "pa", + "pbu": "ps", "pcr": "adx", + "per": "fa", + "pes": "fa", + "pli": "pi", + "plt": "mg", "pmc": "huw", "pmu": "phr", + "pnb": "lah", + "pol": "pl", + "por": "pt", "ppa": "bfy", "ppr": "lcq", "pry": "prt", + "pus": "ps", "puz": "pub", + "que": "qu", + "quz": "qu", + "rmy": "rom", + "roh": "rm", + "ron": "ro", + "rum": "ro", + "run": "rn", + "rus": "ru", + "sag": "sg", + "san": "sa", "sca": "hle", + "scc": "sr", + "scr": "hr", + "sin": "si", "skk": "oyb", + "slk": "sk", + "slo": "sk", + "slv": "sl", + "sme": "se", + "smo": "sm", + "sna": "sn", + "snd": "sd", + "som": "so", + "sot": "st", + "spa": "es", + "spy": "kln", + "sqi": "sq", + "src": "sc", + "srd": "sc", + "srp": "sr", + "ssw": "ss", + "sun": "su", + "swa": "sw", + "swe": "sv", + "swh": "sw", + "tah": "ty", + "tam": "ta", + "tat": "tt", "tdu": "dtp", + "tel": "te", + "tgk": "tg", + "tgl": "fil", + "tha": "th", "thc": "tpo", "thx": "oyb", + "tib": "bo", "tie": "ras", + "tir": "ti", "tkk": "twm", + "tl": "fil", "tlw": "weo", "tmp": "tyj", "tne": "kak", - "tnf": "prs", + "ton": "to", "tsf": "taj", + "tsn": "tn", + "tso": "ts", + "ttq": "tmh", + "tuk": "tk", + "tur": "tr", + "tw": "ak", + "twi": "ak", + "uig": "ug", + "ukr": "uk", + "umu": "del", "uok": "ema", + "urd": "ur", + "uzb": "uz", + "uzn": "uz", + "ven": "ve", + "vie": "vi", + "vol": "vo", + "wel": "cy", + "wln": "wa", + "wol": "wo", "xba": "cax", + "xho": "xh", "xia": "acn", "xkh": "waw", + "xpe": "kpe", "xsj": "suj", + "xsl": "den", "ybd": "rki", + "ydd": "yi", + "yid": "yi", "yma": "lrr", "ymt": "mtm", + "yor": "yo", "yos": "zom", "yuu": "yug", - // region subtags with Preferred-Value mappings from IANA language subtag registry, file date 2018-04-23 - "BU": "MM", - "DD": "DE", - "FX": "FR", - "TP": "TL", - "YD": "YE", - "ZR": "CD" - }; + "zai": "zap", + "zha": "za", + "zho": "zh", + "zsm": "ms", + "zul": "zu", + "zyb": "za", + } /** - * Mappings from extlang subtags to preferred values. + * Mappings from region subtags to preferred values. * - * Spec: IANA Language Subtag Registry. + * Spec: http://unicode.org/reports/tr35/#Identifiers + * Version: CLDR, version 35 */ - var __extlangMappings = { - // extlang subtags with Preferred-Value mappings from IANA language subtag registry, file date 2018-04-23 - // values are arrays with [0] the replacement value, [1] (if present) the prefix to be removed - "aao": ["aao", "ar"], - "abh": ["abh", "ar"], - "abv": ["abv", "ar"], - "acm": ["acm", "ar"], - "acq": ["acq", "ar"], - "acw": ["acw", "ar"], - "acx": ["acx", "ar"], - "acy": ["acy", "ar"], - "adf": ["adf", "ar"], - "ads": ["ads", "sgn"], - "aeb": ["aeb", "ar"], - "aec": ["aec", "ar"], - "aed": ["aed", "sgn"], - "aen": ["aen", "sgn"], - "afb": ["afb", "ar"], - "afg": ["afg", "sgn"], - "ajp": ["ajp", "ar"], - "apc": ["apc", "ar"], - "apd": ["apd", "ar"], - "arb": ["arb", "ar"], - "arq": ["arq", "ar"], - "ars": ["ars", "ar"], - "ary": ["ary", "ar"], - "arz": ["arz", "ar"], - "ase": ["ase", "sgn"], - "asf": ["asf", "sgn"], - "asp": ["asp", "sgn"], - "asq": ["asq", "sgn"], - "asw": ["asw", "sgn"], - "auz": ["auz", "ar"], - "avl": ["avl", "ar"], - "ayh": ["ayh", "ar"], - "ayl": ["ayl", "ar"], - "ayn": ["ayn", "ar"], - "ayp": ["ayp", "ar"], - "bbz": ["bbz", "ar"], - "bfi": ["bfi", "sgn"], - "bfk": ["bfk", "sgn"], - "bjn": ["bjn", "ms"], - "bog": ["bog", "sgn"], - "bqn": ["bqn", "sgn"], - "bqy": ["bqy", "sgn"], - "btj": ["btj", "ms"], - "bve": ["bve", "ms"], - "bvl": ["bvl", "sgn"], - "bvu": ["bvu", "ms"], - "bzs": ["bzs", "sgn"], - "cdo": ["cdo", "zh"], - "cds": ["cds", "sgn"], - "cjy": ["cjy", "zh"], - "cmn": ["cmn", "zh"], - "coa": ["coa", "ms"], - "cpx": ["cpx", "zh"], - "csc": ["csc", "sgn"], - "csd": ["csd", "sgn"], - "cse": ["cse", "sgn"], - "csf": ["csf", "sgn"], - "csg": ["csg", "sgn"], - "csl": ["csl", "sgn"], - "csn": ["csn", "sgn"], - "csq": ["csq", "sgn"], - "csr": ["csr", "sgn"], - "czh": ["czh", "zh"], - "czo": ["czo", "zh"], - "doq": ["doq", "sgn"], - "dse": ["dse", "sgn"], - "dsl": ["dsl", "sgn"], - "dup": ["dup", "ms"], - "ecs": ["ecs", "sgn"], - "esl": ["esl", "sgn"], - "esn": ["esn", "sgn"], - "eso": ["eso", "sgn"], - "eth": ["eth", "sgn"], - "fcs": ["fcs", "sgn"], - "fse": ["fse", "sgn"], - "fsl": ["fsl", "sgn"], - "fss": ["fss", "sgn"], - "gan": ["gan", "zh"], - "gds": ["gds", "sgn"], - "gom": ["gom", "kok"], - "gse": ["gse", "sgn"], - "gsg": ["gsg", "sgn"], - "gsm": ["gsm", "sgn"], - "gss": ["gss", "sgn"], - "gus": ["gus", "sgn"], - "hab": ["hab", "sgn"], - "haf": ["haf", "sgn"], - "hak": ["hak", "zh"], - "hds": ["hds", "sgn"], - "hji": ["hji", "ms"], - "hks": ["hks", "sgn"], - "hos": ["hos", "sgn"], - "hps": ["hps", "sgn"], - "hsh": ["hsh", "sgn"], - "hsl": ["hsl", "sgn"], - "hsn": ["hsn", "zh"], - "icl": ["icl", "sgn"], - "iks": ["iks", "sgn"], - "ils": ["ils", "sgn"], - "inl": ["inl", "sgn"], - "ins": ["ins", "sgn"], - "ise": ["ise", "sgn"], - "isg": ["isg", "sgn"], - "isr": ["isr", "sgn"], - "jak": ["jak", "ms"], - "jax": ["jax", "ms"], - "jcs": ["jcs", "sgn"], - "jhs": ["jhs", "sgn"], - "jls": ["jls", "sgn"], - "jos": ["jos", "sgn"], - "jsl": ["jsl", "sgn"], - "jus": ["jus", "sgn"], - "kgi": ["kgi", "sgn"], - "knn": ["knn", "kok"], - "kvb": ["kvb", "ms"], - "kvk": ["kvk", "sgn"], - "kvr": ["kvr", "ms"], - "kxd": ["kxd", "ms"], - "lbs": ["lbs", "sgn"], - "lce": ["lce", "ms"], - "lcf": ["lcf", "ms"], - "liw": ["liw", "ms"], - "lls": ["lls", "sgn"], - "lsg": ["lsg", "sgn"], - "lsl": ["lsl", "sgn"], - "lso": ["lso", "sgn"], - "lsp": ["lsp", "sgn"], - "lst": ["lst", "sgn"], - "lsy": ["lsy", "sgn"], - "ltg": ["ltg", "lv"], - "lvs": ["lvs", "lv"], - "lws": ["lws", "sgn"], - "lzh": ["lzh", "zh"], - "max": ["max", "ms"], - "mdl": ["mdl", "sgn"], - "meo": ["meo", "ms"], - "mfa": ["mfa", "ms"], - "mfb": ["mfb", "ms"], - "mfs": ["mfs", "sgn"], - "min": ["min", "ms"], - "mnp": ["mnp", "zh"], - "mqg": ["mqg", "ms"], - "mre": ["mre", "sgn"], - "msd": ["msd", "sgn"], - "msi": ["msi", "ms"], - "msr": ["msr", "sgn"], - "mui": ["mui", "ms"], - "mzc": ["mzc", "sgn"], - "mzg": ["mzg", "sgn"], - "mzy": ["mzy", "sgn"], - "nan": ["nan", "zh"], - "nbs": ["nbs", "sgn"], - "ncs": ["ncs", "sgn"], - "nsi": ["nsi", "sgn"], - "nsl": ["nsl", "sgn"], - "nsp": ["nsp", "sgn"], - "nsr": ["nsr", "sgn"], - "nzs": ["nzs", "sgn"], - "okl": ["okl", "sgn"], - "orn": ["orn", "ms"], - "ors": ["ors", "ms"], - "pel": ["pel", "ms"], - "pga": ["pga", "ar"], - "pgz": ["pgz", "sgn"], - "pks": ["pks", "sgn"], - "prl": ["prl", "sgn"], - "prz": ["prz", "sgn"], - "psc": ["psc", "sgn"], - "psd": ["psd", "sgn"], - "pse": ["pse", "ms"], - "psg": ["psg", "sgn"], - "psl": ["psl", "sgn"], - "pso": ["pso", "sgn"], - "psp": ["psp", "sgn"], - "psr": ["psr", "sgn"], - "pys": ["pys", "sgn"], - "rms": ["rms", "sgn"], - "rsi": ["rsi", "sgn"], - "rsl": ["rsl", "sgn"], - "rsm": ["rsm", "sgn"], - "sdl": ["sdl", "sgn"], - "sfb": ["sfb", "sgn"], - "sfs": ["sfs", "sgn"], - "sgg": ["sgg", "sgn"], - "sgx": ["sgx", "sgn"], - "shu": ["shu", "ar"], - "slf": ["slf", "sgn"], - "sls": ["sls", "sgn"], - "sqk": ["sqk", "sgn"], - "sqs": ["sqs", "sgn"], - "ssh": ["ssh", "ar"], - "ssp": ["ssp", "sgn"], - "ssr": ["ssr", "sgn"], - "svk": ["svk", "sgn"], - "swc": ["swc", "sw"], - "swh": ["swh", "sw"], - "swl": ["swl", "sgn"], - "syy": ["syy", "sgn"], - "szs": ["szs", "sgn"], - "tmw": ["tmw", "ms"], - "tse": ["tse", "sgn"], - "tsm": ["tsm", "sgn"], - "tsq": ["tsq", "sgn"], - "tss": ["tss", "sgn"], - "tsy": ["tsy", "sgn"], - "tza": ["tza", "sgn"], - "ugn": ["ugn", "sgn"], - "ugy": ["ugy", "sgn"], - "ukl": ["ukl", "sgn"], - "uks": ["uks", "sgn"], - "urk": ["urk", "ms"], - "uzn": ["uzn", "uz"], - "uzs": ["uzs", "uz"], - "vgt": ["vgt", "sgn"], - "vkk": ["vkk", "ms"], - "vkt": ["vkt", "ms"], - "vsi": ["vsi", "sgn"], - "vsl": ["vsl", "sgn"], - "vsv": ["vsv", "sgn"], - "wbs": ["wbs", "sgn"], - "wuu": ["wuu", "zh"], - "xki": ["xki", "sgn"], - "xml": ["xml", "sgn"], - "xmm": ["xmm", "ms"], - "xms": ["xms", "sgn"], - "yds": ["yds", "sgn"], - "ygs": ["ygs", "sgn"], - "yhs": ["yhs", "sgn"], - "ysl": ["ysl", "sgn"], - "yue": ["yue", "zh"], - "zib": ["zib", "sgn"], - "zlm": ["zlm", "ms"], - "zmi": ["zmi", "ms"], - "zsl": ["zsl", "sgn"], - "zsm": ["zsm", "ms"], + var __regionMappings = { + // property names and values must be in canonical case + + "004": "AF", + "008": "AL", + "010": "AQ", + "012": "DZ", + "016": "AS", + "020": "AD", + "024": "AO", + "028": "AG", + "031": "AZ", + "032": "AR", + "036": "AU", + "040": "AT", + "044": "BS", + "048": "BH", + "050": "BD", + "051": "AM", + "052": "BB", + "056": "BE", + "060": "BM", + "062": "034", + "064": "BT", + "068": "BO", + "070": "BA", + "072": "BW", + "074": "BV", + "076": "BR", + "084": "BZ", + "086": "IO", + "090": "SB", + "092": "VG", + "096": "BN", + "100": "BG", + "104": "MM", + "108": "BI", + "112": "BY", + "116": "KH", + "120": "CM", + "124": "CA", + "132": "CV", + "136": "KY", + "140": "CF", + "144": "LK", + "148": "TD", + "152": "CL", + "156": "CN", + "158": "TW", + "162": "CX", + "166": "CC", + "170": "CO", + "174": "KM", + "175": "YT", + "178": "CG", + "180": "CD", + "184": "CK", + "188": "CR", + "191": "HR", + "192": "CU", + "196": "CY", + "203": "CZ", + "204": "BJ", + "208": "DK", + "212": "DM", + "214": "DO", + "218": "EC", + "222": "SV", + "226": "GQ", + "230": "ET", + "231": "ET", + "232": "ER", + "233": "EE", + "234": "FO", + "238": "FK", + "239": "GS", + "242": "FJ", + "246": "FI", + "248": "AX", + "249": "FR", + "250": "FR", + "254": "GF", + "258": "PF", + "260": "TF", + "262": "DJ", + "266": "GA", + "268": "GE", + "270": "GM", + "275": "PS", + "276": "DE", + "278": "DE", + "280": "DE", + "288": "GH", + "292": "GI", + "296": "KI", + "300": "GR", + "304": "GL", + "308": "GD", + "312": "GP", + "316": "GU", + "320": "GT", + "324": "GN", + "328": "GY", + "332": "HT", + "334": "HM", + "336": "VA", + "340": "HN", + "344": "HK", + "348": "HU", + "352": "IS", + "356": "IN", + "360": "ID", + "364": "IR", + "368": "IQ", + "372": "IE", + "376": "IL", + "380": "IT", + "384": "CI", + "388": "JM", + "392": "JP", + "398": "KZ", + "400": "JO", + "404": "KE", + "408": "KP", + "410": "KR", + "414": "KW", + "417": "KG", + "418": "LA", + "422": "LB", + "426": "LS", + "428": "LV", + "430": "LR", + "434": "LY", + "438": "LI", + "440": "LT", + "442": "LU", + "446": "MO", + "450": "MG", + "454": "MW", + "458": "MY", + "462": "MV", + "466": "ML", + "470": "MT", + "474": "MQ", + "478": "MR", + "480": "MU", + "484": "MX", + "492": "MC", + "496": "MN", + "498": "MD", + "499": "ME", + "500": "MS", + "504": "MA", + "508": "MZ", + "512": "OM", + "516": "NA", + "520": "NR", + "524": "NP", + "528": "NL", + "531": "CW", + "533": "AW", + "534": "SX", + "535": "BQ", + "540": "NC", + "548": "VU", + "554": "NZ", + "558": "NI", + "562": "NE", + "566": "NG", + "570": "NU", + "574": "NF", + "578": "NO", + "580": "MP", + "581": "UM", + "583": "FM", + "584": "MH", + "585": "PW", + "586": "PK", + "591": "PA", + "598": "PG", + "600": "PY", + "604": "PE", + "608": "PH", + "612": "PN", + "616": "PL", + "620": "PT", + "624": "GW", + "626": "TL", + "630": "PR", + "634": "QA", + "638": "RE", + "642": "RO", + "643": "RU", + "646": "RW", + "652": "BL", + "654": "SH", + "659": "KN", + "660": "AI", + "662": "LC", + "663": "MF", + "666": "PM", + "670": "VC", + "674": "SM", + "678": "ST", + "682": "SA", + "686": "SN", + "688": "RS", + "690": "SC", + "694": "SL", + "702": "SG", + "703": "SK", + "704": "VN", + "705": "SI", + "706": "SO", + "710": "ZA", + "716": "ZW", + "720": "YE", + "724": "ES", + "728": "SS", + "729": "SD", + "732": "EH", + "736": "SD", + "740": "SR", + "744": "SJ", + "748": "SZ", + "752": "SE", + "756": "CH", + "760": "SY", + "762": "TJ", + "764": "TH", + "768": "TG", + "772": "TK", + "776": "TO", + "780": "TT", + "784": "AE", + "788": "TN", + "792": "TR", + "795": "TM", + "796": "TC", + "798": "TV", + "800": "UG", + "804": "UA", + "807": "MK", + "818": "EG", + "826": "GB", + "830": "JE", + "831": "GG", + "832": "JE", + "833": "IM", + "834": "TZ", + "840": "US", + "850": "VI", + "854": "BF", + "858": "UY", + "860": "UZ", + "862": "VE", + "876": "WF", + "882": "WS", + "886": "YE", + "887": "YE", + "891": "RS", + "894": "ZM", + "958": "AA", + "959": "QM", + "960": "QN", + "962": "QP", + "963": "QQ", + "964": "QR", + "965": "QS", + "966": "QT", + "967": "EU", + "968": "QV", + "969": "QW", + "970": "QX", + "971": "QY", + "972": "QZ", + "973": "XA", + "974": "XB", + "975": "XC", + "976": "XD", + "977": "XE", + "978": "XF", + "979": "XG", + "980": "XH", + "981": "XI", + "982": "XJ", + "983": "XK", + "984": "XL", + "985": "XM", + "986": "XN", + "987": "XO", + "988": "XP", + "989": "XQ", + "990": "XR", + "991": "XS", + "992": "XT", + "993": "XU", + "994": "XV", + "995": "XW", + "996": "XX", + "997": "XY", + "998": "XZ", + "999": "ZZ", + "BU": "MM", + "CS": "RS", + "CT": "KI", + "DD": "DE", + "DY": "BJ", + "FQ": "AQ", + "FX": "FR", + "HV": "BF", + "JT": "UM", + "MI": "UM", + "NH": "VU", + "NQ": "AQ", + "PU": "UM", + "PZ": "PA", + "QU": "EU", + "RH": "ZW", + "TP": "TL", + "UK": "GB", + "VD": "VN", + "WK": "UM", + "YD": "YE", + "YU": "RS", + "ZR": "CD", }; @@ -725,29 +1030,226 @@ function isCanonicalizedStructurallyValidLanguageTag(locale) { var subtags = locale.split("-"); var i = 0; - // handle standard part: all subtags before first singleton or "x" + // handle standard part: all subtags before first variant or singleton subtag + var language; + var script; + var region; while (i < subtags.length) { var subtag = subtags[i]; - if (subtag.length === 1 && (i > 0 || subtag === "x")) { + if (i === 0) { + language = subtag; + } else if (subtag.length === 2 || subtag.length === 3) { + region = subtag.toUpperCase(); + } else if (subtag.length === 4 && !("0" <= subtag[0] && subtag[0] <= "9")) { + script = subtag[0].toUpperCase() + subtag.substring(1).toLowerCase(); + } else { break; - } else if (i !== 0 && subtag.length === 2) { - subtag = subtag.toUpperCase(); - } else if (subtag.length === 4) { - subtag = subtag[0].toUpperCase() + subtag.substring(1).toLowerCase(); } - if (__subtagMappings.hasOwnProperty(subtag)) { - subtag = __subtagMappings[subtag]; - } else if (__extlangMappings.hasOwnProperty(subtag)) { - subtag = __extlangMappings[subtag][0]; - if (i === 1 && __extlangMappings[subtag][1] === subtags[0]) { - subtags.shift(); - i--; - } - } - subtags[i] = subtag; i++; } - var normal = subtags.slice(0, i).join("-"); + + if (__languageMappings.hasOwnProperty(language)) { + language = __languageMappings[language]; + } else { + // Language subtags with complex mappings, CLDR 35. + switch (language) { + case "cnr": + language = "sr"; + if (region === undefined) { + region = "ME"; + } + break; + case "drw": + case "prs": + case "tnf": + language = "fa"; + if (region === undefined) { + region = "AF"; + } + break; + case "hbs": + case "sh": + language = "sr"; + if (script === undefined) { + script = "Latn"; + } + break; + case "swc": + language = "sw"; + if (region === undefined) { + region = "CD"; + } + break; + } + } + + if (region !== undefined) { + if (__regionMappings.hasOwnProperty(region)) { + region = __regionMappings[region]; + } else { + // Region subtags with complex mappings, CLDR 35. + switch (region) { + case "172": + if (language === "ab" || language === "ka" || language === "os" || + (language === "und" && script === "Geor") || language === "xmf") { + region = "GE"; + } + else if (language === "az" || language === "tkr" || language === "tly" || language === "ttt") { + region = "AZ"; + } + else if (language === "be") { + region = "BY"; + } + else if (language === "crh" || language === "got" || language === "ji" || language === "rue" || + language === "uk" || (language === "und" && script === "Goth")) { + region = "UA"; + } + else if (language === "gag") { + region = "MD"; + } + else if (language === "hy" || (language === "und" && script === "Armn")) { + region = "AM"; + } + else if (language === "kaa" || language === "sog" || (language === "und" && script === "Sogd") || + (language === "und" && script === "Sogo") || language === "uz") { + region = "UZ"; + } + else if (language === "kk" || (language === "ug" && script === "Cyrl")) { + region = "KZ"; + } + else if (language === "ky") { + region = "KG"; + } + else if (language === "tg") { + region = "TJ"; + } + else if (language === "tk") { + region = "TM"; + } + else { + region = "RU"; + } + break; + case "200": + if (language === "sk") { + region = "SK"; + } + else { + region = "CZ"; + } + break; + case "530": + case "532": + case "AN": + if (language === "vic") { + region = "SX"; + } + else { + region = "CW"; + } + break; + case "536": + case "NT": + if (language === "akk" || language === "ckb" || (language === "ku" && script === "Arab") || + language === "mis" || language === "syr" || (language === "und" && script === "Xsux") || + (language === "und" && script === "Hatr") || (language === "und" && script === "Syrc")) { + region = "IQ"; + } + else { + region = "SA"; + } + break; + case "582": + case "PC": + if (language === "mh") { + region = "MH"; + } + else if (language === "pau") { + region = "PW"; + } + else { + region = "FM"; + } + break; + case "810": + case "SU": + if (language === "ab" || language === "ka" || language === "os" || language === "xmf" || + (language === "und" && script === "Geor")) { + region = "GE"; + } + else if (language === "az" || language === "tkr" || language === "tly" || language === "ttt") { + region = "AZ"; + } + else if (language === "be") { + region = "BY"; + } + else if (language === "crh" || language === "got" || language === "ji" || language === "rue" || + language === "uk" || (language === "und" && script === "Goth")) { + region = "UA"; + } + else if (language === "et" || language === "vro") { + region = "EE"; + } + else if (language === "gag") { + region = "MD"; + } + else if (language === "hy" || (language === "und" && script === "Armn")) { + region = "AM"; + } + else if (language === "kaa" || language === "sog" || (language === "und" && script === "Sogd") || + (language === "und" && script === "Sogo") || language === "uz") { + region = "UZ"; + } + else if (language === "kk" || (language === "ug" && script === "Cyrl")) { + region = "KZ"; + } + else if (language === "ky") { + region = "KG"; + } + else if (language === "lt" || language === "sgs") { + region = "LT"; + } + else if (language === "ltg" || language === "lv") { + region = "LV"; + } + else if (language === "tg") { + region = "TJ"; + } + else if (language === "tk") { + region = "TM"; + } + else { + region = "RU"; + } + break; + case "890": + if (language === "bs") { + region = "BA"; + } + else if (language === "hr") { + region = "HR"; + } + else if (language === "mk") { + region = "MK"; + } + else if (language === "sl") { + region = "SI"; + } + else { + region = "RS"; + } + break; + } + } + } + + // handle variants + var variants = []; + while (i < subtags.length && subtags[i].length > 1) { + variants.push(subtags[i]); + i += 1; + } + variants.sort(); // handle extensions var extensions = []; @@ -769,7 +1271,16 @@ function isCanonicalizedStructurallyValidLanguageTag(locale) { } // put everything back together - var canonical = normal; + var canonical = language; + if (script !== undefined) { + canonical += "-" + script; + } + if (region !== undefined) { + canonical += "-" + region; + } + if (variants.length > 0) { + canonical += "-" + variants.join("-"); + } if (extensions.length > 0) { canonical += "-" + extensions.join("-"); }