#!/usr/bin/env python # -*- coding: utf-8 -*- # # Copyright (C) 2020 Mozilla Corporation. All rights reserved. # # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # # Original file: # https://hg.mozilla.org/mozilla-central/file/tip/js/src/builtin/intl/make_intl_data.py """ Usage: make_intl_data.py langtags [cldr_core.zip] Target "langtags": This script extracts information about 1) mappings between deprecated and current Unicode BCP 47 locale identifiers, and 2) deprecated and current BCP 47 Unicode extension value from CLDR. """ from __future__ import print_function import os import re import io import sys from contextlib import closing from functools import partial from operator import itemgetter from zipfile import ZipFile if sys.version_info.major == 2: from urllib2 import urlopen else: from urllib.request import urlopen def read_supplemental_data(core_file): """ Reads CLDR Supplemental Data and extracts information for Intl.js. Information extracted: - grandfatheredMappings: mappings from grandfathered tags to preferred complete language tags - languageMappings: mappings from language subtags to preferred subtags - complexLanguageMappings: mappings from language subtags with complex rules - regionMappings: mappings from region subtags to preferred subtags - complexRegionMappings: mappings from region subtags with complex rules - variantMappings: mappings from variant subtags to preferred subtags Returns these mappings as dictionaries. """ import xml.etree.ElementTree as ET # From Unicode BCP 47 locale identifier . re_unicode_language_id = re.compile( r""" ^ # unicode_language_id = unicode_language_subtag # unicode_language_subtag = alpha{2,3} | alpha{5,8} (?P[a-z]{2,3}|[a-z]{5,8}) # (sep unicode_script_subtag)? # unicode_script_subtag = alpha{4} (?:-(?P