#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020 Mozilla Corporation. All rights reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Original file:
# https://hg.mozilla.org/mozilla-central/file/tip/js/src/builtin/intl/make_intl_data.py
""" Usage:
make_intl_data.py langtags [cldr_core.zip]
Target "langtags":
This script extracts information about 1) mappings between deprecated and
current Unicode BCP 47 locale identifiers, and 2) deprecated and current
BCP 47 Unicode extension values from CLDR.
"""
from __future__ import print_function
import os
import re
import io
import sys
from contextlib import closing
from functools import partial
from operator import itemgetter
from zipfile import ZipFile
if sys.version_info.major == 2:
from urllib2 import urlopen
else:
from urllib.request import urlopen
def read_supplemental_data(core_file):
""" Reads CLDR Supplemental Data and extracts information for Intl.js.
Information extracted:
- grandfatheredMappings: mappings from grandfathered tags to preferred
complete language tags
- languageMappings: mappings from language subtags to preferred subtags
- complexLanguageMappings: mappings from language subtags with complex rules
- regionMappings: mappings from region subtags to preferred subtags
- complexRegionMappings: mappings from region subtags with complex rules
- variantMappings: mappings from variant subtags to preferred subtags
Returns these mappings as dictionaries.
"""
import xml.etree.ElementTree as ET
# From Unicode BCP 47 locale identifier .
re_unicode_language_id = re.compile(
r"""
^
# unicode_language_id = unicode_language_subtag
# unicode_language_subtag = alpha{2,3} | alpha{5,8}
(?P[a-z]{2,3}|[a-z]{5,8})
# (sep unicode_script_subtag)?
# unicode_script_subtag = alpha{4}
(?:-(?P