#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2020 Mozilla Corporation. All rights reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Original file:
# https://hg.mozilla.org/mozilla-central/file/tip/js/src/builtin/intl/make_intl_data.py
""" Usage:
    make_intl_data.py langtags [cldr_core.zip]
    Target "langtags":
    This script extracts information about 1) mappings between deprecated and
    current Unicode BCP 47 locale identifiers, and 2) deprecated and current
    BCP 47 Unicode extension values from CLDR.
"""
from __future__ import print_function
import os
import re
import io
import sys
from contextlib import closing
from functools import partial
from operator import itemgetter
from zipfile import ZipFile
if sys.version_info.major == 2:
    from urllib2 import urlopen
else:
    from urllib.request import urlopen
def read_supplemental_data(core_file):
    """ Reads CLDR Supplemental Data and extracts information for Intl.js.
        Information extracted:
        - grandfatheredMappings: mappings from grandfathered tags to preferred
          complete language tags
        - languageMappings: mappings from language subtags to preferred subtags
        - complexLanguageMappings: mappings from language subtags with complex rules
        - regionMappings: mappings from region subtags to preferred subtags
        - complexRegionMappings: mappings from region subtags with complex rules
        - variantMappings: mappings from variant subtags to preferred subtags
        Returns these mappings as dictionaries.
    """
    import xml.etree.ElementTree as ET
    # Grammar taken from the Unicode BCP 47 locale identifier specification.
    re_unicode_language_id = re.compile(
        r"""
        ^
        # unicode_language_id = unicode_language_subtag
        #     unicode_language_subtag = alpha{2,3} | alpha{5,8}
        (?P[a-z]{2,3}|[a-z]{5,8})
        # (sep unicode_script_subtag)?
        #     unicode_script_subtag = alpha{4}
        (?:-(?P