#!/usr/bin/python3

# Copyright © 2012, 2013, 2014 Jakub Wilk <jwilk@jwilk.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import datetime
import functools
import os
import subprocess as ipc
import sys
import xml.etree.cElementTree as etree

class Panic(ValueError):
    """Raised when the iso-codes data does not look the way this script expects."""

@functools.lru_cache()
def get_iso_codes_version():
    """Return the iso-codes version string reported by pkg-config.

    The result is cached, so pkg-config is run at most once.
    """
    raw_output = ipc.check_output(['pkg-config', 'iso-codes', '--modversion'])
    return raw_output.decode('ASCII').strip()

@functools.lru_cache()
def get_iso_codes_dir():
    """Return the directory holding the iso-codes XML files.

    The path is built from the ``prefix`` variable that pkg-config reports
    for iso-codes.  The result is cached, so pkg-config is run at most once.
    """
    raw_prefix = ipc.check_output(['pkg-config', 'iso-codes', '--variable=prefix'])
    return '{prefix}/share/xml/iso-codes/'.format(
        prefix=raw_prefix.decode('ASCII').strip(),
    )

def main():
    """Regenerate ``data/iso-codes`` (relative to this script's parent directory).

    The generator functions write with ``print()``, so ``sys.stdout`` is
    temporarily pointed at ``data/iso-codes.tmp``.  Only after everything has
    been written and the file has been closed is it renamed over the real
    target.  ``sys.stdout`` is always restored, and the temporary file is
    always closed, even if one of the generators raises.
    """
    basedir = os.path.join(
        os.path.dirname(__file__),
        os.pardir,
    )
    path = os.path.join(basedir, 'data', 'iso-codes')
    tmp_path = path + '.tmp'
    saved_stdout = sys.stdout
    with open(tmp_path, 'wt', encoding='UTF-8') as output:
        sys.stdout = output
        try:
            print('''\
# This file has been generated automatically by private/update-iso-codes.
# Do not edit.
# iso-codes version: {version}
# Last update: {today}
'''.format(version=get_iso_codes_version(), today=datetime.date.today()))
            generate_iso_639()
            generate_iso_3166()
        finally:
            # Never leave sys.stdout bound to a file that is about to be
            # closed; later output (or an error report) must reach the
            # original stream.
            sys.stdout = saved_stdout
    # Reached only on success, so a failed run never replaces the old file.
    os.rename(tmp_path, path)

def _require_code_length(code, length):
    """Raise Panic unless *code* is a string of exactly *length* characters."""
    # A missing attribute (None) is reported as a Panic too, rather than
    # surfacing as a TypeError from len(None).
    if code is None or len(code) != length:
        raise Panic('len({!r}) != {}'.format(code, length))


def _read_iso_639_2_b_codes():
    """Return a dict mapping 639-2/T codes to 639-2/B codes, read from iso_639.xml.

    Only entries whose ``iso_639_2T_code`` and ``iso_639_2B_code`` attributes
    differ are included.
    """
    l2t_to_2b = {}
    xml_path = os.path.join(get_iso_codes_dir(), 'iso_639.xml')
    for _event, element in etree.iterparse(xml_path):
        if element.tag != 'iso_639_entry':
            continue
        l2b = element.get('iso_639_2B_code')
        l2t = element.get('iso_639_2T_code')
        if l2b == l2t == 'qaa-qtz':
            # This entry is a range, not a three-letter code; it would fail
            # the length check below.
            continue
        for l in (l2b, l2t):
            _require_code_length(l, 3)
        if l2b != l2t:
            l2t_to_2b[l2t] = l2b
    return l2t_to_2b


def generate_iso_639():
    """Print the ``[language-codes]`` section built from the ISO 639 XML files.

    Every kept ``iso_639_3_entry`` yields a code list: the two-letter part 1
    code first (when there is a non-deprecated one), then the three-letter
    code, then the 639-2/B code if it differs.  For each list, every code
    after the first is printed as ``alias = first``; a language with a single
    code is printed as ``code =``.  Lines are sorted by the left-hand side.

    Raises:
        Panic: if the XML contains a code of unexpected length, mismatching
            ``id``/``part2_code``, an unknown scope or status, or a duplicate
            reference name.
    """
    # =======================
    # ISO 639: language codes
    # =======================
    l2t_to_2b = _read_iso_639_2_b_codes()
    iso_639 = {}
    xml_path = os.path.join(get_iso_codes_dir(), 'iso_639_3.xml')
    for _event, element in etree.iterparse(xml_path):
        if element.tag != 'iso_639_3_entry':
            continue
        code = element.get('id')
        _require_code_length(code, 3)
        code1 = element.get('part1_code')
        code2 = element.get('part2_code')
        if code2 is None:
            # We're not interested in languages that are not in 639-2 (yet?).
            continue
        if code2 != code:
            raise Panic('{!r} != {!r}'.format(code, code2))
        scope = element.get('scope')
        if scope in {'S', 'L'}:
            # Not a real language, ignore.
            continue
        elif scope == 'C':
            # Collective languages are kept only when they have a part 1
            # code; we're not interested in the others (yet?).
            if code1 is None:
                continue
        elif scope in {'M', 'I'}:
            pass
        else:
            raise Panic('unknown scope: {!r}'.format(scope))
        status = element.get('status')
        if status == 'Active':
            pass
        elif status == 'Retired':
            continue
        else:
            raise Panic('unknown status: {!r}'.format(status))
        reference_name = element.get('reference_name')
        if reference_name in iso_639:
            raise Panic('duplicate reference name: {!r}'.format(reference_name))
        if code1 is not None and code1.endswith('(deprecated)'):
            # A deprecated part 1 code is treated as if it were absent.
            code1 = None
        if code1 is None:
            codelist = [code]
        else:
            _require_code_length(code1, 2)
            codelist = [code1, code]
        if code in l2t_to_2b:
            codelist.append(l2t_to_2b[code])
        iso_639[reference_name] = codelist
    print('[language-codes]')
    iso_639_rev = {}
    for code, *aliases in iso_639.values():
        for alias in aliases:
            iso_639_rev[alias] = code
        if not aliases:
            iso_639_rev[code] = ''
    for alias, code in sorted(iso_639_rev.items()):
        # rstrip() turns "xxx = " into "xxx =" for codes without a target.
        print('{} = {}'.format(alias, code).rstrip())
    print()

def generate_iso_3166():
    """Print the ``[territory-codes]`` section built from ``iso_3166.xml``.

    The ``alpha_2_code`` of every ``iso_3166_entry`` element is collected and
    printed, sorted, one ``CC =`` line per code, followed by a blank line and
    a trailing editor modeline comment.

    Raises:
        ValueError: if an ``iso_3166_entry`` has no ``alpha_2_code`` attribute.
    """
    # =========================
    # ISO 3166: territory codes
    # =========================
    iso_3166 = set()
    xml_path = os.path.join(get_iso_codes_dir(), 'iso_3166.xml')
    for _event, element in etree.iterparse(xml_path):
        if element.tag != 'iso_3166_entry':
            continue
        cc = element.get('alpha_2_code')
        if cc is None:
            raise ValueError('iso_3166_entry without alpha_2_code attribute')
        iso_3166.add(cc)
    print('[territory-codes]')
    for cc in sorted(iso_3166):
        print('{} ='.format(cc))
    print()
    # The literal is split in two, presumably so that editors do not take
    # this source line as a modeline for the script itself.
    print('# vi''m:ft=dosini')

# Regenerate the data file only when run as a script, not when imported.
if __name__ == '__main__':
    main()

# vim:ts=4 sw=4 et
