[go: up one dir, main page]

unic 0.3.0

UNIC - Unicode and Internationalization Crates
Documentation
# Copyright 2017 The UNIC Project Developers.
#
# See the COPYRIGHT file at the top-level directory of this distribution.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT
# or http://opensource.org/licenses/MIT>, at your option. This file may not be
# copied, modified, or distributed except according to those terms.


import os
import sys

from collections import OrderedDict
from itertools import chain


sys.path.append(os.path.join(os.path.dirname(__file__)))
import common

from unicode_utils import is_surrogate, codepoints_from_string


# Width, in spaces, of the indentation placed before each emitted row.
INDENT_COUNT = 4
INDENT = INDENT_COUNT * " "


# Header line written at the top of every emitted file; the `%s` slot is
# filled with the path of the generating script.
PREAMBLE = "// WARNING: Auto-generated by `%s`. DO NOT EDIT MANUALLY!\n"


def char_escape(cp):
    """Return the brace-delimited unicode escape for codepoint `cp`.

    The codepoint is rendered as unpadded lowercase hexadecimal, so 0x41
    becomes the six characters `\\u{41}`.

    An assertion rejects any codepoint for which `is_surrogate` is true.
    """
    assert not is_surrogate(cp), "Trying to output a surrogate codepoint!"
    escaped = "\\u{%x}" % cp
    return escaped


def char_literal(cp):
    """Return the escape of codepoint `cp` wrapped in single quotes."""
    escaped = char_escape(cp)
    return "'%s'" % escaped


def string_literal(codepoints):
    """Return a double-quoted literal made of the escapes of `codepoints`.

    Each codepoint is rendered with `char_escape` and the results are
    concatenated, in order, between a pair of double quotes. An empty
    iterable yields just the two quote characters.
    """
    body = ''.join(char_escape(point) for point in codepoints)
    return '"' + body + '"'


def title_case(name):
    """Convert an underscore- or space-separated `name` to `TitleCase`.

    Surrounding whitespace is stripped, underscores are treated as spaces,
    every word is title-cased, and finally all spaces are removed.
    """
    spaced = name.strip().replace('_', ' ')
    capitalized = spaced.title()
    return capitalized.replace(' ', '')


def get_rel_path(path):
    """Return `path` expressed relative to `common.ROOT_DIR`."""
    root = common.ROOT_DIR
    return os.path.relpath(path, start=root)


def emit_preamble(
    script_path,
    output_file,
):
    """Write the auto-generated warning header to `output_file`.

    The header names the generating script, given by `script_path`, as a
    path relative to the project root (see `get_rel_path`).
    """
    header = PREAMBLE % get_rel_path(script_path)
    output_file.write(header)


def emit_value(
    script_path,
    output_file,
    value,
    print_fun=lambda x: "%s" % x,
):
    """Write the preamble, then a single rendered value and a newline.

    `print_fun` turns `value` into the text to write; by default the value
    is formatted with `%s`.
    """
    emit_preamble(script_path, output_file)
    rendered = print_fun(value)
    output_file.write(rendered)
    output_file.write("\n")


def emit_strings(
    script_path,
    output_file,
    strings,
    print_fun=lambda x: "%s" % x,
):
    """Emit all codepoints of `strings` as one multi-line string literal.

    Every string is split into codepoints with `codepoints_from_string`;
    the codepoints of all strings are concatenated in order and escaped
    with `char_escape`. The escapes are joined with a backslash-newline
    separator and wrapped in double quotes, with a backslash-newline right
    after the opening quote and right before the closing one.

    NOTE: `print_fun` is accepted but never used by this function.
    """
    per_string = [codepoints_from_string(st) for st in strings]
    escapes = (char_escape(cp) for cp in chain.from_iterable(per_string))
    emit_value(
        script_path,
        output_file,
        "\\\n".join(escapes),
        print_fun=lambda body: '"\\\n%s\\\n"' % body,
    )


def _write_row(
    output_file,
    value,
    print_fun=lambda x: "%s" % x,
):
    """Write one indented row: the rendered `value`, a comma, a newline."""
    rendered = print_fun(value)
    output_file.write("%s%s,\n" % (INDENT, rendered))


def _write_rows(
    output_file,
    data,
    print_fun=lambda x: "%s" % x,
):
    """Write each item of `data` as its own row, in iteration order."""
    for item in data:
        _write_row(output_file, item, print_fun=print_fun)


def emit_table(
    script_path,
    output_file,
    data,
    print_fun=lambda x: "%s" % x,
):
    """Emit `data` as a slice literal: preamble, `&[`, the rows, `]`.

    Each item of `data` is rendered with `print_fun` and written as one
    indented, comma-terminated row.
    """
    emit_preamble(script_path, output_file)
    write = output_file.write
    write("&[\n")
    _write_rows(output_file, data, print_fun=print_fun)
    write("]\n")


def emit_class(
    script_path,
    output_file,
    data,
    print_fun=lambda x: "%s" % x,
):
    """Emit `data` as a brace-delimited block: preamble, `{`, rows, `}`.

    Each item of `data` is rendered with `print_fun` and written as one
    indented, comma-terminated row.
    """
    emit_preamble(script_path, output_file)
    write = output_file.write
    write("{\n")
    _write_rows(output_file, data, print_fun=print_fun)
    write("}\n")


def emit_lookup_tables(
    script_path,
    lookup_file,
    values_file,
    data,
    value_fun=None,
    value_print_fun=char_literal,
):
    """Emit a key-to-slice lookup table and its companion values table.

    Keys of `data` are processed in sorted order. For each key the value
    sequence is obtained from `value_fun(key)` (by default `data[key]`) and
    converted to a tuple. Each distinct tuple is assigned an
    `(offset, length)` pair, where the offset is the running total of the
    lengths of the distinct tuples seen before it; keys with identical
    tuples share the same pair.

    Two tables are written with `emit_table`:

    * `lookup_file`: one row per key, of the form
      `(<char literal of key>, Slice { offset: N, length: M })`.
    * `values_file`: one row per distinct tuple, in first-seen order, with
      its elements rendered by `value_print_fun` and joined by `", "`.
    """
    # `sorted()` accepts both the list returned by Python 2's `dict.keys()`
    # and the view returned by Python 3's; calling `.sort()` on the result
    # of `keys()` raises AttributeError on Python 3.
    keys = sorted(data.keys())

    if value_fun is None:
        def value_fun(x):
            return data[x]

    lookup = OrderedDict()
    values = OrderedDict()
    # Position at which the next previously unseen value tuple will start.
    offset = 0

    for k in keys:
        value = tuple(value_fun(k))
        if value not in values:
            values[value] = (offset, len(value))
            offset += len(value)
        lookup[k] = values[value]

    # Iterating `lookup` yields its keys, so the row printer receives a key
    # and reads the matching slice back from the mapping.
    emit_table(
        script_path,
        lookup_file,
        lookup,
        print_fun=lambda k:
            "(%s, Slice { offset: %d, length: %d })"
            % (char_literal(k), lookup[k][0], lookup[k][1])
    )

    emit_table(
        script_path,
        values_file,
        values.keys(),
        print_fun=lambda v: ", ".join(value_print_fun(c) for c in v)
    )