import fileinput
import re
import os
import sys
from collections import OrderedDict
sys.path.append(os.path.join(os.path.dirname(__file__), "pylib"))
from os.path import join
import rustout
import common
from rustout import char_escape
from common import path, memoize
from unicode_utils import is_surrogate, cp_to_char
# Output directory for each group of generated tables, keyed by component
# name. The values are handed to common.cleanup_output_dirs() and used as the
# destination directory by the emit_* functions when run as a script.
OUTPUT_DIRS = {"IDNA": path("components/idna/src/tables")}
def data_path(name):
    """Return the path of the file *name* inside ``common.IDNA_DATA_DIR``."""
    data_dir = common.IDNA_DATA_DIR
    return os.path.join(data_dir, name)
def test_data_path(name):
    """Return the path of the file *name* inside ``common.IDNA_TEST_DATA_DIR``."""
    test_data_dir = common.IDNA_TEST_DATA_DIR
    return os.path.join(test_data_dir, name)
@memoize
def get_unicode_version():
    """Return the Unicode version declared in the IDNA data ``ReadMe.txt``.

    The file is searched for the marker ``for Version X.Y.Z of``.

    Returns:
        A 3-tuple of strings ``(major, minor, micro)`` exactly as captured
        from the file (the components are not converted to ``int``).

    Raises:
        ValueError: if the version marker is not present in the file.
    """
    readme_path = data_path("ReadMe.txt")
    with open(readme_path) as readme_file:
        contents = readme_file.read()
    # Raw string: "\d" and "\." are regex escapes, not Python string escapes;
    # a non-raw literal triggers an invalid-escape warning on Python 3.
    match = re.search(r"for Version (\d+)\.(\d+)\.(\d+) of", contents)
    if match is None:
        # Fail with a message naming the file instead of an opaque
        # AttributeError on None.
        raise ValueError(
            "could not find a Unicode version marker in %s" % readme_path)
    return match.groups()
def emit_unicode_version(dir):
    """Write the Unicode version to ``unicode_version.rsv`` inside *dir*.

    The version tuple from ``get_unicode_version()`` is handed to
    ``rustout.emit_value`` and rendered as ``(major, minor, micro)``.
    """
    def format_version(version):
        # *version* is the 3-tuple returned by get_unicode_version().
        return "(%s, %s, %s)" % version

    target = os.path.join(dir, 'unicode_version.rsv')
    with open(target, "w") as version_file:
        rustout.emit_value(
            __file__,
            version_file,
            get_unicode_version(),
            print_fun=format_version,
        )
@memoize
def get_idna_mapping_table():
    """Parse ``IdnaMappingTable.txt`` into range entries and a string table.

    Returns:
        A ``(ranges, map_string)`` tuple where

        * ``ranges`` is a list of ``(first, last, mapping)`` tuples in file
          order. ``first`` and ``last`` are the inclusive code point bounds
          as ints; ``mapping`` is the title-cased status text, followed by a
          ``StringTableSlice { ... }`` expression when the row carries a
          replacement string (or is an empty ``Deviation``).
        * ``map_string`` is an ``OrderedDict`` mapping each distinct
          replacement string to ``(byte_start, byte_len)``, measured in UTF-8
          bytes and assigned in first-seen order. Its ``offset`` attribute
          holds the total byte length assigned so far.
    """
    ranges = []
    # The empty string is pre-registered as the zero-length slice at offset 0.
    map_string = OrderedDict({'': (0, 0)})
    # Running byte offset, stored on the dict so the nested helper can update
    # it without ``nonlocal``.
    map_string.offset = 0

    def map_string_slice(st):
        # Assign the next free byte range to a string seen for the first time.
        if st not in map_string:
            byte_len = len(st.encode('utf8'))
            map_string[st] = (map_string.offset, byte_len)
            map_string.offset += byte_len
        return map_string[st]

    def print_slice(s):
        return "(StringTableSlice { byte_start: %d, byte_len: %d })" % map_string_slice(s)

    # A plain ``with open`` closes the file even if parsing raises, and avoids
    # leaving the module-global ``fileinput`` state active after a failure.
    with open(data_path("IdnaMappingTable.txt")) as table_file:
        for raw_line in table_file:
            # Drop the trailing comment, then skip blank lines.
            line = raw_line.partition('#')[0]
            if not line.strip():
                continue

            fields = line.split(';')
            code_points = fields[0].strip()
            # The surrogate range is left out of the table.
            # NOTE(review): presumably because the range endpoints are later
            # emitted as char literals -- confirm.
            if code_points == 'D800..DFFF':
                continue

            # Either a single code point "XXXX" or a range "XXXX..YYYY".
            first, _, last = code_points.partition('..')
            if not last:
                last = first
            first, last = int(first, 16), int(last, 16)

            mapping = rustout.title_case(fields[1])
            if len(fields) > 2:
                map_chars = fields[2].strip()
                if map_chars:
                    # split() (not split(' ')) tolerates repeated whitespace
                    # between the hex code points.
                    unistr = u''.join(cp_to_char(int(cp_hex, 16))
                                      for cp_hex in map_chars.split())
                    mapping += print_slice(unistr)
                elif mapping == "Deviation":
                    # A deviation with no replacement maps to the empty string.
                    mapping += print_slice('')
            ranges.append((first, last, mapping))
    return (ranges, map_string)
def emit_idna_map_tables(dir):
    """Write the IDNA mapping tables into *dir*.

    Two files are produced from ``get_idna_mapping_table()``:

    * ``idna_map.rsv`` -- one ``Range { from, to, mapping }`` entry per table
      row, passed through ``rustout.emit_table``.
    * ``idna_map_string.rsv`` -- the distinct replacement strings in
      first-seen order, passed through ``rustout.emit_strings``.
    """
    (ranges, map_string) = get_idna_mapping_table()

    def format_range(entry):
        # *entry* is a (first, last, mapping) tuple from the mapping table.
        first, last, mapping = entry
        return "Range { from: '%s', to: '%s', mapping: %s }" % (
            char_escape(first),
            char_escape(last),
            mapping,
        )

    with open(join(dir, 'idna_map.rsv'), "w") as map_file:
        rustout.emit_table(
            __file__,
            map_file,
            ranges,
            print_fun=format_range,
        )

    with open(join(dir, 'idna_map_string.rsv'), "w") as map_string_file:
        rustout.emit_strings(
            __file__,
            map_string_file,
            # iter() over a dict yields its keys on both Python 2 and 3;
            # dict.iterkeys() no longer exists on Python 3.
            iter(map_string),
        )
if __name__ == "__main__":
    # Clear every registered output directory first, then regenerate each
    # IDNA table file in turn.
    common.cleanup_output_dirs(OUTPUT_DIRS.values())
    for emit in (emit_unicode_version, emit_idna_map_tables):
        emit(OUTPUT_DIRS['IDNA'])