mirror of
https://github.com/Samsung/escargot.git
synced 2026-06-22 10:01:50 +00:00
244 lines
11 KiB
Python
Executable file
244 lines
11 KiB
Python
Executable file
#!/usr/bin/env python
|
|
|
|
# Copyright 2020-present Samsung Electronics Co., Ltd.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import subprocess
|
|
import sys
|
|
import os
|
|
|
|
from argparse import ArgumentParser
|
|
|
|
|
|
# Markers used by generate_ranges() to find the interesting sections of the
# derived core properties file.  Each one is compared against a whole line,
# so the trailing newline is part of the marker.
PARAGRAPH_SEP = "# ================================================\n"
ID_START_TITLE = "# Derived Property: ID_Start\n"
ID_CONTINUE_TITLE = "# Derived Property: ID_Continue\n"

# Basic plane ranges collected by generate_ranges(): starting code point
# mapped to the stored range length, one table per derived property.
BASIC_PLANE_START = {}
BASIC_PLANE_CONTINUE = {}

# Real lengths of the ranges whose length is 200 or more; filled by
# create_basic_plane_table(), which stores an index into this list instead.
LONG_RANGE_LENGHT = []

# Ranges starting above 0xFFFF, and the combined basic plane table that
# create_basic_plane_table() builds from the two tables above.
SUPPLEMENTARY_PLANE = {}
MERGED_ID_START = {}

# ANSI escape sequences used to colour the progress messages.
TERM_RED = "\033[1;31m"
TERM_GREEN = "\033[1;32m"
TERM_YELLOW = "\033[1;33m"
TERM_EMPTY = "\033[0m"
|
|
|
|
|
|
class UnicodeTable:
    """Accumulate the text of the generated C++ identifier table file.

    The output is kept as three lists of lines (header, data and footer)
    which generate() writes to the output file in that order.
    """

    def __init__(self):
        self.header = []
        self.data = []
        self.footer = []

    def add_header(self, header_text):
        """Append header_text, followed by an empty line, to the header."""
        self.header.append(header_text)
        self.header.append("")

    def add_footer(self, footer_text):
        """Append footer_text, followed by an empty line, to the footer."""
        self.footer.append(footer_text)
        self.footer.append("")

    def add_table(self, table, table_name, table_type, table_descr):
        """Append a C array definition holding the items of table.

        Args:
            table: sequence of values; each is converted with str().
            table_name: suffix of the array name ("identRange" + table_name).
            table_type: C element type of the array.
            table_descr: text of the comment placed above the array.
        """
        self.data.append("/* %s */" % (table_descr))
        self.data.append("const %s identRange%s[%s] = {" % (table_type, table_name, len(table)))
        self.data.append(' ' + ', '.join(map(str, table)))
        self.data.append("};")
        self.data.append("")

    def generate(self, output_path=None):
        """Write the header, data and footer sections to the output file.

        Args:
            output_path: file to write.  When omitted, the file
                ../src/parser/UnicodeIdentifierTables.cpp relative to the
                directory of this script is used.
        """
        if output_path is None:
            script_dir = os.path.dirname(os.path.abspath(__file__))
            output_path = os.path.join(script_dir, "..", "src", "parser",
                                       "UnicodeIdentifierTables.cpp")

        with open(output_path, 'w') as unicode_table_file:
            # Every section ends with an empty item, so each joined section
            # already ends with a newline and they can be written back to back.
            for section in (self.header, self.data, self.footer):
                unicode_table_file.write("\n".join(str(item) for item in section))
|
|
|
|
|
|
def create_basic_plane_table():
    """Build the combined basic plane table in MERGED_ID_START.

    Steps, all performed in place on the module level tables:

    1. Add zero width non-joiner and zero width joiner to BASIC_PLANE_START.
    2. Copy BASIC_PLANE_START into MERGED_ID_START and add every
       BASIC_PLANE_CONTINUE range whose start is not present yet.
    3. Merge each range into its predecessor when it starts right after the
       predecessor's last code point (start + length + 1).
    4. Replace every length of 200 or more by 200 + index, where index is
       the position of the real length in LONG_RANGE_LENGHT.
    """
    # zero_width_non_joiner and zero_width_joiner need to be added here
    # http://www.ecma-international.org/ecma-262/10.0/#prod-IdentifierName
    # $ and _ are handled directly in the Escargot Lexer
    for code_point in (0x200C, 0x200D):
        BASIC_PLANE_START[code_point] = 1

    # ID_Start entries win; ID_Continue only contributes unseen starts.
    MERGED_ID_START.update(BASIC_PLANE_START)
    for start, length in BASIC_PLANE_CONTINUE.items():
        MERGED_ID_START.setdefault(start, length)

    # Walk the sorted starts from the end so that a chain of adjacent ranges
    # collapses step by step into the first one.  The loop has to go down to
    # index 1, otherwise the first two ranges are never compared.
    ordered_keys = sorted(MERGED_ID_START)
    for index in range(len(ordered_keys) - 1, 0, -1):
        current = ordered_keys[index]
        previous = ordered_keys[index - 1]
        if current == previous + MERGED_ID_START[previous] + 1:
            absorbed_length = MERGED_ID_START.pop(current)
            MERGED_ID_START[previous] += absorbed_length + 1

    # Create the long range table.  Iterating in sorted order keeps the
    # generated output independent of dict ordering.
    # NOTE(review): 200 presumably mirrors IDENT_RANGE_LONG in the lexer -
    # confirm against the consumer of the generated tables.
    for start in sorted(MERGED_ID_START):
        length = MERGED_ID_START[start]
        if length >= 200:
            MERGED_ID_START[start] = 200 + len(LONG_RANGE_LENGHT)
            LONG_RANGE_LENGHT.append(length)
|
|
|
|
|
|
def unify_non_basic_plane():
    """Merge adjacent ranges of SUPPLEMENTARY_PLANE in place.

    A range is merged into its predecessor when it starts right after the
    predecessor's last code point (start + length + 1).  The merged length
    is the sum of both lengths plus one, so that the merged range still ends
    on the last code point of the absorbed range.
    """
    # Walk the sorted starts from the end so that a chain of adjacent ranges
    # collapses step by step into the first one.  The loop has to go down to
    # index 1, otherwise the first two ranges are never compared.
    ordered_keys = sorted(SUPPLEMENTARY_PLANE)
    for index in range(len(ordered_keys) - 1, 0, -1):
        current = ordered_keys[index]
        previous = ordered_keys[index - 1]
        if current == previous + SUPPLEMENTARY_PLANE[previous] + 1:
            absorbed_length = SUPPLEMENTARY_PLANE.pop(current)
            SUPPLEMENTARY_PLANE[previous] += absorbed_length + 1
|
|
|
|
|
|
def generate_ranges(file):
    """Collect the ID_Start and ID_Continue ranges from a data file.

    The file is scanned for PARAGRAPH_SEP lines.  When the paragraph that
    follows is titled ID_START_TITLE or ID_CONTINUE_TITLE, its description
    lines are skipped and every following line up to the next empty line
    (or the end of the file) is parsed as "XXXX ; ..." or "XXXX..YYYY ; ...".

    Ranges starting in 129..65535 are stored in BASIC_PLANE_START or
    BASIC_PLANE_CONTINUE (the first entry for a given start is kept), ranges
    starting above 65535 are stored in SUPPLEMENTARY_PLANE (a later entry
    replaces an earlier one).  Lower code points are ignored.

    The stored length is last - first for a range and 1 for a single code
    point.
    NOTE(review): these two conventions differ by one - confirm against the
    lexer that consumes the generated tables before changing either.

    Args:
        file: path of the derived core properties file to read.
    """
    with open(file, "r") as fp:
        for line in iter(fp.readline, ""):
            if line != PARAGRAPH_SEP:
                continue

            # The separator is followed by an empty line, then the title.
            fp.readline()
            paragraph_title = fp.readline()

            # Skip 8 lines containing the definition of ID_Start,
            # or 9 lines in case of ID_Continue.
            if paragraph_title == ID_START_TITLE:
                basic_plane_table = BASIC_PLANE_START
                skip = 8
            elif paragraph_title == ID_CONTINUE_TITLE:
                basic_plane_table = BASIC_PLANE_CONTINUE
                skip = 9
            else:
                continue

            for _ in range(skip):
                fp.readline()

            # The sentinel form of iter() also ends the loop at end of file,
            # so a file without a trailing empty line does not break parsing.
            for unicode_range_line in iter(fp.readline, ""):
                if unicode_range_line == "\n":
                    break

                # Cut out the code point part and split it into its bounds;
                # int() tolerates the padding in front of the ';'.
                ranges = unicode_range_line.split(';')[0].split('..')
                start = int(ranges[0], 16)
                if len(ranges) == 2:
                    length = int(ranges[1], 16) - start
                else:
                    length = 1

                # Basic plane for Escargot's uint16_t.  The ascii part is
                # skipped as it is handled directly in the Lexer.cpp
                if 128 < start <= 65535:
                    basic_plane_table.setdefault(start, length)
                # Supplementary Plane data
                elif start > 65535:
                    SUPPLEMENTARY_PLANE[start] = length
|
|
|
|
|
|
def main():
    """Generate src/parser/UnicodeIdentifierTables.cpp from a unicode data file.

    Parses the command line, reads the ranges from the file given with
    --derived_core_properties, merges them, and writes the resulting tables
    wrapped in the licence, a generated-file warning and the namespace.
    Exits through parser.error() when the given file does not exist.
    """
    print("\n%sAutomated Unicode Identifier Table Generator for Escargot %s\n" % (TERM_GREEN,TERM_EMPTY))
    parser = ArgumentParser(description='Unicode identifier table generator for the Escargot engine')
    parser.add_argument('--derived_core_properties', metavar='FILE', action='store', required=True, help='specify the unicode data file')
    args = parser.parse_args()

    if not os.path.isfile(args.derived_core_properties):
        parser.error("\n\n%sArgument file is non-existent!%s\nGiven: %s\n" % (TERM_RED,TERM_EMPTY,args.derived_core_properties))

    # Process Derived File
    print("%s..Processing Derived Core Properties file %s\n" % (TERM_YELLOW,TERM_EMPTY))
    generate_ranges(args.derived_core_properties)

    # Process data for the basic plane
    # and unify the supplementary
    print("%s..Calculating proper ranges %s\n" % (TERM_YELLOW,TERM_EMPTY))
    create_basic_plane_table()
    unify_non_basic_plane()

    # Start collecting output data
    print("%s..Collecting output data %s\n" % (TERM_YELLOW,TERM_EMPTY))
    generated_data = UnicodeTable()
    licence = (
        "/* * Copyright (c) 2020-present Samsung Electronics Co., Ltd\n"
        " *\n"
        " * This library is free software; you can redistribute it and/or\n"
        " * modify it under the terms of the GNU Lesser General Public\n"
        " * License as published by the Free Software Foundation; either\n"
        " * version 2.1 of the License, or (at your option) any later version.\n"
        " *\n"
        " * This library is distributed in the hope that it will be useful,\n"
        " * but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
        " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
        " * Lesser General Public License for more details.\n"
        " *\n"
        " * You should have received a copy of the GNU Lesser General Public\n"
        " * License along with this library; if not, write to the Free Software\n"
        " * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301\n"
        " * USA\n"
        " */\n"
    )
    header = "/* This file is automatically generated by the %s script\n* from https://www.unicode.org/Public/13.0.0/ucd/DerivedCoreProperties.txt\n* DO NOT EDIT!\n*/" % (os.path.basename(__file__))
    ifdef_namespace = "#include \"UnicodeIdentifierTables.h\"\n\nnamespace Escargot {\nnamespace EscargotLexer {\n"
    footer_namespace = "const uint16_t basic_plane_length = sizeof(identRangeStart);\nconst uint16_t supplementary_plane_length = sizeof(identRangeStartSupplementaryPlane);\n}\n}"

    # Append licence, generated-file warning and namespace opening
    generated_data.add_header(licence)
    generated_data.add_header(header)
    generated_data.add_header(ifdef_namespace)

    # Fetch data for the basic plane; starts and lengths are emitted as two
    # parallel arrays, so both follow the same sorted order.
    keys = sorted(MERGED_ID_START)
    values = [MERGED_ID_START[key] for key in keys]

    # Append basic plane values
    generated_data.add_table(keys,"Start","uint16_t","Starting codepoints of identifier ranges.")
    generated_data.add_table(values,"Length","uint16_t","Lengths of identifier ranges.")
    generated_data.add_table(LONG_RANGE_LENGHT,"LongLength","uint16_t","Lengths of identifier ranges greater than LEXER IDENT_RANGE_LONG.")

    # Fetch data for the supplementary plane
    keys = sorted(SUPPLEMENTARY_PLANE)
    values = [SUPPLEMENTARY_PLANE[key] for key in keys]

    # Append supplementary values
    generated_data.add_table(keys,"StartSupplementaryPlane","uint32_t","Identifier starting codepoints for the supplementary plane")
    generated_data.add_table(values,"LengthSupplementaryPlane","uint16_t","Lengths of identifier ranges for the supplementary plane")

    # Append footer
    generated_data.add_footer(footer_namespace)

    # Write out data
    print("%s..Writing out data %s\n" % (TERM_YELLOW,TERM_EMPTY))
    generated_data.generate()
    print("\n%sTables generated into src/parser/UnicodeIdentifierTables.cpp %s\n" % (TERM_GREEN,TERM_EMPTY))
|
|
|
|
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
|