X-Git-Url: http://pileus.org/git/?a=blobdiff_plain;f=gtk%2Fcompose-parse.py;h=493c48873309a4b5dec58487f396ed820f1871ba;hb=d484721b5ca9e82d6422cca8a3a40f001208f87b;hp=bbff041ca7bf66de87ff5234d817f61ff17f6b7e;hpb=fce9c8b7d45145c4556650843218e0b76c065c18;p=~andy%2Fgtk diff --git a/gtk/compose-parse.py b/gtk/compose-parse.py index bbff041ca..493c48873 100755 --- a/gtk/compose-parse.py +++ b/gtk/compose-parse.py @@ -23,8 +23,9 @@ import getopt # We grab files off the web, left and right. URL_COMPOSE = 'http://gitweb.freedesktop.org/?p=xorg/lib/libX11.git;a=blob_plain;f=nls/en_US.UTF-8/Compose.pre' URL_KEYSYMSTXT = "http://www.cl.cam.ac.uk/~mgk25/ucs/keysyms.txt" -URL_GDKKEYSYMSH = "http://svn.gnome.org/svn/gtk%2B/trunk/gdk/gdkkeysyms.h" -URL_UNICODEDATATXT = 'http://www.unicode.org/Public/5.0.0/ucd/UnicodeData.txt' +URL_GDKKEYSYMSH = "http://git.gnome.org/browse/gtk%2B/plain/gdk/gdkkeysyms.h" +URL_UNICODEDATATXT = 'http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt' +FILENAME_COMPOSE_SUPPLEMENTARY = 'gtk-compose-lookaside.txt' # We currently support keysyms of size 2; once upstream xorg gets sorted, # we might produce some tables with size 2 and some with size 4. @@ -37,7 +38,7 @@ keysymdatabase = {} keysymunicodedatabase = {} unicodedatabase = {} -headerfile_start = """/* GTK - The GTK+ Toolkit +headerfile_start = """/* GTK - The GIMP Tool Kit * Copyright (C) 2007, 2008 GNOME Foundation * * This library is free software; you can redistribute it and/or @@ -51,9 +52,7 @@ headerfile_start = """/* GTK - The GTK+ Toolkit * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * License along with this library. If not, see see . */ /* @@ -252,7 +251,7 @@ def process_gdkkeysymsh(): for line in gdkkeysymsh.readlines(): linenum_gdkkeysymsh += 1 line = line.strip() - if line == "" or not match('^#define GDK_', line): + if line == "" or not match('^#define GDK_KEY_', line): continue components = split('\s+', line) if len(components) < 3: @@ -260,16 +259,16 @@ def process_gdkkeysymsh(): % {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line} print "Was expecting 3 items in the line" sys.exit(-1) - if not match('^GDK_', components[1]): + if not match('^GDK_KEY_', components[1]): print "Invalid line %(linenum)d in %(filename)s: %(line)s"\ % {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line} - print "Was expecting a keysym starting with GDK_" + print "Was expecting a keysym starting with GDK_KEY_" sys.exit(-1) - if components[2][:2] == '0x' and match('[0-9a-fA-F]+$', components[2][2:]): - unival = atoi(components[2][2:], 16) + if match('^0x[0-9a-fA-F]+$', components[2]): + unival = long(components[2][2:], 16) if unival == 0: continue - keysymdb[components[1][4:]] = unival + keysymdb[components[1][8:]] = unival else: print "Invalid line %(linenum)d in %(filename)s: %(line)s"\ % {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line} @@ -279,9 +278,17 @@ def process_gdkkeysymsh(): """ Patch up the keysymdb with some of our own stuff """ - """ This is for a missing keysym from the currently upstread file """ + """ This is for a missing keysym from the currently upstream file """ keysymdb['dead_stroke'] = 0x338 + """ This is for a missing keysym from the currently upstream file """ + ###keysymdb['dead_belowring'] = 0x323 + ###keysymdb['dead_belowmacron'] = 0x331 + ###keysymdb['dead_belowcircumflex'] = 0x32d + ###keysymdb['dead_belowtilde'] = 0x330 + ###keysymdb['dead_belowbreve'] = 0x32e + ###keysymdb['dead_belowdiaeresis'] = 0x324 + """ This is^Wwas preferential treatment for Greek """ # keysymdb['dead_tilde'] = 0x342 """ This is^was preferential treatment for Greek """ @@ -319,14 +326,21 @@ def process_keysymstxt(): % {'linenum': linenum_keysymstxt, 'filename': filename_keysymstxt, 'line': line} print "Was expecting 5 items in the line" sys.exit(-1) - if components[1][0] == 'U' and match('[0-9a-fA-F]+$', components[1][1:]): - unival = atoi(components[1][1:], 16) + if match('^U[0-9a-fA-F]+$', components[1]): + unival = long(components[1][1:], 16) if unival == 0: continue keysymdb[components[4]] = unival keysymstxt.close() """ Patch up the keysymdb with some of our own stuff """ + """ This is for a missing keysym from the currently upstream file """ + ###keysymdb['dead_belowring'] = 0x323 + ###keysymdb['dead_belowmacron'] = 0x331 + ###keysymdb['dead_belowcircumflex'] = 0x32d + ###keysymdb['dead_belowtilde'] = 0x330 + ###keysymdb['dead_belowbreve'] = 0x32e + ###keysymdb['dead_belowdiaeresis'] = 0x324 """ This is preferential treatment for Greek """ """ => we get more savings if used for Greek """ @@ -334,10 +348,12 @@ def process_keysymstxt(): """ This is preferential treatment for Greek """ # keysymdb['combining_tilde'] = 0x342 - """ This is for a missing keysym from Marcus Khun's db """ + """ This is for a missing keysym from Markus Kuhn's db """ keysymdb['dead_stroke'] = 0x338 - """ This is for a missing keysym from Marcus Khun's db """ - # keysymdb['Oslash'] = 0x0d8 + """ This is for a missing keysym from Markus Kuhn's db """ + keysymdb['Oslash'] = 0x0d8 + """ This is for a missing keysym from Markus Kuhn's db """ + keysymdb['Ssharp'] = 0x1e9e """ This is for a missing (recently added) keysym """ keysymdb['dead_psili'] = 0x313 @@ -347,6 +363,19 @@ def process_keysymstxt(): """ Allows to import Multi_key sequences """ keysymdb['Multi_key'] = 0xff20 + keysymdb['zerosubscript'] = 0x2080 + keysymdb['onesubscript'] = 0x2081 + keysymdb['twosubscript'] = 0x2082 + keysymdb['threesubscript'] = 0x2083 + keysymdb['foursubscript'] = 0x2084 + keysymdb['fivesubscript'] = 0x2085 + keysymdb['sixsubscript'] = 0x2086 + keysymdb['sevensubscript'] = 0x2087 + keysymdb['eightsubscript'] = 0x2088 + keysymdb['ninesubscript'] = 0x2089 + keysymdb['dead_doublegrave'] = 0x030F + keysymdb['dead_invertedbreve'] = 0x0311 + return keysymdb def keysymvalue(keysym, file = "n/a", linenum = 0): @@ -362,8 +391,9 @@ def keysymvalue(keysym, file = "n/a", linenum = 0): elif keysym[:2] == '0x' and match('[0-9a-fA-F]+$', keysym[2:]): return atoi(keysym[2:], 16) else: - print 'UNKNOWN{%(keysym)s}' % { "keysym": keysym } - sys.exit(-1) + print 'keysymvalue: UNKNOWN{%(keysym)s}' % { "keysym": keysym } + #return -1 + sys.exit(-1) def keysymunicodevalue(keysym, file = "n/a", linenum = 0): """ Extracts a value from the keysym """ @@ -378,16 +408,19 @@ def keysymunicodevalue(keysym, file = "n/a", linenum = 0): elif keysym[:2] == '0x' and match('[0-9a-fA-F]+$', keysym[2:]): return atoi(keysym[2:], 16) else: - print 'UNKNOWN{%(keysym)s}' % { "keysym": keysym } + print 'keysymunicodevalue: UNKNOWN{%(keysym)s}' % { "keysym": keysym } sys.exit(-1) def rename_combining(seq): filtered_sequence = [] for ks in seq: if findall('^combining_', ks): - filtered_sequence.append(sub('^combining_', 'dead_', ks)) - else: - filtered_sequence.append(ks) + ks = sub('^combining_', 'dead_', ks) + if ks == 'dead_double_grave': + ks = 'dead_doublegrave' + if ks == 'dead_inverted_breve': + ks = 'dead_invertedbreve' + filtered_sequence.append(ks) return filtered_sequence @@ -405,17 +438,67 @@ except: print "Unexpected error: ", sys.exc_info()[0] sys.exit(-1) +""" Look if there is a lookaside (supplementary) compose file in the current + directory, and if so, open, then merge with upstream Compose file. +""" +xorg_compose_sequences_raw = [] +for seq in composefile.readlines(): + xorg_compose_sequences_raw.append(seq) + +try: + composefile_lookaside = open(FILENAME_COMPOSE_SUPPLEMENTARY, 'r') + for seq in composefile_lookaside.readlines(): + xorg_compose_sequences_raw.append(seq) +except IOError, (errno, strerror): + if opt_verbose: + print "I/O error(%s): %s" % (errno, strerror) + print "Did not find lookaside compose file. Continuing..." +except: + print "Unexpected error: ", sys.exc_info()[0] + sys.exit(-1) + """ Parse the compose file in xorg_compose_sequences""" xorg_compose_sequences = [] xorg_compose_sequences_algorithmic = [] linenum_compose = 0 -for line in composefile.readlines(): +comment_nest_depth = 0 +for line in xorg_compose_sequences_raw: linenum_compose += 1 line = line.strip() - if line is "" or match("^XCOMM", line) or match("^#", line): + if match("^XCOMM", line) or match("^#", line): + continue + + line = sub(r"\/\*([^\*]*|[\*][^/])\*\/", "", line) + + comment_start = line.find("/*") + + if comment_start >= 0: + if comment_nest_depth == 0: + line = line[:comment_start] + else: + line = "" + + comment_nest_depth += 1 + else: + comment_end = line.find("*/") + + if comment_end >= 0: + comment_nest_depth -= 1 + + if comment_nest_depth < 0: + print "Invalid comment %(linenum_compose)d in %(filename)s: \ + Closing '*/' without opening '/*'" % { "linenum_compose": linenum_compose, "filename": filename_compose } + exit(-1) + + if comment_nest_depth > 0: + line = "" + else: + line = line[comment_end + 2:] + + if line is "": continue - line = line[:-1] + #line = line[:-1] components = split(':', line) if len(components) != 2: print "Invalid line %(linenum_compose)d in %(filename)s: No sequence\ @@ -428,13 +511,21 @@ for line in composefile.readlines(): values = split('\s+', val) unichar_temp = split('"', values[0]) unichar = unichar_temp[1] + if len(values) == 1: + continue codepointstr = values[1] + if values[1] == '#': + # No codepoints that are >1 characters yet. + continue if raw_sequence[0][0] == 'U' and match('[0-9a-fA-F]+$', raw_sequence[0][1:]): raw_sequence[0] = '0x' + raw_sequence[0][1:] - if codepointstr[0] == 'U' and match('[0-9a-fA-F]+$', codepointstr[1:]): - codepoint = atoi(codepointstr[1:], 16) - elif keysymdatabase.has_key(codepointstr): - codepoint = keysymdatabase[codepointstr] + if match('^U[0-9a-fA-F]+$', codepointstr): + codepoint = long(codepointstr[1:], 16) + elif keysymunicodedatabase.has_key(codepointstr): + #if keysymdatabase[codepointstr] != keysymunicodedatabase[codepointstr]: + #print "DIFFERENCE: 0x%(a)X 0x%(b)X" % { "a": keysymdatabase[codepointstr], "b": keysymunicodedatabase[codepointstr]}, + #print raw_sequence, codepointstr + codepoint = keysymunicodedatabase[codepointstr] else: print print "Invalid codepoint at line %(linenum_compose)d in %(filename)s:\ @@ -448,13 +539,30 @@ for line in composefile.readlines(): if opt_plane1: print sequence break + if keysymvalue(i) < 0: + reject_this = True + break if reject_this: continue - if "U0313" in sequence or "U0314" in sequence or "0x0313" in sequence or "0x0314" in sequence: + if "U0342" in sequence or \ + "U0313" in sequence or \ + "U0314" in sequence or \ + "0x0313" in sequence or \ + "0x0342" in sequence or \ + "0x0314" in sequence: + continue + if "dead_belowring" in sequence or\ + "dead_currency" in sequence or\ + "dead_belowcomma" in sequence or\ + "dead_belowmacron" in sequence or\ + "dead_belowtilde" in sequence or\ + "dead_belowbreve" in sequence or\ + "dead_belowdiaeresis" in sequence or\ + "dead_belowcircumflex" in sequence: continue - for i in range(len(sequence)): - if sequence[i] == "0x0342": - sequence[i] = "dead_tilde" + #for i in range(len(sequence)): + # if sequence[i] == "0x0342": + # sequence[i] = "dead_tilde" if "Multi_key" not in sequence: """ Ignore for now >0xFFFF keysyms """ if codepoint < 0xFFFF: @@ -473,7 +581,7 @@ for line in composefile.readlines(): because of lack of dead_perispomeni (i.e. conflict) """ bc = basechar - if sequence[-1] == "dead_tilde" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff): + """if sequence[-1] == "dead_tilde" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff): skipping_this = True break if sequence[-1] == "dead_horn" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff): @@ -486,6 +594,7 @@ for line in composefile.readlines(): sequence[i] = "dead_horn" if sequence[-1] == "dead_dasia": sequence[-1] = "dead_ogonek" + """ unisequence.append(unichr(keysymunicodevalue(sequence.pop(), filename_compose, linenum_compose))) if skipping_this: @@ -646,11 +755,11 @@ for sequence in xorg_compose_sequences_algorithmic_uniqued: if opt_algorithmic: for sequence in xorg_compose_sequences_algorithmic_uniqued: letter = "".join(sequence[-1:]) - print '0x%(cp)04X, %(uni)c, seq: [ <0x%(base)04X>,' % { 'cp': ord(unicode(letter)), 'uni': letter, 'base': sequence[-2] }, + print '0x%(cp)04X, %(uni)s, seq: [ <0x%(base)04X>,' % { 'cp': ord(unicode(letter)), 'uni': letter.encode('utf-8'), 'base': sequence[-2] }, for elem in sequence[:-2]: print "<0x%(keysym)04X>," % { 'keysym': elem }, """ Yeah, verified... We just want to keep the output similar to -u, so we can compare/sort easily """ - print "], recomposed as", letter, "verified" + print "], recomposed as", letter.encode('utf-8'), "verified" def num_of_keysyms(seq): return len(seq) - 1 @@ -663,9 +772,9 @@ def convert_UnotationToHex(arg): def addprefix_GDK(arg): if match('^0x', arg): - return '%(arg)s, ' % { 'arg': arg } + return '%(arg)s, ' % { 'arg': arg } else: - return 'GDK_%(arg)s, ' % { 'arg': arg } + return 'GDK_KEY_%(arg)s, ' % { 'arg': arg } if opt_gtk: first_keysym = "" @@ -709,7 +818,7 @@ if opt_gtk: print "0x%(ks)04X," % { "ks": keysymvalue(i[0]) }, print '%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i[1:])) } elif not match('^0x', i[0]): - print 'GDK_%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) } + print 'GDK_KEY_%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) } else: print '%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) } for i in ct_second_part: