Pileus Git - ~andy/gtk/blobdiff - gtk/compose-parse.py
Fix includes
[~andy/gtk] / gtk / compose-parse.py
index dfa94925fd7fc06dfc60350a77b00ebe65c55ddd..493c48873309a4b5dec58487f396ed820f1871ba 100755 (executable)
@@ -23,8 +23,8 @@ import getopt
 # We grab files off the web, left and right.
 URL_COMPOSE = 'http://gitweb.freedesktop.org/?p=xorg/lib/libX11.git;a=blob_plain;f=nls/en_US.UTF-8/Compose.pre'
 URL_KEYSYMSTXT = "http://www.cl.cam.ac.uk/~mgk25/ucs/keysyms.txt"
-URL_GDKKEYSYMSH = "http://svn.gnome.org/svn/gtk%2B/trunk/gdk/gdkkeysyms.h"
-URL_UNICODEDATATXT = 'http://www.unicode.org/Public/5.0.0/ucd/UnicodeData.txt'
+URL_GDKKEYSYMSH = "http://git.gnome.org/browse/gtk%2B/plain/gdk/gdkkeysyms.h"
+URL_UNICODEDATATXT = 'http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt'
 FILENAME_COMPOSE_SUPPLEMENTARY = 'gtk-compose-lookaside.txt'
 
 # We currently support keysyms of size 2; once upstream xorg gets sorted, 
@@ -52,9 +52,7 @@ headerfile_start = """/* GTK - The GIMP Tool Kit
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
  */
 
 /*
@@ -253,7 +251,7 @@ def process_gdkkeysymsh():
        for line in gdkkeysymsh.readlines():
                linenum_gdkkeysymsh += 1
                line = line.strip()
-               if line == "" or not match('^#define GDK_', line):
+               if line == "" or not match('^#define GDK_KEY_', line):
                        continue
                components = split('\s+', line)
                if len(components) < 3:
@@ -261,16 +259,16 @@ def process_gdkkeysymsh():
                        % {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line}
                        print "Was expecting 3 items in the line"
                        sys.exit(-1)
-               if not match('^GDK_', components[1]):
+               if not match('^GDK_KEY_', components[1]):
                        print "Invalid line %(linenum)d in %(filename)s: %(line)s"\
                        % {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line}
-                       print "Was expecting a keysym starting with GDK_"
+                       print "Was expecting a keysym starting with GDK_KEY_"
                        sys.exit(-1)
-               if components[2][:2] == '0x' and match('[0-9a-fA-F]+$', components[2][2:]):
-                       unival = atoi(components[2][2:], 16)
+               if match('^0x[0-9a-fA-F]+$', components[2]):
+                       unival = long(components[2][2:], 16)
                        if unival == 0:
                                continue
-                       keysymdb[components[1][4:]] = unival
+                       keysymdb[components[1][8:]] = unival
                else:
                        print "Invalid line %(linenum)d in %(filename)s: %(line)s"\
                        % {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line}
@@ -328,8 +326,8 @@ def process_keysymstxt():
                        % {'linenum': linenum_keysymstxt, 'filename': filename_keysymstxt, 'line': line}
                        print "Was expecting 5 items in the line"
                        sys.exit(-1)
-               if components[1][0] == 'U' and match('[0-9a-fA-F]+$', components[1][1:]):
-                       unival = atoi(components[1][1:], 16)
+               if match('^U[0-9a-fA-F]+$', components[1]):
+                       unival = long(components[1][1:], 16)
                if unival == 0:
                        continue
                keysymdb[components[4]] = unival
@@ -354,6 +352,8 @@ def process_keysymstxt():
        keysymdb['dead_stroke'] = 0x338
        """ This is for a missing keysym from Markus Kuhn's db """
        keysymdb['Oslash'] = 0x0d8              
+       """ This is for a missing keysym from Markus Kuhn's db """
+       keysymdb['Ssharp'] = 0x1e9e
 
        """ This is for a missing (recently added) keysym """
        keysymdb['dead_psili'] = 0x313          
@@ -363,6 +363,19 @@ def process_keysymstxt():
        """ Allows to import Multi_key sequences """
        keysymdb['Multi_key'] = 0xff20
 
+        keysymdb['zerosubscript'] = 0x2080
+        keysymdb['onesubscript'] = 0x2081
+        keysymdb['twosubscript'] = 0x2082
+        keysymdb['threesubscript'] = 0x2083
+        keysymdb['foursubscript'] = 0x2084
+        keysymdb['fivesubscript'] = 0x2085
+        keysymdb['sixsubscript'] = 0x2086
+        keysymdb['sevensubscript'] = 0x2087
+        keysymdb['eightsubscript'] = 0x2088
+        keysymdb['ninesubscript'] = 0x2089
+        keysymdb['dead_doublegrave'] = 0x030F
+        keysymdb['dead_invertedbreve'] = 0x0311
+
        return keysymdb
 
 def keysymvalue(keysym, file = "n/a", linenum = 0):
@@ -378,9 +391,9 @@ def keysymvalue(keysym, file = "n/a", linenum = 0):
                elif keysym[:2] == '0x' and match('[0-9a-fA-F]+$', keysym[2:]):
                return atoi(keysym[2:], 16)
        else:
-               #print 'UNKNOWN{%(keysym)s}' % { "keysym": keysym }
-                       return -1
-               #sys.exit(-1)
+               print 'keysymvalue: UNKNOWN{%(keysym)s}' % { "keysym": keysym }
+                       #return -1
+               sys.exit(-1)
 
 def keysymunicodevalue(keysym, file = "n/a", linenum = 0):
        """ Extracts a value from the keysym """
@@ -395,16 +408,19 @@ def keysymunicodevalue(keysym, file = "n/a", linenum = 0):
                elif keysym[:2] == '0x' and match('[0-9a-fA-F]+$', keysym[2:]):
                return atoi(keysym[2:], 16)
        else:
-               print 'UNKNOWN{%(keysym)s}' % { "keysym": keysym }
+               print 'keysymunicodevalue: UNKNOWN{%(keysym)s}' % { "keysym": keysym }
                        sys.exit(-1)
 
 def rename_combining(seq):
        filtered_sequence = []
        for ks in seq:
                if findall('^combining_', ks):
-                       filtered_sequence.append(sub('^combining_', 'dead_', ks))
-               else:
-                       filtered_sequence.append(ks)
+                       ks = sub('^combining_', 'dead_', ks)
+                if ks == 'dead_double_grave':
+                        ks = 'dead_doublegrave'
+                if ks == 'dead_inverted_breve':
+                        ks = 'dead_invertedbreve'
+               filtered_sequence.append(ks)
        return filtered_sequence
 
 
@@ -425,30 +441,61 @@ except:
 """ Look if there is a lookaside (supplementary) compose file in the current
     directory, and if so, open, then merge with upstream Compose file.
 """
+xorg_compose_sequences_raw = []
+for seq in composefile.readlines():
+        xorg_compose_sequences_raw.append(seq)
+
 try:
         composefile_lookaside = open(FILENAME_COMPOSE_SUPPLEMENTARY, 'r')
+        for seq in composefile_lookaside.readlines():
+                xorg_compose_sequences_raw.append(seq)
 except IOError, (errno, strerror):
-        if not opt_quiet:
+        if opt_verbose:
                 print "I/O error(%s): %s" % (errno, strerror)
                 print "Did not find lookaside compose file. Continuing..."
 except:
         print "Unexpected error: ", sys.exc_info()[0]
         sys.exit(-1)
 
-xorg_compose_sequences_raw = []
-for seq in composefile.readlines():
-        xorg_compose_sequences_raw.append(seq)
-for seq in composefile_lookaside.readlines():
-        xorg_compose_sequences_raw.append(seq)
-
 """ Parse the compose file in  xorg_compose_sequences"""
 xorg_compose_sequences = []
 xorg_compose_sequences_algorithmic = []
 linenum_compose = 0
+comment_nest_depth = 0
 for line in xorg_compose_sequences_raw:
        linenum_compose += 1
        line = line.strip()
-       if line is "" or match("^XCOMM", line) or match("^#", line):
+       if match("^XCOMM", line) or match("^#", line):
+               continue
+
+       line = sub(r"\/\*([^\*]*|[\*][^/])\*\/", "", line)
+
+       comment_start = line.find("/*")
+
+       if comment_start >= 0:
+               if comment_nest_depth == 0:
+                       line = line[:comment_start]
+               else:
+                       line = ""
+
+               comment_nest_depth += 1
+       else:
+               comment_end = line.find("*/")
+
+               if comment_end >= 0:
+                       comment_nest_depth -= 1
+
+               if comment_nest_depth < 0:
+                       print "Invalid comment %(linenum_compose)d in %(filename)s: \
+                       Closing '*/' without opening '/*'" % { "linenum_compose": linenum_compose, "filename": filename_compose }
+                       exit(-1)
+
+               if comment_nest_depth > 0:
+                       line = ""
+               else:
+                       line = line[comment_end + 2:]
+
+       if line is "":
                continue
 
        #line = line[:-1]
@@ -472,12 +519,12 @@ for line in xorg_compose_sequences_raw:
                continue
        if raw_sequence[0][0] == 'U' and match('[0-9a-fA-F]+$', raw_sequence[0][1:]):
                raw_sequence[0] = '0x' + raw_sequence[0][1:]
-       if codepointstr[0] == 'U' and match('[0-9a-fA-F]+$', codepointstr[1:]):
-               codepoint = atoi(codepointstr[1:], 16)
+       if  match('^U[0-9a-fA-F]+$', codepointstr):
+               codepoint = long(codepointstr[1:], 16)
        elif keysymunicodedatabase.has_key(codepointstr):
-               if keysymdatabase[codepointstr] != keysymunicodedatabase[codepointstr]:
-                       print "DIFFERENCE: 0x%(a)X 0x%(b)X" % { "a": keysymdatabase[codepointstr], "b": keysymunicodedatabase[codepointstr]},
-                       print raw_sequence, codepointstr
+               #if keysymdatabase[codepointstr] != keysymunicodedatabase[codepointstr]:
+                       #print "DIFFERENCE: 0x%(a)X 0x%(b)X" % { "a": keysymdatabase[codepointstr], "b": keysymunicodedatabase[codepointstr]},
+                       #print raw_sequence, codepointstr
                codepoint = keysymunicodedatabase[codepointstr]
        else:
                print
@@ -505,6 +552,7 @@ for line in xorg_compose_sequences_raw:
                "0x0314" in sequence:
                continue
        if "dead_belowring" in sequence or\
+                "dead_currency" in sequence or\
                "dead_belowcomma" in sequence or\
                "dead_belowmacron" in sequence or\
                "dead_belowtilde" in sequence or\
@@ -707,11 +755,11 @@ for sequence in xorg_compose_sequences_algorithmic_uniqued:
 if opt_algorithmic:
        for sequence in xorg_compose_sequences_algorithmic_uniqued:
                letter = "".join(sequence[-1:])
-               print '0x%(cp)04X, %(uni)c, seq: [ <0x%(base)04X>,' % { 'cp': ord(unicode(letter)), 'uni': letter, 'base': sequence[-2] },
+               print '0x%(cp)04X, %(uni)s, seq: [ <0x%(base)04X>,' % { 'cp': ord(unicode(letter)), 'uni': letter.encode('utf-8'), 'base': sequence[-2] },
                for elem in sequence[:-2]:
                        print "<0x%(keysym)04X>," % { 'keysym': elem },
                """ Yeah, verified... We just want to keep the output similar to -u, so we can compare/sort easily """
-               print "], recomposed as", letter, "verified"
+               print "], recomposed as", letter.encode('utf-8'), "verified"
 
 def num_of_keysyms(seq):
        return len(seq) - 1
@@ -724,9 +772,9 @@ def convert_UnotationToHex(arg):
 
 def addprefix_GDK(arg):
        if match('^0x', arg):
-               return '%(arg)s, ' % { 'arg': arg } 
+               return '%(arg)s, ' % { 'arg': arg }
        else:
-               return 'GDK_%(arg)s, ' % { 'arg': arg } 
+               return 'GDK_KEY_%(arg)s, ' % { 'arg': arg }
 
 if opt_gtk:
        first_keysym = ""
@@ -770,7 +818,7 @@ if opt_gtk:
                        print "0x%(ks)04X," % { "ks": keysymvalue(i[0]) },
                        print '%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i[1:])) }
                elif not match('^0x', i[0]):
-                       print 'GDK_%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) }
+                       print 'GDK_KEY_%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) }
                else:
                        print '%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) }
        for i in ct_second_part: