X-Git-Url: http://pileus.org/git/?a=blobdiff_plain;f=gtk%2Fcompose-parse.py;h=493c48873309a4b5dec58487f396ed820f1871ba;hb=d484721b5ca9e82d6422cca8a3a40f001208f87b;hp=bbff041ca7bf66de87ff5234d817f61ff17f6b7e;hpb=fce9c8b7d45145c4556650843218e0b76c065c18;p=~andy%2Fgtk

diff --git a/gtk/compose-parse.py b/gtk/compose-parse.py
index bbff041ca..493c48873 100755
--- a/gtk/compose-parse.py
+++ b/gtk/compose-parse.py
@@ -23,8 +23,9 @@ import getopt
 # We grab files off the web, left and right.
 URL_COMPOSE = 'http://gitweb.freedesktop.org/?p=xorg/lib/libX11.git;a=blob_plain;f=nls/en_US.UTF-8/Compose.pre'
 URL_KEYSYMSTXT = "http://www.cl.cam.ac.uk/~mgk25/ucs/keysyms.txt"
-URL_GDKKEYSYMSH = "http://svn.gnome.org/svn/gtk%2B/trunk/gdk/gdkkeysyms.h"
-URL_UNICODEDATATXT = 'http://www.unicode.org/Public/5.0.0/ucd/UnicodeData.txt'
+URL_GDKKEYSYMSH = "http://git.gnome.org/browse/gtk%2B/plain/gdk/gdkkeysyms.h"
+URL_UNICODEDATATXT = 'http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt'
+FILENAME_COMPOSE_SUPPLEMENTARY = 'gtk-compose-lookaside.txt'
 
 # We currently support keysyms of size 2; once upstream xorg gets sorted, 
 # we might produce some tables with size 2 and some with size 4.
@@ -37,7 +38,7 @@ keysymdatabase = {}
 keysymunicodedatabase = {}
 unicodedatabase = {}
 
-headerfile_start = """/* GTK - The GTK+ Toolkit
+headerfile_start = """/* GTK - The GIMP Tool Kit
  * Copyright (C) 2007, 2008 GNOME Foundation
  *
  * This library is free software; you can redistribute it and/or
@@ -51,9 +52,7 @@ headerfile_start = """/* GTK - The GTK+ Toolkit
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
  */
 
 /*
@@ -252,7 +251,7 @@ def process_gdkkeysymsh():
 	for line in gdkkeysymsh.readlines():
 		linenum_gdkkeysymsh += 1
 		line = line.strip()
-		if line == "" or not match('^#define GDK_', line):
+		if line == "" or not match('^#define GDK_KEY_', line):
 			continue
 		components = split('\s+', line)
 		if len(components) < 3:
@@ -260,16 +259,16 @@ def process_gdkkeysymsh():
 			% {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line}
 			print "Was expecting 3 items in the line"
 			sys.exit(-1)
-		if not match('^GDK_', components[1]):
+		if not match('^GDK_KEY_', components[1]):
 			print "Invalid line %(linenum)d in %(filename)s: %(line)s"\
 			% {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line}
-			print "Was expecting a keysym starting with GDK_"
+			print "Was expecting a keysym starting with GDK_KEY_"
 			sys.exit(-1)
-		if components[2][:2] == '0x' and match('[0-9a-fA-F]+$', components[2][2:]):
-			unival = atoi(components[2][2:], 16)
+		if match('^0x[0-9a-fA-F]+$', components[2]):
+			unival = long(components[2][2:], 16)
 			if unival == 0:
 				continue
-			keysymdb[components[1][4:]] = unival
+			keysymdb[components[1][8:]] = unival
 		else:
 			print "Invalid line %(linenum)d in %(filename)s: %(line)s"\
 			% {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line}
@@ -279,9 +278,17 @@ def process_gdkkeysymsh():
 
 	""" Patch up the keysymdb with some of our own stuff """
 
-	""" This is for a missing keysym from the currently upstread file """
+	""" This is for a missing keysym from the currently upstream file """
 	keysymdb['dead_stroke'] = 0x338
 
+	""" This is for a missing keysym from the currently upstream file """
+	###keysymdb['dead_belowring'] = 0x323
+	###keysymdb['dead_belowmacron'] = 0x331
+	###keysymdb['dead_belowcircumflex'] = 0x32d
+	###keysymdb['dead_belowtilde'] = 0x330
+	###keysymdb['dead_belowbreve'] = 0x32e
+	###keysymdb['dead_belowdiaeresis'] = 0x324
+
 	""" This is^Wwas preferential treatment for Greek """
 	# keysymdb['dead_tilde'] = 0x342  		
 	""" This is^was preferential treatment for Greek """
@@ -319,14 +326,21 @@ def process_keysymstxt():
 			% {'linenum': linenum_keysymstxt, 'filename': filename_keysymstxt, 'line': line}
 			print "Was expecting 5 items in the line"
 			sys.exit(-1)
-		if components[1][0] == 'U' and match('[0-9a-fA-F]+$', components[1][1:]):
-			unival = atoi(components[1][1:], 16)
+		if match('^U[0-9a-fA-F]+$', components[1]):
+			unival = long(components[1][1:], 16)
 		if unival == 0:
 			continue
 		keysymdb[components[4]] = unival
 	keysymstxt.close()
 
 	""" Patch up the keysymdb with some of our own stuff """
+	""" This is for a missing keysym from the currently upstream file """
+	###keysymdb['dead_belowring'] = 0x323
+	###keysymdb['dead_belowmacron'] = 0x331
+	###keysymdb['dead_belowcircumflex'] = 0x32d
+	###keysymdb['dead_belowtilde'] = 0x330
+	###keysymdb['dead_belowbreve'] = 0x32e
+	###keysymdb['dead_belowdiaeresis'] = 0x324
 
 	""" This is preferential treatment for Greek """
 	""" => we get more savings if used for Greek """
@@ -334,10 +348,12 @@ def process_keysymstxt():
 	""" This is preferential treatment for Greek """
 	# keysymdb['combining_tilde'] = 0x342	
 
-	""" This is for a missing keysym from Marcus Khun's db """
+	""" This is for a missing keysym from Markus Kuhn's db """
 	keysymdb['dead_stroke'] = 0x338
-	""" This is for a missing keysym from Marcus Khun's db """
-	# keysymdb['Oslash'] = 0x0d8		
+	""" This is for a missing keysym from Markus Kuhn's db """
+	keysymdb['Oslash'] = 0x0d8		
+	""" This is for a missing keysym from Markus Kuhn's db """
+	keysymdb['Ssharp'] = 0x1e9e
 
 	""" This is for a missing (recently added) keysym """
 	keysymdb['dead_psili'] = 0x313		
@@ -347,6 +363,19 @@ def process_keysymstxt():
 	""" Allows to import Multi_key sequences """
 	keysymdb['Multi_key'] = 0xff20
 
+	keysymdb['zerosubscript'] = 0x2080	# subscript digits: U+2080 .. U+2089
+	keysymdb['onesubscript'] = 0x2081
+	keysymdb['twosubscript'] = 0x2082
+	keysymdb['threesubscript'] = 0x2083
+	keysymdb['foursubscript'] = 0x2084
+	keysymdb['fivesubscript'] = 0x2085
+	keysymdb['sixsubscript'] = 0x2086
+	keysymdb['sevensubscript'] = 0x2087
+	keysymdb['eightsubscript'] = 0x2088
+	keysymdb['ninesubscript'] = 0x2089
+	keysymdb['dead_doublegrave'] = 0x030F	# spellings rename_combining() maps onto
+	keysymdb['dead_invertedbreve'] = 0x0311
+
 	return keysymdb
 
 def keysymvalue(keysym, file = "n/a", linenum = 0):
@@ -362,8 +391,9 @@ def keysymvalue(keysym, file = "n/a", linenum = 0):
        	elif keysym[:2] == '0x' and match('[0-9a-fA-F]+$', keysym[2:]):
 		return atoi(keysym[2:], 16)
 	else:
-        	print 'UNKNOWN{%(keysym)s}' % { "keysym": keysym }
-               	sys.exit(-1)
+        	print 'keysymvalue: UNKNOWN{%(keysym)s}' % { "keysym": keysym }
+               	#return -1
+		sys.exit(-1)
 
 def keysymunicodevalue(keysym, file = "n/a", linenum = 0):
 	""" Extracts a value from the keysym """
@@ -378,16 +408,19 @@ def keysymunicodevalue(keysym, file = "n/a", linenum = 0):
        	elif keysym[:2] == '0x' and match('[0-9a-fA-F]+$', keysym[2:]):
 		return atoi(keysym[2:], 16)
 	else:
-        	print 'UNKNOWN{%(keysym)s}' % { "keysym": keysym }
+        	print 'keysymunicodevalue: UNKNOWN{%(keysym)s}' % { "keysym": keysym }
                	sys.exit(-1)
 
 def rename_combining(seq):
 	filtered_sequence = []
 	for ks in seq:
 		if findall('^combining_', ks):
-			filtered_sequence.append(sub('^combining_', 'dead_', ks))
-		else:
-			filtered_sequence.append(ks)
+			ks = sub('^combining_', 'dead_', ks)	# combining_* -> dead_*
+		if ks == 'dead_double_grave':	# keysymdb uses the underscore-free name
+			ks = 'dead_doublegrave'
+		if ks == 'dead_inverted_breve':	# same normalisation as above
+			ks = 'dead_invertedbreve'
+		filtered_sequence.append(ks)
 	return filtered_sequence
 
 
@@ -405,17 +438,67 @@ except:
 	print "Unexpected error: ", sys.exc_info()[0]
 	sys.exit(-1)
 
+""" Look if there is a lookaside (supplementary) compose file in the current
+    directory, and if so, open, then merge with upstream Compose file.
+"""
+xorg_compose_sequences_raw = composefile.readlines()
+# The lookaside file is optional: an IOError is only reported with opt_verbose.
+try:
+	composefile_lookaside = open(FILENAME_COMPOSE_SUPPLEMENTARY, 'r')
+	try:
+		xorg_compose_sequences_raw.extend(composefile_lookaside.readlines())
+	finally:
+		composefile_lookaside.close()
+except IOError, (errno, strerror):
+	if opt_verbose:
+		print "I/O error(%s): %s" % (errno, strerror)
+		print "Did not find lookaside compose file. Continuing..."
+except:
+	print "Unexpected error: ", sys.exc_info()[0]
+	sys.exit(-1)
+
 """ Parse the compose file in  xorg_compose_sequences"""
 xorg_compose_sequences = []
 xorg_compose_sequences_algorithmic = []
 linenum_compose = 0
-for line in composefile.readlines():
+comment_nest_depth = 0
+for line in xorg_compose_sequences_raw:
 	linenum_compose += 1
 	line = line.strip()
-	if line is "" or match("^XCOMM", line) or match("^#", line):
+	if match("^XCOMM", line) or match("^#", line):
+		continue
+	# Remove simple /* ... */ comments that open and close on this line.
+	line = sub(r"\/\*([^\*]*|[\*][^/])\*\/", "", line)
+	# Any remaining "/*" or "*/" opens or closes a multi-line comment.
+	comment_start = line.find("/*")
+
+	if comment_start >= 0:
+		if comment_nest_depth == 0:
+			line = line[:comment_start]
+		else:
+			line = ""
+
+		comment_nest_depth += 1
+	else:
+		comment_end = line.find("*/")
+
+		if comment_end >= 0:
+			comment_nest_depth -= 1
+
+		if comment_nest_depth < 0:
+			print "Invalid comment %(linenum_compose)d in %(filename)s: \
+			Closing '*/' without opening '/*'" % { "linenum_compose": linenum_compose, "filename": filename_compose }
+			sys.exit(-1)
+
+		if comment_nest_depth > 0:
+			line = ""
+		elif comment_end >= 0:	# only slice when a "*/" was actually found
+			line = line[comment_end + 2:]
+
+	if line == "":
 		continue
 
-	line = line[:-1]
+	#line = line[:-1]
 	components = split(':', line)
 	if len(components) != 2:
 		print "Invalid line %(linenum_compose)d in %(filename)s: No sequence\
@@ -428,13 +511,21 @@ for line in composefile.readlines():
 	values = split('\s+', val)
 	unichar_temp = split('"', values[0])
 	unichar = unichar_temp[1]
+	if len(values) == 1:
+		continue
 	codepointstr = values[1]
+	if values[1] == '#':
+		# No codepoints that are >1 characters yet.
+		continue
 	if raw_sequence[0][0] == 'U' and match('[0-9a-fA-F]+$', raw_sequence[0][1:]):
 		raw_sequence[0] = '0x' + raw_sequence[0][1:]
-	if codepointstr[0] == 'U' and match('[0-9a-fA-F]+$', codepointstr[1:]):
-		codepoint = atoi(codepointstr[1:], 16)
-	elif keysymdatabase.has_key(codepointstr):
-		codepoint = keysymdatabase[codepointstr]
+	if  match('^U[0-9a-fA-F]+$', codepointstr):
+		codepoint = long(codepointstr[1:], 16)
+	elif keysymunicodedatabase.has_key(codepointstr):
+		#if keysymdatabase[codepointstr] != keysymunicodedatabase[codepointstr]:
+			#print "DIFFERENCE: 0x%(a)X 0x%(b)X" % { "a": keysymdatabase[codepointstr], "b": keysymunicodedatabase[codepointstr]},
+			#print raw_sequence, codepointstr
+		codepoint = keysymunicodedatabase[codepointstr]
 	else:
 		print
 		print "Invalid codepoint at line %(linenum_compose)d in %(filename)s:\
@@ -448,13 +539,30 @@ for line in composefile.readlines():
 			if opt_plane1:
 				print sequence
 			break
+		if keysymvalue(i) < 0:
+			reject_this = True
+			break
 	if reject_this:
 		continue
-	if "U0313" in sequence or "U0314" in sequence or "0x0313" in sequence or "0x0314" in sequence:
+	if "U0342" in sequence or \
+		"U0313" in sequence or \
+		"U0314" in sequence or \
+		"0x0313" in sequence or \
+		"0x0342" in sequence or \
+		"0x0314" in sequence:
+		continue
+	if "dead_belowring" in sequence or\
+                "dead_currency" in sequence or\
+		"dead_belowcomma" in sequence or\
+		"dead_belowmacron" in sequence or\
+		"dead_belowtilde" in sequence or\
+		"dead_belowbreve" in sequence or\
+		"dead_belowdiaeresis" in sequence or\
+		"dead_belowcircumflex" in sequence:
 		continue
-	for i in range(len(sequence)):
-		if sequence[i] == "0x0342":
-			sequence[i] = "dead_tilde"
+	#for i in range(len(sequence)):
+	#	if sequence[i] == "0x0342":
+	#		sequence[i] = "dead_tilde"
 	if "Multi_key" not in sequence:
 		""" Ignore for now >0xFFFF keysyms """
 		if codepoint < 0xFFFF:
@@ -473,7 +581,7 @@ for line in composefile.readlines():
 					    because of lack of dead_perispomeni (i.e. conflict)
 					"""
 					bc = basechar
-					if sequence[-1] == "dead_tilde" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff):
+					"""if sequence[-1] == "dead_tilde" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff):
 						skipping_this = True
 						break
 					if sequence[-1] == "dead_horn" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff):
@@ -486,6 +594,7 @@ for line in composefile.readlines():
 						sequence[i] = "dead_horn"
 					if sequence[-1] == "dead_dasia":
 						sequence[-1] = "dead_ogonek"
+					"""
 					unisequence.append(unichr(keysymunicodevalue(sequence.pop(), filename_compose, linenum_compose)))
 					
 				if skipping_this:
@@ -646,11 +755,11 @@ for sequence in xorg_compose_sequences_algorithmic_uniqued:
 if opt_algorithmic:
 	for sequence in xorg_compose_sequences_algorithmic_uniqued:
 		letter = "".join(sequence[-1:])
-		print '0x%(cp)04X, %(uni)c, seq: [ <0x%(base)04X>,' % { 'cp': ord(unicode(letter)), 'uni': letter, 'base': sequence[-2] },
+		print '0x%(cp)04X, %(uni)s, seq: [ <0x%(base)04X>,' % { 'cp': ord(unicode(letter)), 'uni': letter.encode('utf-8'), 'base': sequence[-2] },
 		for elem in sequence[:-2]:
 			print "<0x%(keysym)04X>," % { 'keysym': elem },
 		""" Yeah, verified... We just want to keep the output similar to -u, so we can compare/sort easily """
-		print "], recomposed as", letter, "verified"
+		print "], recomposed as", letter.encode('utf-8'), "verified"
 
 def num_of_keysyms(seq):
 	return len(seq) - 1
@@ -663,9 +772,9 @@ def convert_UnotationToHex(arg):
 
 def addprefix_GDK(arg):
 	if match('^0x', arg):
-		return '%(arg)s, ' % { 'arg': arg } 
+		return '%(arg)s, ' % { 'arg': arg }
 	else:
-		return 'GDK_%(arg)s, ' % { 'arg': arg } 
+		return 'GDK_KEY_%(arg)s, ' % { 'arg': arg }
 
 if opt_gtk:
 	first_keysym = ""
@@ -709,7 +818,7 @@ if opt_gtk:
 			print "0x%(ks)04X," % { "ks": keysymvalue(i[0]) },
 			print '%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i[1:])) }
 		elif not match('^0x', i[0]):
-			print 'GDK_%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) }
+			print 'GDK_KEY_%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) }
 		else:
 			print '%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) }
 	for i in ct_second_part: