15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#!/usr/bin/env python
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -*- coding: utf-8 -*-
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# compose-parse.py, version 1.3
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# multifunction script that helps manage the compose sequence table in GTK+ (gtk/gtkimcontextsimple.c)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# the script produces statistics and information about the whole process, run with --help for more.
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# You may need to switch your python installation to utf-8, if you get 'ascii' codec errors.
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Complain to Simos Xenitellis (simos@gnome.org, http://simos.info/blog) for this craft.
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)from re			import findall, match, split, sub
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)from string		import atoi
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)from unicodedata	import normalize
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)from urllib 		import urlretrieve
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)from os.path		import isfile, getsize
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)from copy 		import copy
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)import sys
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)import getopt
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
# Remote input files; each URL below is a data source this script reads.
URL_COMPOSE = 'http://gitweb.freedesktop.org/?p=xorg/lib/libX11.git;a=blob_plain;f=nls/en_US.UTF-8/Compose.pre'
URL_KEYSYMSTXT = 'http://www.cl.cam.ac.uk/~mgk25/ucs/keysyms.txt'
URL_GDKKEYSYMSH = 'http://git.gnome.org/browse/gtk%2B/plain/gdk/gdkkeysyms.h'
URL_UNICODEDATATXT = 'http://www.unicode.org/Public/5.2.0/ucd/UnicodeData.txt'
# Supplementary compose file, given as a bare file name rather than a URL.
FILENAME_COMPOSE_SUPPLEMENTARY = 'gtk-compose-lookaside.txt'

# Keysyms of size 2 are what is supported today; once upstream xorg is
# sorted out, some tables may be produced with size 2 and some with size 4.
SIZEOFINT = 2

# Maximum compose sequence length at present; adjust here if it increases.
WIDTHOFCOMPOSETABLE = 5

# Module-level lookup tables; all start out empty.
keysymdatabase = {}
keysymunicodedatabase = {}
unicodedatabase = {}
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
# Leading text of the generated C header: the license block, provenance
# notes, the include guard, the historical comments of the original file,
# and the opening of the gtk_compose_seqs_compact[] array initialiser.
# The text ends right after the opening brace, so presumably the table rows
# are written between headerfile_start and headerfile_end (confirm at the
# point of use, which is outside this part of the file).
headerfile_start = """/* GTK - The GIMP Tool Kit
 * Copyright (C) 2007, 2008 GNOME Foundation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/*
 * File auto-generated from script found at http://bugzilla.gnome.org/show_bug.cgi?id=321896
 * using the input files
 *  Input   : http://gitweb.freedesktop.org/?p=xorg/lib/libX11.git;a=blob_plain;f=nls/en_US.UTF-8/Compose.pre
 *  Input   : http://www.cl.cam.ac.uk/~mgk25/ucs/keysyms.txt
 *  Input   : http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
 *
 * This table is optimised for space and requires special handling to access the content.
 * This table is used solely by http://svn.gnome.org/viewcvs/gtk%2B/trunk/gtk/gtkimcontextsimple.c
 *
 * The resulting file is placed at http://svn.gnome.org/viewcvs/gtk%2B/trunk/gtk/gtkimcontextsimpleseqs.h
 * This file is described in bug report http://bugzilla.gnome.org/show_bug.cgi?id=321896
 */

/*
 * Modified by the GTK+ Team and others 2007, 2008.  See the AUTHORS
 * file for a list of people on the GTK+ Team.  See the ChangeLog
 * files for a list of changes.  These files are distributed with
 * GTK+ at ftp://ftp.gtk.org/pub/gtk/.
 */

#ifndef __GTK_IM_CONTEXT_SIMPLE_SEQS_H__
#define __GTK_IM_CONTEXT_SIMPLE_SEQS_H__

/* === These are the original comments of the file; we keep for historical purposes ===
 *
 * The following table was generated from the X compose tables include with
 * XFree86 4.0 using a set of Perl scripts. Contact Owen Taylor <otaylor@redhat.com>
 * to obtain the relevant perl scripts.
 *
 * The following compose letter letter sequences confliced
 *   Dstroke/dstroke and ETH/eth; resolved to Dstroke (Croation, Vietnamese, Lappish), over
 *                                ETH (Icelandic, Faroese, old English, IPA)  [ D- -D d- -d ]
 *   Amacron/amacron and ordfeminine; resolved to ordfeminine                 [ _A A_ a_ _a ]
 *   Amacron/amacron and Atilde/atilde; resolved to atilde                    [ -A A- a- -a ]
 *   Omacron/Omacron and masculine; resolved to masculine                     [ _O O_ o_ _o ]
 *   Omacron/omacron and Otilde/atilde; resolved to otilde                    [ -O O- o- -o ]
 *
 * [ Amacron and Omacron are in Latin-4 (Baltic). ordfeminine and masculine are used for
 *   spanish. atilde and otilde are used at least for Portuguese ]
 *
 *   at and Aring; resolved to Aring                                          [ AA ]
 *   guillemotleft and caron; resolved to guillemotleft                       [ << ]
 *   ogonek and cedilla; resolved to cedilla                                  [ ,, ]
 *
 * This probably should be resolved by first checking an additional set of compose tables
 * that depend on the locale or selected input method.
 */

static const guint16 gtk_compose_seqs_compact[] = {"""

# Trailing text of the generated C header: closes the array initialiser
# opened at the end of headerfile_start and terminates the include guard.
headerfile_end = """};

#endif /* __GTK_IM_CONTEXT_SIMPLE_SEQS_H__ */
"""
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def stringtohex(str):
    """Return the integer value of the hexadecimal digit string *str*.

    Uses the built-in int() rather than the long-deprecated string.atoi();
    both parse identically, including an optional ``0x`` prefix, and raise
    ValueError on malformed input.
    """
    # The parameter name shadows the built-in str type; it is kept because
    # it is part of the function's existing signature.
    return int(str, 16)
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def factorial(n):
    """Return n! (the product n * (n-1) * ... * 2).

    Any n less than or equal to 1, including zero and negative values,
    yields 1. The product is accumulated iteratively so that large n does
    not run into the interpreter's recursion limit.
    """
    product = 1
    while n > 1:
        product *= n
        n -= 1
    return product
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def uniq(*args):
    """Merge the given sequences and drop repeated elements.

    Elements are returned in a new list, in order of first appearance
    across all arguments. Membership is tested by equality against a list,
    so the elements do not need to be hashable.
    """
    merged = []
    for sequence in args:
        merged.extend(sequence)

    distinct = []
    for item in merged:
        if item in distinct:
            continue
        distinct.append(item)
    return distinct
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def all_permutations(seq):
    """Generate every permutation of the items of *seq*.

    Works on any sliceable, concatenable sequence (lists and strings alike);
    each permutation is yielded with the same type as the slices of *seq*.
    Borrowed from
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/252178
    """
    if len(seq) <= 1:
        # Zero or one item: the sequence is its own only permutation.
        yield seq
        return

    # Slicing (rather than indexing) keeps this valid for strings and lists.
    head = seq[0:1]
    for tail_perm in all_permutations(seq[1:]):
        # Insert the first item at every position of each shorter permutation.
        for position in range(len(tail_perm) + 1):
            yield tail_perm[:position] + head + tail_perm[position:]
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def usage():
    """Print the list of supported command-line parameters to stdout."""
    # The text is kept verbatim, one entry per line (tabs written as \t);
    # the final lone tab is part of the original message.
    help_lines = [
        "compose-parse available parameters:",
        "\t-h, --help\t\tthis craft",
        "\t-s, --statistics\tshow overall statistics (both algorithmic, non-algorithmic)",
        "\t-a, --algorithmic\tshow sequences saved with algorithmic optimisation",
        "\t-g, --gtk\t\tshow entries that go to GTK+",
        "\t-u, --unicodedatatxt\tshow compose sequences derived from UnicodeData.txt (from unicode.org)",
        "\t-v, --verbose\t\tshow verbose output",
        "        -p, --plane1\t\tshow plane1 compose sequences",
        "\t-n, --numeric\t\twhen used with --gtk, create file with numeric values only",
        "\t-e, --gtk-expanded\twhen used with --gtk, create file that repeats first column; not usable in GTK+",
        "\t--all-sequences\t\twhen used with --gtk, create file with entries rejected by default",
        "\tDefault is to show statistics.",
        "\t",
    ]
    print("\n".join(help_lines))
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
# Parse the command line.  Only a getopt parsing failure (unknown option,
# ambiguous abbreviation, ...) leads to the usage text and exit status 2;
# any other exception is left to propagate instead of being masked.
try:
    opts, args = getopt.getopt(
        sys.argv[1:],
        "pvgashune",
        ["help", "algorithmic", "statistics", "unicodedatatxt",
         "stats", "gtk", "verbose", "plane1", "numeric", "gtk-expanded",
         "all-sequences"])
except getopt.GetoptError:
    usage()
    sys.exit(2)

# One module-level flag per supported option; all are off by default.
opt_statistics = False
opt_algorithmic = False
opt_gtk = False
opt_unicodedatatxt = False
opt_verbose = False
opt_plane1 = False
opt_numeric = False
opt_gtkexpanded = False
opt_allsequences = False

for o, a in opts:
    if o in ("-h", "--help"):
        usage()
        sys.exit()
    elif o in ("-s", "--statistics", "--stats"):
        # "--stats" is declared as a long option above, so it is honoured
        # here as an alias rather than being accepted and silently ignored.
        opt_statistics = True
    elif o in ("-a", "--algorithmic"):
        opt_algorithmic = True
    elif o in ("-g", "--gtk"):
        opt_gtk = True
    elif o in ("-u", "--unicodedatatxt"):
        opt_unicodedatatxt = True
    elif o in ("-v", "--verbose"):
        opt_verbose = True
    elif o in ("-p", "--plane1"):
        opt_plane1 = True
    elif o in ("-n", "--numeric"):
        opt_numeric = True
    elif o in ("-e", "--gtk-expanded"):
        opt_gtkexpanded = True
    elif o == "--all-sequences":
        opt_allsequences = True

# With none of the main output modes requested, fall back to statistics.
if not (opt_algorithmic or opt_gtk or opt_unicodedatatxt):
    opt_statistics = True
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def download_hook(blocks_transferred, block_size, file_size):
    """Report download progress on stdout.

    On the first call (blocks_transferred == 0) and only in verbose mode, a
    "Downloading" banner is written; it includes the byte count when
    file_size is positive. Every call then writes one '#' and flushes
    stdout. block_size is accepted but not used.
    """
    first_call = blocks_transferred == 0
    if first_call and opt_verbose:
        if file_size > 0:
            banner = "Downloading %s bytes: " % (file_size,)
        else:
            banner = "Downloading: "
        sys.stdout.write(banner)
    sys.stdout.write('#')
    sys.stdout.flush()
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def download_file(url):
    """Fetch *url* into the current directory, reusing a cached copy if any.

    The local file name is the text after the last '/' of the URL. If a
    non-empty file of that name already exists it is used as is; otherwise
    the URL is retrieved with urlretrieve(), reporting progress through
    download_hook().

    Returns the local file name. On a failed download an error message is
    printed and the process exits with status -1.
    """
    localfilename = url.split('/')[-1]

    # Cached copy available: nothing to download.
    if isfile(localfilename) and getsize(localfilename) > 0:
        if opt_verbose:
            print("Using cached file for  %s" % (url,))
        return localfilename

    if opt_verbose:
        print("Downloading  %s ..." % (url,))
    try:
        urlretrieve(url, localfilename, download_hook)
    except IOError as err:
        # Not every IOError carries an (errno, strerror) pair, so the
        # exception is inspected rather than tuple-unpacked; unpacking
        # would itself fail for errors built from a single message.
        if err.strerror is not None:
            print("I/O error(%s): %s" % (err.errno, err.strerror))
        else:
            print("I/O error: %s" % (err,))
        sys.exit(-1)
    except Exception:
        # Exception (not a bare except) so Ctrl-C and SystemExit still work.
        print("Unexpected error:  %s" % (sys.exc_info()[0],))
        sys.exit(-1)
    print(" done.")
    return localfilename
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def process_gdkkeysymsh():
    """Build a keysym table from GTK+'s gdk/gdkkeysyms.h.

    The header is obtained through download_file(URL_GDKKEYSYMSH) and every
    line of the form ``#define GDK_KEY_<name> 0x<hex>`` is parsed; all other
    lines are ignored.

    Returns a dict mapping each keysym name (with the GDK_KEY_ prefix
    removed) to its numeric value. Entries whose value is zero are left
    out, and 'VoidSymbol' is always set to 0xFFFF.

    If the file cannot be opened, or a GDK_KEY_ definition is malformed, a
    message is printed and the process exits with status -1.
    """
    filename_gdkkeysymsh = download_file(URL_GDKKEYSYMSH)
    try:
        gdkkeysymsh = open(filename_gdkkeysymsh, 'r')
    except IOError as err:
        print("I/O error(%s): %s" % (err.errno, err.strerror))
        sys.exit(-1)
    except Exception:
        print("Unexpected error:  %s" % (sys.exc_info()[0],))
        sys.exit(-1)

    # Parse the gdkkeysyms.h file and place its contents in keysymdb.
    name_prefix = 'GDK_KEY_'
    keysymdb = {}
    try:
        for linenum_gdkkeysymsh, line in enumerate(gdkkeysymsh, 1):
            line = line.strip()
            if not match('^#define GDK_KEY_', line):
                continue
            components = split(r'\s+', line)

            # Validate the line; the first failing check decides the message.
            problem = None
            if len(components) < 3:
                problem = "Was expecting 3 items in the line"
            elif not match('^GDK_KEY_', components[1]):
                problem = "Was expecting a keysym starting with GDK_KEY_"
            elif not match('^0x[0-9a-fA-F]+$', components[2]):
                problem = "Was expecting a hexadecimal number at the end of the line"
            if problem is not None:
                print("Invalid line %(linenum)d in %(filename)s: %(line)s"
                      % {'linenum': linenum_gdkkeysymsh,
                         'filename': filename_gdkkeysymsh,
                         'line': line})
                print(problem)
                sys.exit(-1)

            # Drop the leading "0x" and parse the rest as hexadecimal.
            unival = long(components[2][2:], 16)
            if unival == 0:
                continue
            keysymdb[components[1][len(name_prefix):]] = unival
    finally:
        # Close the header even when leaving through sys.exit().
        gdkkeysymsh.close()

    # Patch up the keysymdb with some of our own stuff.
    #
    # Overrides that used to be applied here and are now disabled:
    #   keysyms missing from the then-upstream file:
    #     dead_stroke = 0x338
    #     dead_belowring = 0x323, dead_belowmacron = 0x331,
    #     dead_belowcircumflex = 0x32d, dead_belowtilde = 0x330,
    #     dead_belowbreve = 0x32e, dead_belowdiaeresis = 0x324
    #   former preferential treatment for Greek:
    #     dead_tilde = 0x342, combining_tilde = 0x342

    # Fixing VoidSymbol.
    keysymdb['VoidSymbol'] = 0xFFFF

    return keysymdb
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def process_keysymstxt():
	"""Build the keysym-name -> Unicode code point table from keysyms.txt.

	The file (the keysym <-> Unicode record that Markus Kuhn maintains) is
	fetched with download_file(URL_KEYSYMSTXT).  Blank lines and lines
	starting with '#' are ignored.  Every other line is split on whitespace:
	the second field ("Uxxxx") supplies the code point and the fifth field
	the keysym name.  Entries whose code point is 0 are dropped.  A few
	keysyms that the file lacks are added by hand at the end.

	Returns a dict mapping keysym name to code point.

	Prints a diagnostic and exits with status -1 if the file cannot be
	opened or if a data line has fewer than five fields.
	"""
	filename_keysymstxt = download_file(URL_KEYSYMSTXT)
	try:
		keysymstxt = open(filename_keysymstxt, 'r')
	except IOError as err:
		print("I/O error(%s): %s" % (err.errno, err.strerror))
		sys.exit(-1)
	except Exception:
		print("Unexpected error:  %s" % (sys.exc_info()[0],))
		sys.exit(-1)

	# Parse the keysyms.txt file and place its content in keysymdb.
	linenum_keysymstxt = 0
	keysymdb = {}
	try:
		for line in keysymstxt:
			linenum_keysymstxt += 1
			line = line.strip()
			if line == "" or line.startswith('#'):
				continue
			components = split(r'\s+', line)
			if len(components) < 5:
				print("Invalid line %(linenum)d in %(filename)s: %(line)s'"
					% {'linenum': linenum_keysymstxt, 'filename': filename_keysymstxt, 'line': line})
				print("Was expecting 5 items in the line")
				sys.exit(-1)
			if not match('^U[0-9a-fA-F]+$', components[1]):
				# No parsable code point on this line.  Skip it instead of
				# silently reusing the value left over from the previous line
				# (or failing with NameError on the very first data line).
				continue
			unival = long(components[1][1:], 16)
			if unival == 0:
				continue
			keysymdb[components[4]] = unival
	finally:
		# Also runs when sys.exit() is raised above, so the handle never leaks.
		keysymstxt.close()

	# Patch up the keysymdb with some of our own stuff.

	# Disabled: keysyms that used to be missing from the upstream file.
	###keysymdb['dead_belowring'] = 0x323
	###keysymdb['dead_belowmacron'] = 0x331
	###keysymdb['dead_belowcircumflex'] = 0x32d
	###keysymdb['dead_belowtilde'] = 0x330
	###keysymdb['dead_belowbreve'] = 0x32e
	###keysymdb['dead_belowdiaeresis'] = 0x324

	# Disabled: preferential treatment for Greek
	# (=> we get more savings if used for Greek).
	# keysymdb['dead_tilde'] = 0x342
	# keysymdb['combining_tilde'] = 0x342

	# Keysyms missing from Markus Kuhn's db.
	keysymdb['dead_stroke'] = 0x338
	keysymdb['Oslash'] = 0x0d8
	keysymdb['Ssharp'] = 0x1e9e

	# Missing (recently added) keysyms.
	keysymdb['dead_psili'] = 0x313
	keysymdb['dead_dasia'] = 0x314

	# Allows to import Multi_key sequences.
	keysymdb['Multi_key'] = 0xff20

	# Subscript digits and two further dead keys, also added by hand.
	keysymdb['zerosubscript'] = 0x2080
	keysymdb['onesubscript'] = 0x2081
	keysymdb['twosubscript'] = 0x2082
	keysymdb['threesubscript'] = 0x2083
	keysymdb['foursubscript'] = 0x2084
	keysymdb['fivesubscript'] = 0x2085
	keysymdb['sixsubscript'] = 0x2086
	keysymdb['sevensubscript'] = 0x2087
	keysymdb['eightsubscript'] = 0x2088
	keysymdb['ninesubscript'] = 0x2089
	keysymdb['dead_doublegrave'] = 0x030F
	keysymdb['dead_invertedbreve'] = 0x0311

	return keysymdb
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def keysymvalue(keysym, file="n/a", linenum=0):
	"""Return the numeric value of a keysym.

	Resolution order:
	  * the empty string yields 0;
	  * a name found in the module-level keysymdatabase yields its entry;
	  * "U" followed only by hex digits is parsed as hexadecimal;
	  * "0x" followed only by hex digits is parsed as hexadecimal.

	file and linenum say where the keysym was read from; they are only used
	in the error report, and only when linenum is non-zero.

	An unknown keysym is reported on stdout and ends the script with exit
	status -1.
	"""
	if keysym == "":
		return 0
	if keysym in keysymdatabase:
		return keysymdatabase[keysym]
	if keysym[0] == 'U' and match('[0-9a-fA-F]+$', keysym[1:]):
		return int(keysym[1:], 16)
	if keysym[:2] == '0x' and match('[0-9a-fA-F]+$', keysym[2:]):
		return int(keysym[2:], 16)

	print('keysymvalue: UNKNOWN{%(keysym)s}' % { "keysym": keysym })
	if linenum:
		print('keysymvalue: at line %(linenum)d in %(file)s' % { "linenum": linenum, "file": file })
	sys.exit(-1)
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def keysymunicodevalue(keysym, file="n/a", linenum=0):
	"""Return the Unicode value associated with a keysym.

	Resolution order:
	  * the empty string yields 0;
	  * a name found in the module-level keysymunicodedatabase yields its
	    entry;
	  * "U" followed only by hex digits is parsed as hexadecimal;
	  * "0x" followed only by hex digits is parsed as hexadecimal.

	file and linenum say where the keysym was read from; they are only used
	in the error report, and only when linenum is non-zero.

	An unknown keysym is reported on stdout and ends the script with exit
	status -1.
	"""
	if keysym == "":
		return 0
	if keysym in keysymunicodedatabase:
		return keysymunicodedatabase[keysym]
	if keysym[0] == 'U' and match('[0-9a-fA-F]+$', keysym[1:]):
		return int(keysym[1:], 16)
	if keysym[:2] == '0x' and match('[0-9a-fA-F]+$', keysym[2:]):
		return int(keysym[2:], 16)

	print('keysymunicodevalue: UNKNOWN{%(keysym)s}' % { "keysym": keysym })
	if linenum:
		print('keysymunicodevalue: at line %(linenum)d in %(file)s' % { "linenum": linenum, "file": file })
	sys.exit(-1)
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def rename_combining(seq):
	"""Return a new list with the keysym names of seq normalised.

	A leading "combining_" is replaced by "dead_".  After that,
	"dead_double_grave" becomes "dead_doublegrave" and "dead_inverted_breve"
	becomes "dead_invertedbreve".  All other names are copied unchanged;
	seq itself is not modified.
	"""
	filtered_sequence = []
	for ks in seq:
		# sub() returns the name untouched when the prefix is absent, so no
		# separate findall() pre-check is needed.
		ks = sub('^combining_', 'dead_', ks)
		if ks == 'dead_double_grave':
			ks = 'dead_doublegrave'
		elif ks == 'dead_inverted_breve':
			ks = 'dead_invertedbreve'
		filtered_sequence.append(ks)
	return filtered_sequence
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
# Build the two lookup tables consulted by keysymunicodevalue() and
# keysymvalue() respectively.
keysymunicodedatabase = process_keysymstxt()
keysymdatabase = process_gdkkeysymsh()

# Grab and open the compose file from upstream.
filename_compose = download_file(URL_COMPOSE)
try:
	composefile = open(filename_compose, 'r')
except IOError as err:
	print("I/O error(%s): %s" % (err.errno, err.strerror))
	sys.exit(-1)
except Exception:
	# Exception (rather than a bare except) lets Ctrl-C and SystemExit
	# propagate instead of being reported as an "unexpected error".
	print("Unexpected error:  %s" % (sys.exc_info()[0],))
	sys.exit(-1)
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
# Collect the raw lines of the upstream Compose file.  Then look for a
# lookaside (supplementary) compose file; if it can be opened, its lines are
# appended so that both files are parsed as a single stream.
xorg_compose_sequences_raw = composefile.readlines()

try:
	composefile_lookaside = open(FILENAME_COMPOSE_SUPPLEMENTARY, 'r')
	try:
		xorg_compose_sequences_raw.extend(composefile_lookaside.readlines())
	finally:
		# The handle was never closed before; release it once it is read.
		composefile_lookaside.close()
except IOError as err:
	# Not being able to read the lookaside file is not fatal; it is only
	# mentioned in verbose mode.
	if opt_verbose:
		print("I/O error(%s): %s" % (err.errno, err.strerror))
		print("Did not find lookaside compose file. Continuing...")
except Exception:
	print("Unexpected error:  %s" % (sys.exc_info()[0],))
	sys.exit(-1)
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
# Parse the merged compose lines.  Two lists are filled:
#   xorg_compose_sequences             entries of the form
#                                      [keysym, ..., codepoint]
#   xorg_compose_sequences_algorithmic entries of the form
#                                      [unicode value, ..., normalized char]
#                                      for sequences that NFC normalisation
#                                      of base + dead keys collapses into a
#                                      single character
xorg_compose_sequences = []
xorg_compose_sequences_algorithmic = []
linenum_compose = 0
comment_nest_depth = 0

# A sequence containing any keysym from these two tuples is dropped.
skipped_codepoint_keysyms = (
	"U0342", "U0313", "U0314",
	"0x0313", "0x0342", "0x0314",
)
skipped_dead_keysyms = (
	"dead_belowring", "dead_currency", "dead_belowcomma",
	"dead_belowmacron", "dead_belowtilde", "dead_belowbreve",
	"dead_belowdiaeresis", "dead_belowcircumflex",
)

for line in xorg_compose_sequences_raw:
	linenum_compose += 1
	line = line.strip()
	if line.startswith("XCOMM") or line.startswith("#"):
		continue

	# Remove simple /* ... */ comments that are complete on this line.
	line = sub(r"\/\*([^\*]*|[\*][^/])\*\/", "", line)

	comment_start = line.find("/*")
	if comment_start >= 0:
		# A comment opens here and continues on later lines.  Text in front
		# of it is kept only when we were not already inside a comment.
		if comment_nest_depth == 0:
			line = line[:comment_start]
		else:
			line = ""
		comment_nest_depth += 1
	else:
		comment_end = line.find("*/")
		if comment_end >= 0:
			comment_nest_depth -= 1

		if comment_nest_depth < 0:
			print("Invalid comment %(linenum_compose)d in %(filename)s: "
				"Closing '*/' without opening '/*'"
				% { "linenum_compose": linenum_compose, "filename": filename_compose })
			sys.exit(-1)

		if comment_nest_depth > 0:
			line = ""
		elif comment_end >= 0:
			# Keep only what follows the closing marker.  Without the guard
			# the slice ran with comment_end == -1 and chopped the first
			# character off every ordinary line.
			line = line[comment_end + 2:]

	if line == "":
		continue

	components = split(':', line)
	if len(components) != 2:
		print("Invalid line %(linenum_compose)d in %(filename)s: "
			"No sequence/value pair found"
			% { "linenum_compose": linenum_compose, "filename": filename_compose })
		sys.exit(-1)
	seq, val = components
	seq = seq.strip()
	val = val.strip()
	raw_sequence = findall(r'\w+', seq)
	values = split(r'\s+', val)
	unichar_temp = split('"', values[0])
	unichar = unichar_temp[1]
	if len(values) == 1:
		continue
	codepointstr = values[1]
	if codepointstr == '#':
		# No codepoints that are >1 characters yet.
		continue
	if raw_sequence[0][0] == 'U' and match('[0-9a-fA-F]+$', raw_sequence[0][1:]):
		# Rewrite a leading Uxxxx keysym into its 0xxxxx spelling.
		raw_sequence[0] = '0x' + raw_sequence[0][1:]
	if match('^U[0-9a-fA-F]+$', codepointstr):
		codepoint = long(codepointstr[1:], 16)
	elif codepointstr in keysymunicodedatabase:
		codepoint = keysymunicodedatabase[codepointstr]
	else:
		print("")
		print("Invalid codepoint at line %(linenum_compose)d in %(filename)s: %(line)s"
			% { "linenum_compose": linenum_compose, "filename": filename_compose, "line": line })
		sys.exit(-1)
	sequence = rename_combining(raw_sequence)

	# Reject sequences that contain a keysym outside 0..0xFFFF.
	reject_this = False
	for i in sequence:
		value = keysymvalue(i)
		if value > 0xFFFF:
			reject_this = True
			if opt_plane1:
				print(sequence)
			break
		if value < 0:
			reject_this = True
			break
	if reject_this:
		continue

	if any(ks in sequence for ks in skipped_codepoint_keysyms):
		continue
	if any(ks in sequence for ks in skipped_dead_keysyms):
		continue

	if "Multi_key" in sequence:
		# Multi_key sequences are always stored explicitly.
		sequence.append(codepoint)
		xorg_compose_sequences.append(sequence)
		continue

	# Ignore for now >0xFFFF keysyms.
	if not codepoint < 0xFFFF:
		print("OVER %s" % (sequence,))
		sys.exit(-1)

	original_sequence = copy(sequence)
	stats_sequence = copy(sequence)
	base = sequence.pop()
	basechar = keysymvalue(base, filename_compose, linenum_compose)
	if not basechar < 0xFFFF:
		print("Error in base char !?!")
		sys.exit(-2)

	# Turn the remaining keysyms into characters, last one first.  (A
	# Greek-specific special case for dead_tilde / dead_horn / dead_ogonek /
	# dead_psili / dead_dasia used to be applied here; it is disabled.)
	counter = 1
	unisequence = []
	not_normalised = True
	while sequence:
		unisequence.append(unichr(keysymunicodevalue(sequence.pop(), filename_compose, linenum_compose)))

	# If some ordering of the dead keys after the base character normalises
	# to a single character, the sequence can be handled algorithmically.
	for perm in all_permutations(unisequence):
		normalized = normalize('NFC', unichr(basechar) + "".join(perm))
		if len(normalized) == 1:
			stats_sequence_data = [keysymunicodevalue(ks) for ks in stats_sequence]
			stats_sequence_data.append(normalized)
			xorg_compose_sequences_algorithmic.append(stats_sequence_data)
			not_normalised = False
			break
		counter += 1

	if not_normalised or opt_allsequences:
		original_sequence.append(codepoint)
		xorg_compose_sequences.append(original_sequence)
def sequence_cmp(x, y):
	"""Three-way comparison (-1, 0 or 1) of two compose sequences.

	The order is decided by, in turn:
	  1. keysymvalue() of the first items;
	  2. the lengths of the two sequences;
	  3. keysymvalue() of the items at positions 1 to 4.  Position 1 is always
	     examined; a later position is examined only when x has at least one
	     further item after it, otherwise the sequences compare equal.

	NOTE(review): presumably the trailing item is the code point appended by
	the parser, which is why it is left out of the comparison - confirm.
	"""
	def compare_at(pos):
		left = keysymvalue(x[pos])
		right = keysymvalue(y[pos])
		if left > right:
			return 1
		if left < right:
			return -1
		return 0

	outcome = compare_at(0)
	if outcome != 0:
		return outcome

	size_x = len(x)
	size_y = len(y)
	if size_x > size_y:
		return 1
	if size_x < size_y:
		return -1

	for pos in range(1, 5):
		if pos > 1 and size_x < pos + 2:
			return 0
		outcome = compare_at(pos)
		if outcome != 0:
			return outcome
	return 0
6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)def sequence_unicode_cmp(x, y):
6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if keysymunicodevalue(x[0]) > keysymunicodevalue(y[0]):
6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return 1
6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	elif keysymunicodevalue(x[0]) < keysymunicodevalue(y[0]):
6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return -1
6765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	elif len(x) > len(y):
6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return 1
6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	elif len(x) < len(y):
6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return -1
6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	elif keysymunicodevalue(x[1]) > keysymunicodevalue(y[1]):
6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return 1
6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	elif keysymunicodevalue(x[1]) < keysymunicodevalue(y[1]):
6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return -1
6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	elif len(x) < 4:
6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return 0
6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	elif keysymunicodevalue(x[2]) > keysymunicodevalue(y[2]):
6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return 1
6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	elif keysymunicodevalue(x[2]) < keysymunicodevalue(y[2]):
6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return -1
6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	elif len(x) < 5:
6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return 0
6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	elif keysymunicodevalue(x[3]) > keysymunicodevalue(y[3]):
6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return 1
6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	elif keysymunicodevalue(x[3]) < keysymunicodevalue(y[3]):
6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return -1
6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	elif len(x) < 6:
6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return 0
6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	elif keysymunicodevalue(x[4]) > keysymunicodevalue(y[4]):
6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return 1
7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	elif keysymunicodevalue(x[4]) < keysymunicodevalue(y[4]):
7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return -1
7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	else:
7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return 0
7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def sequence_algorithmic_cmp(x, y):
	""" cmp-style ordering: shorter sequences first, then element by element.

	Elements are compared directly with < and >, left to right; the first
	position that differs decides. Returns -1, 1 or 0. """
	size_x, size_y = len(x), len(y)
	if size_x != size_y:
		if size_x < size_y:
			return -1
		return 1
	for left, right in zip(x, y):
		if left < right:
			return -1
		if left > right:
			return 1
	return 0
7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
xorg_compose_sequences.sort(sequence_cmp)

# Drop duplicates from the sorted list: of every run of consecutive sequences
# that sequence_unicode_cmp() considers equal, only the last one is kept.
xorg_compose_sequences_uniqued = []
first_time = True
item = None
for next_item in xorg_compose_sequences:
	if first_time:
		first_time = False
	elif sequence_unicode_cmp(item, next_item) != 0:
		# next_item starts a new run, so item was the last of the previous one.
		xorg_compose_sequences_uniqued.append(item)
	item = next_item
# The loop only emits an entry when its successor differs, so the final run
# has to be flushed here; without this the last sequence was silently lost.
if not first_time:
	xorg_compose_sequences_uniqued.append(item)

xorg_compose_sequences = copy(xorg_compose_sequences_uniqued)
7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
# How many of the table sequences mention Multi_key among their keysyms
# (the trailing element of each sequence is left out of the search).
counter_multikey = 0
for item in xorg_compose_sequences:
	keysym_names = "".join(item[:-1])
	if findall('Multi_key', keysym_names):
		counter_multikey += 1

xorg_compose_sequences_algorithmic.sort(sequence_algorithmic_cmp)
xorg_compose_sequences_algorithmic_uniqued = uniq(xorg_compose_sequences_algorithmic)

# Figures used later by the statistics report and the GTK table generator.
firstitem = ""
num_first_keysyms = 0
zeroes = 0
num_entries = 0
num_algorithmic_greek = 0
for sequence in xorg_compose_sequences:
	previous_value = keysymvalue(firstitem)
	if previous_value != keysymvalue(sequence[0]):
		# A first keysym with a different value than the previous sequence's.
		num_first_keysyms += 1
		firstitem = sequence[0]
	num_entries += 1
	# Same as 6 - len(sequence) + 1.
	zeroes += 7 - len(sequence)

# Count the algorithmic sequences whose last element lies in the
# 0x370-0x3ff or 0x1f00-0x1fff ranges (reported as "Greek" in the statistics).
for sequence in xorg_compose_sequences_algorithmic_uniqued:
	ch = ord(sequence[-1])
	if 0x370 <= ch <= 0x3ff or 0x1f00 <= ch <= 0x1fff:
		num_algorithmic_greek += 1
7585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)if opt_algorithmic:
7615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	for sequence in xorg_compose_sequences_algorithmic_uniqued:
7625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		letter = "".join(sequence[-1:])
7635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		print '0x%(cp)04X, %(uni)s, seq: [ <0x%(base)04X>,' % { 'cp': ord(unicode(letter)), 'uni': letter.encode('utf-8'), 'base': sequence[-2] },
7645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		for elem in sequence[:-2]:
7655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			print "<0x%(keysym)04X>," % { 'keysym': elem },
7665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		""" Yeah, verified... We just want to keep the output similar to -u, so we can compare/sort easily """
7675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		print "], recomposed as", letter.encode('utf-8'), "verified"
7685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def num_of_keysyms(seq):
	""" Number of keysyms in seq, i.e. every element except the trailing one """
	total_elements = len(seq)
	return total_elements - 1
7715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def convert_UnotationToHex(arg):
	""" Turn a keysym written as U followed by exactly four uppercase hex digits
	into 0x notation (UABCD -> 0xABCD); anything else is returned unchanged,
	including values that are not of type str """
	if not isinstance(arg, str):
		return arg
	if not match('^U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]$', arg):
		return arg
	return sub('^U', '0x', arg)
7775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def addprefix_GDK(arg):
	""" Format one keysym as a table cell followed by ', ': values that start
	with 0x are emitted as they are, every other name gets the GDK_KEY_ prefix """
	if not match('^0x', arg):
		return 'GDK_KEY_%(arg)s, ' % { 'arg': arg }
	return '%(arg)s, ' % { 'arg': arg }
7835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)if opt_gtk:
7855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	first_keysym = ""
7865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	sequence = []
7875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	compose_table = []
7885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ct_second_part = []
7895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ct_sequence_width = 2
7905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	start_offset = num_first_keysyms * (WIDTHOFCOMPOSETABLE+1)
7915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	we_finished = False
7925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	counter = 0
7935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	sequence_iterator = iter(xorg_compose_sequences)
7955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	sequence = sequence_iterator.next()
7965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	while True:
7975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		first_keysym = sequence[0]					# Set the first keysym
7985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		compose_table.append([first_keysym, 0, 0, 0, 0, 0])
7995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		while sequence[0] == first_keysym:
8005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			compose_table[counter][num_of_keysyms(sequence)-1] += 1
8015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			try:
8025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				sequence = sequence_iterator.next()
8035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			except StopIteration:
8045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				we_finished = True
8055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				break
8065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if we_finished:
8075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			break
8085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		counter += 1
8095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ct_index = start_offset
8115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	for line_num in range(len(compose_table)):
8125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		for i in range(WIDTHOFCOMPOSETABLE):
8135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			occurences = compose_table[line_num][i+1]
8145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			compose_table[line_num][i+1] = ct_index
8155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ct_index += occurences * (i+2)
8165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	for sequence in xorg_compose_sequences:
8185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ct_second_part.append(map(convert_UnotationToHex, sequence))
8195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print headerfile_start
8215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	for i in compose_table:
8225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if opt_gtkexpanded:
8235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			print "0x%(ks)04X," % { "ks": keysymvalue(i[0]) },
8245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			print '%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i[1:])) }
8255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		elif not match('^0x', i[0]):
8265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			print 'GDK_KEY_%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) }
8275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		else:
8285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			print '%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) }
8295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	for i in ct_second_part:
8305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if opt_numeric:
8315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			for ks in i[1:][:-1]:
8325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				print '0x%(seq)04X, ' % { 'seq': keysymvalue(ks) },
8335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			print '0x%(cp)04X, ' % { 'cp':i[-1] }
8345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"""
8355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			for ks in i[:-1]:
8365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				print '0x%(seq)04X, ' % { 'seq': keysymvalue(ks) },
8375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			print '0x%(cp)04X, ' % { 'cp':i[-1] }
8385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"""
8395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		elif opt_gtkexpanded:
8405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			print '%(seq)s0x%(cp)04X, ' % { 'seq': "".join(map(addprefix_GDK, i[:-1])), 'cp':i[-1] }
8415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		else:
8425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			print '%(seq)s0x%(cp)04X, ' % { 'seq': "".join(map(addprefix_GDK, i[:-1][1:])), 'cp':i[-1] }
8435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print headerfile_end
8445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def redecompose(codepoint):
	""" Recursively expand codepoint through the decompositions in unicodedatabase.

	Returns [codepoint] when the first decomposition token is '' or '0'.
	Otherwise returns a list holding one (again nested) result per decomposition
	element; a leading <tag> token is skipped. The result is not flattened. """
	(name, decomposition, combiningclass) = unicodedatabase[codepoint]
	leading = decomposition[0]
	if leading == '' or leading == '0':
		return [codepoint]
	elements = decomposition
	if match(r'<\w+>', leading):
		# The first token is a tag such as <compat>, not a codepoint.
		elements = decomposition[1:]
	numdecomposition = [stringtohex(element) for element in elements]
	return [redecompose(number) for number in numdecomposition]
8545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)def process_unicodedata_file(verbose = False):
8565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	""" Grab from wget http://www.unicode.org/Public/UNIDATA/UnicodeData.txt """
8575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	filename_unicodedatatxt = download_file(URL_UNICODEDATATXT)
8585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	try:
8595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		unicodedatatxt = open(filename_unicodedatatxt, 'r')
8605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	except IOError, (errno, strerror):
8615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		print "I/O error(%s): %s" % (errno, strerror)
8625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		sys.exit(-1)
8635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	except:
8645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		print "Unexpected error: ", sys.exc_info()[0]
8655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		sys.exit(-1)
8665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	for line in unicodedatatxt.readlines():
8675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if line[0] == "" or line[0] == '#':
8685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			continue
8695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		line = line[:-1]
8705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		uniproperties = split(';', line)
8715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		codepoint = stringtohex(uniproperties[0])
8725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		""" We don't do Plane 1 or CJK blocks. The latter require reading additional files. """
8735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if codepoint > 0xFFFF or (codepoint >= 0x4E00 and codepoint <= 0x9FFF) or (codepoint >= 0xF900 and codepoint <= 0xFAFF):
8745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			continue
8755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		name = uniproperties[1]
8765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		category = uniproperties[2]
8775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		combiningclass = uniproperties[3]
8785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		decomposition = uniproperties[5]
8795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		unicodedatabase[codepoint] = [name, split('\s+', decomposition), combiningclass]
8805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	counter_combinations = 0
8825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	counter_combinations_greek = 0
8835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	counter_entries = 0
8845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	counter_entries_greek = 0
8855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	for item in unicodedatabase.keys():
8875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		(name, decomposition, combiningclass) = unicodedatabase[item]
8885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if decomposition[0] == '':
8895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			continue
8905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			print name, "is empty"
8915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		elif match('<\w+>', decomposition[0]):
8925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			continue
8935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			print name, "has weird", decomposition[0]
8945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		else:
8955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			sequence = map(stringtohex, decomposition)
8965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			chrsequence = map(unichr, sequence)
8975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			normalized = normalize('NFC', "".join(chrsequence))
8985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			""" print name, sequence, "Combining: ", "".join(chrsequence), normalized, len(normalized),  """
9005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			decomposedsequence = []
9015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			for subseq in map(redecompose, sequence):
9025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				for seqitem in subseq:
9035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)					if isinstance(seqitem, list):
9045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)						for i in seqitem:
9055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)							if isinstance(i, list):
9065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)								for j in i:
9075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)									decomposedsequence.append(j)
9085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)							else:
9095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)								decomposedsequence.append(i)
9105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)					else:
9115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)						decomposedsequence.append(seqitem)
9125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			recomposedchar = normalize('NFC', "".join(map(unichr, decomposedsequence)))
9135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			if len(recomposedchar) == 1 and len(decomposedsequence) > 1:
9145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				counter_entries += 1
9155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				counter_combinations += factorial(len(decomposedsequence)-1)
9165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				ch = item
9175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				if ch >= 0x370 and ch <= 0x3ff or ch >= 0x1f00 and ch <= 0x1fff:
9185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)					counter_entries_greek += 1
9195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)					counter_combinations_greek += factorial(len(decomposedsequence)-1)
9205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				if verbose:
9215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)					print "0x%(cp)04X, %(uni)c, seq:" % { 'cp':item, 'uni':unichr(item) },
9225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)					print "[",
9235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)					for elem in decomposedsequence:
9245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)						print '<0x%(hex)04X>,' % { 'hex': elem },
9255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)					print "], recomposed as", recomposedchar,
9265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)					if unichr(item) == recomposedchar:
9275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)						print "verified"
9285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if verbose == False:
9305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		print "Unicode statistics from UnicodeData.txt"
9315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		print "Number of entries that can be algorithmically produced     :", counter_entries
9325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		print "  of which are for Greek                                   :", counter_entries_greek
9335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		print "Number of compose sequence combinations requiring          :", counter_combinations
9345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		print "  of which are for Greek                                   :", counter_combinations_greek
9355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		print "Note: We do not include partial compositions, "
9365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		print "thus the slight discrepancy in the figures"
9375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		print
9385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)if opt_unicodedatatxt:
9405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	process_unicodedata_file(True)
9415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)if opt_statistics:
9435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print
9445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Total number of compose sequences (from file)              :", len(xorg_compose_sequences) + len(xorg_compose_sequences_algorithmic)
9455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "  of which can be expressed algorithmically                :", len(xorg_compose_sequences_algorithmic)
9465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "  of which cannot be expressed algorithmically             :", len(xorg_compose_sequences)
9475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "    of which have Multi_key                                :", counter_multikey
9485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print
9495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Algorithmic (stats for Xorg Compose file)"
9505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Number of sequences off due to algo from file (len(array)) :", len(xorg_compose_sequences_algorithmic)
9515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Number of sequences off due to algo (uniq(sort(array)))    :", len(xorg_compose_sequences_algorithmic_uniqued)
9525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "  of which are for Greek                                   :", num_algorithmic_greek
9535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print
9545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	process_unicodedata_file()
9555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Not algorithmic (stats from Xorg Compose file)"
9565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Number of sequences                                        :", len(xorg_compose_sequences)
9575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Flat array looks like                                      :", len(xorg_compose_sequences), "rows of 6 integers (2 bytes per int, or 12 bytes per row)"
9585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Flat array would have taken up (in bytes)                  :", num_entries * 2 * 6, "bytes from the GTK+ library"
9595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Number of items in flat array                              :", len(xorg_compose_sequences) * 6
9605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "  of which are zeroes                                      :", zeroes, "or ", (100 * zeroes) / (len(xorg_compose_sequences) * 6), " per cent"
9615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Number of different first items                            :", num_first_keysyms
9625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Number of max bytes (if using flat array)                  :", num_entries * 2 * 6
9635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Number of savings                                          :", zeroes * 2 - num_first_keysyms * 2 * 5
9645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print
9655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Memory needs if both algorithmic+optimised table in latest Xorg compose file"
9665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "                                                           :", num_entries * 2 * 6 - zeroes * 2 + num_first_keysyms * 2 * 5
9675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print
9685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Existing (old) implementation in GTK+"
9695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "Number of sequences in old gtkimcontextsimple.c            :", 691
9705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	print "The existing (old) implementation in GTK+ takes up         :", 691 * 2 * 12, "bytes"
971