1#! /usr/bin/env python
2# -*- coding: iso-8859-1 -*-
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
4
5"""Generate binary message catalog from textual translation description.
6
7This program converts a textual Uniforum-style message catalog (.po file) into
8a binary GNU catalog (.mo file).  This is essentially the same function as the
9GNU msgfmt program, however, it is a simpler implementation.
10
11Usage: msgfmt.py [OPTIONS] filename.po
12
13Options:
14    -o file
15    --output-file=file
16        Specify the output file to write to.  If omitted, output will go to a
17        file named filename.mo (based off the input file name).
18
19    -h
20    --help
21        Print this message and exit.
22
23    -V
24    --version
25        Display version information and exit.
26"""
27
28import sys
29import os
30import getopt
31import struct
32import array
33
# Version string printed by the -V / --version option.
__version__ = '1.1'

# Catalog under construction, mapping each msgid to its msgstr.  add() stores
# entries here and generate() serializes them.
MESSAGES = {}
37
38
39
def usage(code, msg=''):
    """Write the module help text to stderr and terminate the process.

    code is the exit status passed to sys.exit().  msg, when true, is
    written on a line of its own after the help text; any object that can be
    formatted with %s is accepted, so an exception instance may be passed
    directly.
    """
    # sys.stderr.write() instead of the print statement: the output is the
    # same on Python 2, and the call is also valid on Python 3.
    sys.stderr.write('%s\n' % (__doc__,))
    if msg:
        sys.stderr.write('%s\n' % (msg,))
    sys.exit(code)
45
46
47
def add(id, str, fuzzy):
    """Store the translation str for message id in MESSAGES.

    Nothing is stored when the entry is marked fuzzy or when the translation
    is empty, so such entries never reach the generated catalog.
    """
    if fuzzy or not str:
        return
    MESSAGES[id] = str
53
54
55
def generate():
    """Return the binary GNU .mo image for the entries in MESSAGES.

    Layout of the result, in order: a header of seven 32-bit integers, the
    key index, the value index, all msgids, then all msgstrs.  An index
    entry is a (length, file offset) pair.  Every string is NUL terminated
    and the NUL does not count into the recorded length.  No hash table is
    written.

    MESSAGES is expected to map byte strings to byte strings (plain str on
    Python 2); lengths and offsets are therefore counted in bytes.
    """
    # the keys are sorted in the .mo file
    keys = sorted(MESSAGES)
    values = [MESSAGES[key] for key in keys]
    # Strings are stored in exactly the order of their index entries: every
    # key first, then every value.
    chunks = keys + values

    # The header is 7 32-bit integers and each of the two index tables takes
    # 8 bytes per entry.  We don't use hash tables, so the first key starts
    # right after the index tables.
    pos = 7*4 + 16*len(keys)
    index = []
    for chunk in chunks:
        # Each entry has first the size of the string, then the file offset.
        index += [len(chunk), pos]
        pos += len(chunk) + 1           # step over the string and its NUL

    # '=' keeps the machine's byte order but pins every field to exactly four
    # bytes with no padding, independent of the platform's C int size.
    header = struct.pack("=Iiiiiii",
                         0x950412de,        # Magic
                         0,                 # Version
                         len(keys),         # # of entries
                         7*4,               # start of key index
                         7*4+len(keys)*8,   # start of value index
                         0, 0)              # size and offset of hash table
    table = struct.pack("=%di" % len(index), *index)
    # One join instead of repeated +=, which is quadratic for big catalogs.
    strings = b''.join(chunk + b'\0' for chunk in chunks)
    return header + table + strings
96
97
98
def make(filename, outfile):
    """Compile the .po catalog filename into the binary .mo file outfile.

    '.po' is appended to filename when it does not already end with it.  If
    outfile is None the result is written next to the input file, with the
    extension replaced by '.mo'.

    Parsed entries are collected through add() and serialized by generate().
    When outfile is None, MESSAGES is emptied first so that the catalog
    contains only the entries of this input file.  With an explicit outfile
    the entries accumulate across calls, which lets several inputs be merged
    into one output file.

    An unreadable input file or a malformed catalog is reported on stderr
    and ends the process with exit status 1.  A failure to write the output
    is reported on stderr but does not end the process.
    """
    # Local import: only this function needs it.
    import ast

    ID = 1
    STR = 2

    def fail(text):
        # Report a fatal problem on stderr and stop with exit status 1.
        sys.stderr.write('%s\n' % (text,))
        sys.exit(1)

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'
        # One output per input: do not carry over entries that an earlier
        # call left in the shared table.
        MESSAGES.clear()

    try:
        with open(infile) as po:
            lines = po.readlines()
    except IOError as msg:
        fail(msg)

    section = None
    fuzzy = 0
    msgid = msgstr = ''
    # None until the first msgid has been seen; after that True or False.
    is_plural = None

    # Parse the catalog
    for lno, line in enumerate(lines, 1):
        if line.startswith('#'):
            # If we get a comment line after a msgstr, this is a new entry
            if section == STR:
                add(msgid, msgstr, fuzzy)
                section = None
                fuzzy = 0
            # Record a fuzzy mark
            if line.startswith('#,') and 'fuzzy' in line:
                fuzzy = 1
            # Skip comments
            continue
        # Now we are in a msgid section, output previous section
        if line.startswith('msgid') and not line.startswith('msgid_plural'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                # The fuzzy mark belonged to the entry just stored; without
                # this reset it would also hide the entry starting here when
                # no comment line separates the two.
                fuzzy = 0
            section = ID
            line = line[5:]
            msgid = msgstr = ''
            is_plural = False
        # This is a message with plural forms
        elif line.startswith('msgid_plural'):
            if section != ID:
                fail('msgid_plural not preceeded by msgid on %s:%d' %
                     (infile, lno))
            line = line[12:]
            msgid += '\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif line.startswith('msgstr'):
            if is_plural is None:
                # No msgid seen yet, so there is nothing to translate.
                fail('msgstr not preceded by msgid on %s:%d' % (infile, lno))
            section = STR
            if line.startswith('msgstr['):
                if not is_plural:
                    fail('plural without msgid_plural on %s:%d' %
                         (infile, lno))
                line = line.split(']', 1)[1]
                if msgstr:
                    msgstr += '\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    fail('indexed msgstr required for plural on  %s:%d' %
                         (infile, lno))
                line = line[6:]
        # Skip empty lines
        line = line.strip()
        if not line:
            continue
        # What remains must be a quoted string.  It is decoded with
        # ast.literal_eval() rather than eval(): a .po file is external
        # input, and eval() would execute any expression placed in it.
        # NOTE(review): escapes are still interpreted with Python's rules,
        # as before; confirm they cover every escape allowed in PO files.
        try:
            text = ast.literal_eval(line)
        except (SyntaxError, ValueError):
            text = None
        if not isinstance(text, str):
            fail('Syntax error on %s:%d before:\n%s' % (infile, lno, line))
        if section == ID:
            msgid += text
        elif section == STR:
            msgstr += text
        else:
            fail('Syntax error on %s:%d before:\n%s' % (infile, lno, text))
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as mo:
            mo.write(output)
    except IOError as msg:
        # Reported only; the process is deliberately not ended here.
        sys.stderr.write('%s\n' % (msg,))
195
196
197
def main():
    """Command-line entry point: parse sys.argv and compile each input file.

    Options: -h/--help prints the usage text and exits with status 0,
    -V/--version prints the program version to stderr and exits with status
    0, -o/--output-file names the output file passed to make().  An
    unrecognized option prints the usage text and exits with status 1.

    Every remaining argument is compiled with make().  If there is none, an
    error is printed to stderr and the process exits with status 1.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            sys.stderr.write('msgfmt.py %s\n' % (__version__,))
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        sys.stderr.write('No input file given\n')
        sys.stderr.write("Try `msgfmt --help' for more information.\n")
        # Nothing was compiled: signal failure to the caller instead of
        # returning with exit status 0.
        sys.exit(1)

    for filename in args:
        make(filename, outfile)
223
224
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()
227