1#! /usr/bin/env python
2# -*- coding: iso-8859-1 -*-
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
4
5"""Generate binary message catalog from textual translation description.
6
7This program converts a textual Uniforum-style message catalog (.po file) into
8a binary GNU catalog (.mo file).  This is essentially the same function as the
9GNU msgfmt program, however, it is a simpler implementation.
10
11Usage: msgfmt.py [OPTIONS] filename.po
12
13Options:
14    -o file
15    --output-file=file
16        Specify the output file to write to.  If omitted, output will go to a
17        file named filename.mo (based off the input file name).
18
19    -h
20    --help
21        Print this message and exit.
22
23    -V
24    --version
25        Display version information and exit.
26"""
27
28import os
29import sys
30import ast
31import getopt
32import struct
33import array
34
# Version string printed by the -V/--version command-line option.
__version__ = "1.1"

# Catalog under construction: add() stores msgid -> msgstr pairs here and
# generate() serializes them into the .mo image.
MESSAGES = {}
38
39
40
def usage(code, msg=''):
    """Print the module help text to stderr and exit.

    Args:
        code: Process exit status passed to sys.exit().
        msg: Optional extra message (a string or an exception) printed
            on its own line after the help text.

    Raises:
        SystemExit: always, carrying *code*.
    """
    # sys.stderr.write() produces the same output as the Python 2
    # "print >>" statement but is valid on every Python version.
    sys.stderr.write('%s\n' % (__doc__,))
    if msg:
        sys.stderr.write('%s\n' % (msg,))
    sys.exit(code)
46
47
48
def add(id, str, fuzzy):
    """Store translation *str* for message *id* in MESSAGES.

    Nothing is stored when the entry is flagged fuzzy or when the
    translation is empty.
    """
    if fuzzy or not str:
        return
    MESSAGES[id] = str
54
55
56
def generate():
    """Return the binary .mo image for the messages held in MESSAGES.

    The output consists of, in order: a header of seven 32-bit integers,
    the index table of the original strings (length and file offset for
    each entry), the index table of the translations, all original
    strings, and finally all translations.  No hash table is written.
    Every string is followed by a NUL byte that is not counted in its
    recorded length.

    Integers are packed with struct's default (native) byte order.  The
    keys and values of MESSAGES must be byte strings (plain ``str`` on
    Python 2).
    """
    # The keys are sorted in the .mo file.
    keys = sorted(MESSAGES)
    values = [MESSAGES[key] for key in keys]

    # Build each string table with a single join instead of repeated
    # concatenation.
    ids = b''.join(key + b'\0' for key in keys)
    strs = b''.join(value + b'\0' for value in values)

    # The header is 7 32-bit integers.  We don't use hash tables, so the
    # keys start right after the two index tables (each table holds two
    # 32-bit integers per message, i.e. 16 bytes per message in total).
    keystart = 7*4 + 16*len(keys)
    # The values start after the keys.
    valuestart = keystart + len(ids)

    # Each index entry has first the size of the string, then its file
    # offset.  The running positions skip the NUL terminator as well.
    koffsets = []
    voffsets = []
    kpos = vpos = 0
    for key, value in zip(keys, values):
        koffsets += [len(key), keystart + kpos]
        voffsets += [len(value), valuestart + vpos]
        kpos += len(key) + 1
        vpos += len(value) + 1
    # The string table first has the list of keys, then the list of values.
    index = koffsets + voffsets

    output = struct.pack("Iiiiiii",
                         0x950412de,        # Magic
                         0,                 # Version
                         len(keys),         # # of entries
                         7*4,               # start of key index
                         7*4+len(keys)*8,   # start of value index
                         0, 0)              # size and offset of hash table
    # struct.pack() emits the same native ints as array.array("i") did,
    # without relying on the tostring() method removed in Python 3.9.
    output += struct.pack("%di" % len(index), *index)
    output += ids
    output += strs
    return output
97
98
99
def _fatal(message):
    """Write *message* to stderr and exit with status 1."""
    sys.stderr.write('%s\n' % (message,))
    sys.exit(1)


def make(filename, outfile):
    """Compile one .po catalog into a binary .mo file.

    Args:
        filename: Path of the input catalog; '.po' is appended when the
            name does not already end with it.
        outfile: Path of the .mo file to write, or None to derive it from
            the input name by replacing the extension with '.mo'.

    Parsed entries are stored through add() and serialized by generate().
    An unreadable input file or a malformed catalog prints a message to
    stderr and exits with status 1.  A failure to write the output file
    is reported on stderr but does not terminate the program.
    """
    ID = 1
    STR = 2

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'
        # Each input gets its own output file here, so messages collected
        # from a previously compiled catalog must not leak into this one.
        # With an explicit outfile the catalog keeps accumulating, so
        # several inputs written to one output file are still merged.
        MESSAGES.clear()

    try:
        with open(infile) as f:
            lines = f.readlines()
    except IOError as msg:
        _fatal(msg)

    section = None
    fuzzy = 0
    # msgid stays None until the first msgid line so that a stray msgstr
    # can be reported instead of failing on an unbound local variable.
    msgid = None
    msgstr = ''
    is_plural = False

    # Parse the catalog
    for lno, l in enumerate(lines, 1):
        if l.startswith('#'):
            # If we get a comment line after a msgstr, this is a new entry
            if section == STR:
                add(msgid, msgstr, fuzzy)
                section = None
                fuzzy = 0
            # Record a fuzzy mark
            if l.startswith('#,') and 'fuzzy' in l:
                fuzzy = 1
            # Skip comments
            continue
        # Now we are in a msgid section, output previous section
        if l.startswith('msgid') and not l.startswith('msgid_plural'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                # No comment line separated the two entries, so the fuzzy
                # mark belonged to the entry just stored, not to this one.
                fuzzy = 0
            section = ID
            l = l[5:]
            msgid = msgstr = ''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith('msgid_plural'):
            if section != ID:
                _fatal('msgid_plural not preceded by msgid on %s:%d' %
                       (infile, lno))
            l = l[12:]
            msgid += '\0' # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith('msgstr'):
            if msgid is None:
                _fatal('msgstr not preceded by msgid on %s:%d' %
                       (infile, lno))
            section = STR
            if l.startswith('msgstr['):
                if not is_plural:
                    _fatal('plural without msgid_plural on %s:%d' %
                           (infile, lno))
                l = l.split(']', 1)[1]
                if msgstr:
                    msgstr += '\0' # Separator of the various plural forms
            else:
                if is_plural:
                    _fatal('indexed msgstr required for plural on  %s:%d' %
                           (infile, lno))
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        try:
            l = ast.literal_eval(l)
        except (SyntaxError, ValueError):
            # Not a quoted string: report it like any other syntax error
            # instead of letting the exception escape as a traceback.
            _fatal('Syntax error on %s:%d before:\n%s' % (infile, lno, l))
        if section == ID:
            msgid += l
        elif section == STR:
            msgstr += l
        else:
            _fatal('Syntax error on %s:%d before:\n%s' % (infile, lno, l))
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except IOError as msg:
        sys.stderr.write('%s\n' % (msg,))
195
196
197
def main():
    """Command-line entry point: parse options and compile each input.

    Recognized options are -h/--help, -V/--version and
    -o/--output-file=FILE.  Every remaining argument is passed to make()
    together with the requested output file (None when -o was not given).

    Raises:
        SystemExit: on --help and --version (status 0), and on invalid
            options or a missing input file (status 1).
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            sys.stderr.write("msgfmt.py %s\n" % (__version__,))
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        sys.stderr.write('No input file given\n')
        sys.stderr.write("Try `msgfmt --help' for more information.\n")
        # A missing input file is an error, so report failure to the
        # caller instead of returning with exit status 0.
        sys.exit(1)

    for filename in args:
        make(filename, outfile)
223
224
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
227