1#! /usr/bin/env python3
2# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
3
4"""Generate binary message catalog from textual translation description.
5
6This program converts a textual Uniforum-style message catalog (.po file) into
7a binary GNU catalog (.mo file).  This is essentially the same function as the
8GNU msgfmt program, however, it is a simpler implementation.
9
10Usage: msgfmt.py [OPTIONS] filename.po
11
12Options:
13    -o file
14    --output-file=file
15        Specify the output file to write to.  If omitted, output will go to a
16        file named filename.mo (based off the input file name).
17
18    -h
19    --help
20        Print this message and exit.
21
22    -V
23    --version
24        Display version information and exit.
25"""
26
27import os
28import sys
29import ast
30import getopt
31import struct
32import array
33from email.parser import HeaderParser
34
# Version string printed by the -V/--version command-line option.
__version__ = "1.1"

# The catalog being built: add() stores msgid -> msgstr pairs here and
# generate() serializes them into the .mo output.
MESSAGES = {}
38
39
40
def usage(code, msg=''):
    """Print the module help text to stderr and exit with status *code*.

    When *msg* is truthy it is printed on stderr after the help text.
    This function never returns; it always calls sys.exit().
    """
    err = sys.stderr
    print(__doc__, file=err)
    if msg:
        print(msg, file=err)
    sys.exit(code)
46
47
48
def add(id, str, fuzzy):
    """Record the translation *str* for message *id* in MESSAGES.

    Nothing is stored when the entry is marked fuzzy or when the
    translation is empty.
    """
    if fuzzy or not str:
        return
    MESSAGES[id] = str
54
55
56
def generate():
    """Return the binary .mo catalog built from MESSAGES as a bytes object.

    Layout of the result: a header of seven 32-bit integers, the index
    table of the msgids, the index table of the translations (each index
    entry is a string length followed by a file offset), then all
    NUL-terminated msgids followed by all NUL-terminated translations.
    Entries are sorted by msgid.  Integers are written in the machine's
    native byte order and no hash table is emitted.
    """
    # The keys are sorted in the .mo file.
    keys = sorted(MESSAGES)
    offsets = []
    id_chunks = []
    str_chunks = []
    id_pos = str_pos = 0
    for key in keys:
        value = MESSAGES[key]
        # For each string, we need size and file offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        offsets.append((id_pos, len(key), str_pos, len(value)))
        id_chunks.append(key + b'\0')
        str_chunks.append(value + b'\0')
        id_pos += len(key) + 1
        str_pos += len(value) + 1
    ids = b''.join(id_chunks)
    strs = b''.join(str_chunks)
    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the two index tables, each of which holds
    # two 32-bit integers (8 bytes) per entry.
    keystart = 7*4 + 16*len(keys)
    # and the values start after the keys
    valuestart = keystart + len(ids)
    koffsets = []
    voffsets = []
    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    for o1, l1, o2, l2 in offsets:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]
    output = struct.pack("Iiiiiii",
                         0x950412de,        # Magic
                         0,                 # Version
                         len(keys),         # # of entries
                         7*4,               # start of key index
                         7*4+len(keys)*8,   # start of value index
                         0, 0)              # size and offset of hash table
    # array.tostring() was removed in Python 3.9; tobytes() is the
    # equivalent that exists on every Python 3 release.
    output += array.array("i", koffsets + voffsets).tobytes()
    output += ids
    output += strs
    return output
96
97
98
def make(filename, outfile):
    """Compile a textual .po catalog into a binary .mo file.

    filename -- path of the input catalog; '.po' is appended when the name
                does not already end with it.
    outfile  -- path of the output file, or None to derive it from the
                input name by replacing its extension with '.mo'.

    Each parsed entry is handed to add(), which keeps the non-fuzzy ones
    that have a translation; the catalog returned by generate() is then
    written to the output file.  The process exits with status 1 when the
    input file cannot be read or when one of the syntax errors checked for
    below is found.  A failure to write the output is only reported on
    stderr.
    """
    ID = 1
    STR = 2

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        # Read raw bytes: the real charset is only known once the header
        # entry has been parsed.
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except IOError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = None
    fuzzy = 0

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    for lno, raw in enumerate(lines, 1):
        line = raw.decode(encoding)
        # If we get a comment line after a msgstr, this is a new entry
        if line[0] == '#' and section == STR:
            add(msgid, msgstr, fuzzy)
            section = None
            fuzzy = 0
        # Record a fuzzy mark
        if line[:2] == '#,' and 'fuzzy' in line:
            fuzzy = 1
        # Skip comments
        if line[0] == '#':
            continue
        # Now we are in a msgid section, output previous section
        if line.startswith('msgid') and not line.startswith('msgid_plural'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                if not msgid:
                    # The entry with the empty msgid is the header; see
                    # whether it carries an encoding declaration
                    p = HeaderParser()
                    charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
                # A fuzzy mark applies to a single entry.  No comment line
                # separated the previous entry from this one, so the flag
                # still belongs to the entry just flushed and must not leak
                # into the new one.
                fuzzy = 0
            section = ID
            line = line[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif line.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            line = line[12:]
            msgid += b'\0' # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif line.startswith('msgstr'):
            section = STR
            if line.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                line = line.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0' # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on  %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                line = line[6:]
        # Skip empty lines
        line = line.strip()
        if not line:
            continue
        # What remains is a quoted string; literal_eval unquotes it and
        # resolves its escape sequences without executing any code.
        line = ast.literal_eval(line)
        if section == ID:
            msgid += line.encode(encoding)
        elif section == STR:
            msgstr += line.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno), \
                  'before:', file=sys.stderr)
            print(line, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except IOError as msg:
        print(msg, file=sys.stderr)
205
206
207
def main():
    """Command-line entry point: parse sys.argv and compile each catalog.

    -h/--help prints the usage text and exits with status 0, -V/--version
    prints the version and exits with status 0, and -o/--output-file sets
    the output path passed to make() for every input file.  An option
    parsing error is reported through usage() with status 1.
    """
    short_opts = 'hVo:'
    long_opts = ['help', 'version', 'output-file=']
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt == '-h' or opt == '--help':
            usage(0)
        elif opt == '-V' or opt == '--version':
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif opt == '-o' or opt == '--output-file':
            outfile = arg

    # Without any input file there is nothing to do.
    if len(args) == 0:
        print('No input file given', file=sys.stderr)
        print("Try `msgfmt --help' for more information.", file=sys.stderr)
        return

    # do it
    for filename in args:
        make(filename, outfile)
233
234
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
237