1#!lua
2-----------------------------------------------------------------------------
3-- lua script picoloadphones.lua --- creates pkb containing phones table.
4--
5-- Copyright (C) 2009 SVOX AG. All rights reserved.
6-----------------------------------------------------------------------------
7
8-- load pico phones src file and create phones pkb file
9
10-- accepted syntax:
11-- - parses line of the following format:
12--   :SYM "<sym>" :PROP mapval = <uint8> { , <propname> = <int> }
13-- - initial '!' and trailing '!.*' are treated as comments, no '[]'
14
15
--- Property names a symbol may carry; any number of symbols may use them.
-- The values are occurrence counters, reset and filled in by checksymtable.
propnames = {
  mapval       = 0,
  vowel        = 0,
  diphth       = 0,
  glott        = 0,
  nonsyllvowel = 0,
  syllcons     = 0,
}
--- Property names that may be attached to one symbol only.
-- The values are occurrence counters as well.
upropnames = {
  primstress = 0,
  secstress  = 0,
  syllbound  = 0,
  wordbound  = 0,
  pause      = 0,
}
20
21
-- init: validate the command line and open both files
--   arg[1]  phones source file, opened for reading in text mode
--   arg[2]  pkb output file, opened for writing in binary mode
-- On any failure a message is printed and the script ends.
if #arg ~= 2 then
  print("*** error: wrong number of arguments, must be 2"); return
end
local infile = io.open(arg[1], "r")
if not infile then
  print("*** error: could not open input file: " .. arg[1]); return
end
local outfile = io.open(arg[2], "wb")
if not outfile then
  -- the input file is already open at this point; release it before leaving
  infile:close()
  print("*** error: could not open output file: " .. arg[2]); return
end
34
35
-- tables
--- table with symbol name keys (not really used currently)
local syms = {}
--- table with symbol number keys (specified with property mapval)
local symnrs = {}
--- set of symbol numbers already used (to check for unique mapvals)
local symnrsused = {}


-- parse input file, build up syms and symnrs tables
--
-- Every line that is neither blank nor a '!' comment must have the form
--   :SYM "<sym>" :PROP mapval = <uint8> { , <propname> = <int> }
-- optionally followed by a '!' comment. On the first violation a message
-- and the offending line are printed and the script ends.
for line in infile:lines() do
  if string.match(line, "^%s*!.*$") or string.match(line, "^%s*$") then
    -- discard comment-only and blank lines
  else
    -- all per-line scratch values are locals so nothing leaks into _G
    local cline = string.gsub(line, "^%s*", "")

    -- get :SYM; the symbol may be double- or single-quoted
    local sym = string.match(cline, "^:SYM%s+\"([^\"]-)\"%s+")
    if not sym then
      sym = string.match(cline, "^:SYM%s+'([^']-)'%s+")
    end
    if not sym then
      print("*** error: no symbol found")
      print("line: ", line)
      return
    end
    cline = string.gsub(cline, "^:SYM%s+['\"].-['\"]%s+", "")

    -- get :PROP and the mandatory leading mapval property
    local mapval = string.match(cline, "^:PROP%s+mapval%s*=%s*(%d+)%s*")
    if not mapval then
      print("*** error: no mapval property found")
      print("line: ", line); return
    end
    cline = string.gsub(cline, "^:PROP%s+mapval%s*=%s*%d+%s*", "")

    local symnr = tonumber(mapval)
    -- mapval is a uint8: it is written out as a single byte and only
    -- symbol numbers 0..255 are looked up when the table is written
    if symnr > 255 then
      io.write("*** error: mapval value out of range (0..255), ", symnr, "\n")
      print("line: ", line); return
    end
    if symnrsused[symnr] then
      io.write("*** error: mapval values must be unique, ", symnr, "\n")
      print("line: ", line); return
    end
    symnrsused[symnr] = true

    -- construct props table, starting with the mapval property
    local props = {mapval = symnr}

    -- drop a trailing comment, then consume ", <propname> = <int>" items
    -- one at a time until nothing is left
    cline = string.gsub(cline, "^!.*", "")
    while #cline > 0 do
      local prop, propval = string.match(cline, "^,%s*(%w+)%s*=%s*(%d+)%s*")
      if not prop then
        print("*** error: syntax error in property list")
        print("line: ", line); return
      end
      cline = string.gsub(cline, "^,%s*%w+%s*=%s*%d+%s*", "")
      props[prop] = tonumber(propval)
      -- cleanup if only comments remaining
      cline = string.gsub(cline, "^!.*", "")
    end

    -- the same props table is reachable by symbol name and by number
    syms[sym] = props
    symnrs[symnr] = props
  end
end
99
100
-- check syms and symnrs

--- Validate the property lists of every entry in a symbol table.
-- Resets the counters stored in the global tables propnames and upropnames,
-- then counts in how many property lists each property name occurs. An error
-- is printed when a property name is in neither table, or when a name from
-- upropnames has been seen more than once.
-- @param st table whose values are property tables (name -> value)
-- @return true if the table passed all checks, false after an error message
function checksymtable (st)
  for name in pairs(propnames) do propnames[name] = 0 end
  for name in pairs(upropnames) do upropnames[name] = 0 end
  for _, props in pairs(st) do
    for prop in pairs(props) do
      if propnames[prop] then
        propnames[prop] = propnames[prop] + 1
      elseif upropnames[prop] then
        upropnames[prop] = upropnames[prop] + 1
      else
        io.write("*** error: invalid property name '", prop, "'\n")
        return false
      end
    end
    -- after each entry: no unique property may have been seen twice
    for prop, count in pairs(upropnames) do
      if count > 1 then
        io.write("*** error: property '", prop, "' must be unique\n")
        return false
      end
    end
  end
  return true
end

-- stop here when a table is invalid, so that no pkb data is written from it
if not checksymtable(syms) or not checksymtable(symnrs) then
  return
end
128
129
-- get IDs of unique specids

-- specid[1..8] holds the mapval of the symbol carrying each special
-- property; 0 means no symbol has it. Slots 6..8 are never assigned here.
specid = {0, 0, 0, 0, 0, 0, 0, 0}

-- slot number = position in this list; for a symbol with several of these
-- properties only the first one in list order is recorded
local specprops = {"primstress", "secstress", "syllbound", "pause", "wordbound"}

for _, plist in pairs(symnrs) do
  for slot, name in ipairs(specprops) do
    if plist[name] then
      specid[slot] = plist["mapval"]
      break
    end
  end
end
142
143
144-- write out Phones pkb
145
--- Encode the phone properties of a symbol number as a bit field.
-- Bit values: vowel 1, diphth 2, glott 4, nonsyllvowel 8, syllcons 16.
-- A property counts as set when it is present in the symbol's property
-- table, whatever its value (in Lua even 0 is truthy).
-- NOTE(review): "vowel = 0" therefore sets the bit -- confirm this is intended.
-- @param n symbol number, used as key into symnrs
-- @return sum of the bit values of the properties present; 0 if symnrs
--         has no entry for n
function encodeprops (n)
  -- locals only: the function no longer leaves rv/pl behind as globals
  local plist = symnrs[n]
  if not plist then return 0 end

  local bits = 0
  if plist["vowel"] then bits = bits + 1 end
  if plist["diphth"] then bits = bits + 2 end
  if plist["glott"] then bits = bits + 4 end
  if plist["nonsyllvowel"] then bits = bits + 8 end
  if plist["syllcons"] then bits = bits + 16 end
  return bits
end
158
-- header: the 8 special ids, one byte each (a slot that was never set is 0)
-- string.char emits the zero byte without a special case and raises an
-- error for a value outside 0..255 instead of silently truncating it
for i = 1, 8 do
  outfile:write(string.char(specid[i]))
end
-- body: one property byte for each symbol number 0..255
for i = 0, 255 do
  outfile:write(string.char(encodeprops(i)))
end


-- tini

infile:close()
outfile:close()
176
177-- end
178