1#!lua
2-----------------------------------------------------------------------------
3-- lua script picoloaddbg.lua --- creates pkb containing phoneme information.
4--                                This pkb is only used for debug purposes. 
5--
6-- Copyright (C) 2009 SVOX AG. All rights reserved.
7-----------------------------------------------------------------------------
8
9-- load pico phones src file and create dbg pkb file
10
11-- accepted syntax:
12-- - parses line of the following format:
13--   :SYM "<sym>" :PROP mapval = <uint8> { , <propname> = <int> }
14-- - initial '!' and trailing '!.*' are treated as comments, no '[]'
15
16
--- Property names that any number of symbols may carry.
-- The number stored under each name is an occurrence counter that
-- checksymtable resets to 0 and then increments.
propnames = {
  mapval       = 0,
  vowel        = 0,
  diphth       = 0,
  glott        = 0,
  nonsyllvowel = 0,
  syllcons     = 0,
}

--- Property names that may occur once only across all symbols.
-- Counted the same way as propnames; checksymtable reports an error
-- when one of these counters exceeds 1.
upropnames = {
  primstress = 0,
  secstress  = 0,
  syllbound  = 0,
  wordbound  = 0,
  pause      = 0,
}
22
23
-- init
-- Expects exactly two command-line arguments: the phones source file to
-- read (arg[1]) and the debug pkb file to create (arg[2]). Any problem is
-- reported on stdout and ends the script.
if #arg ~= 2 then
  print("*** error: wrong number of arguments, must be 2"); return
end
local infile = io.open(arg[1], "r")
if not infile then
  print("*** error: could not open input file: " .. arg[1]); return
end
local outfile = io.open(arg[2], "wb")
if not outfile then
  -- do not leave the already opened input handle dangling
  infile:close()
  print("*** error: could not open output file: " .. arg[2]); return
end
36
37
-- tables
--- table with symbol name keys; each value is the symbol's property table
local syms = {}
--- table with symbol number keys (the number specified with property
--- mapval); values are the same property tables as in syms
local symnrs = {}
--- set of symbol numbers already used (to check for unique mapvals)
local symnrsused = {}


-- parse input file, build up syms and symnrs tables
--
-- Every non-blank, non-comment line must look like
--   :SYM "<sym>" :PROP mapval = <int> { , <propname> = <int> } [ !comment ]
-- The first line that does not is reported on stdout and ends the script.
for line in infile:lines() do
  if string.match(line, "^%s*!.*$") or string.match(line, "^%s*$") then
    -- discard comment-only and blank lines
  else
    -- All scratch variables are declared local so that nothing leaks into
    -- the global environment. cline is the not-yet-consumed rest of the line.
    local cline = string.gsub(line, "^%s*", "")

    -- get :SYM (the name may be double- or single-quoted)
    local sym = string.match(cline, "^:SYM%s+\"([^\"]-)\"%s+")
    if not sym then
      sym = string.match(cline, "^:SYM%s+'([^']-)'%s+")
    end
    if not sym then
      print("*** error: no symbol found")
      print("line: ", line)
      return
    end
    cline = string.gsub(cline, "^:SYM%s+['\"].-['\"]%s+", "")

    -- get :PROP and mapval prop/propval
    local propval = string.match(cline, "^:PROP%s+mapval%s*=%s*(%d+)%s*")
    if not propval then
      print("*** error: no mapval property found")
      print("line: ", line); return
    end
    cline = string.gsub(cline, "^:PROP%s+mapval%s*=%s*%d+%s*", "")

    -- construct props table and add first mapval property
    -- NOTE(review): values above 255 are accepted here; the output loop at
    -- the end of the script only covers symbol numbers 0..255.
    local symnr = tonumber(propval)
    local props = {mapval = symnr}
    if symnrsused[symnr] then
      io.write("*** error: mapval values must be unique, ", symnr, "\n")
      print("line: ", line); return
    end
    symnrsused[symnr] = true

    -- check if remaining part are comments only
    cline = string.gsub(cline, "^!.*", "")
    while #cline > 0 do
      -- try to get next prop/propval and add to props
      local prop, val = string.match(cline, "^,%s*(%w+)%s*=%s*(%d+)%s*")
      if not (prop and val) then
        print("*** error: syntax error in property list")
        print("line: ", line); return
      end
      cline = string.gsub(cline, "^,%s*%w+%s*=%s*%d+%s*", "")
      props[prop] = tonumber(val)
      -- cleanup if only comments remaining
      cline = string.gsub(cline, "^!.*", "")
    end

    -- both tables share the same props table
    syms[sym] = props
    symnrs[symnr] = props
  end
end
101
102
103-- check syms and symnrs
104
--- Validates the property names used in a symbol table.
-- Resets the occurrence counters held in the global tables propnames and
-- upropnames, then counts every property of every entry of st. After each
-- entry the counters in upropnames are checked. The first problem found is
-- reported on stdout.
-- @param st table whose values are property tables (property name -> value)
-- @return true if every property name is listed in propnames or upropnames
--         and no name from upropnames occurs more than once, false otherwise
function checksymtable (st)
  for s in pairs(propnames) do propnames[s] = 0 end
  for s in pairs(upropnames) do upropnames[s] = 0 end
  for _, p in pairs(st) do
    for prop in pairs(p) do
      if propnames[prop] then
        propnames[prop] = propnames[prop] + 1
      elseif upropnames[prop] then
        upropnames[prop] = upropnames[prop] + 1
      else
        io.write("*** error: invalid property name '", prop, "'\n")
        return false
      end
    end
    for prop, count in pairs(upropnames) do
      if count > 1 then
        io.write("*** error: property '", prop, "' must be unique\n")
        return false
      end
    end
  end
  return true
end

-- Like the parse errors above, a failed check ends the script instead of
-- carrying on with invalid data.
if not checksymtable(syms) then return end
if not checksymtable(symnrs) then return end
130
131
132-- get IDs of unique specids
133
-- specid holds eight slots, all preset to 0. Slots 1..5 receive the mapval
-- of the symbol carrying the property named at the same position in the
-- list below; for each symbol only the first property found in list order
-- is considered.
specid = {0, 0, 0, 0, 0, 0, 0, 0}
do
  local specprops = {"primstress", "secstress", "syllbound", "pause", "wordbound"}
  for _, plist in pairs(symnrs) do
    for slot, pname in ipairs(specprops) do
      if plist[pname] then
        specid[slot] = plist["mapval"]
        break
      end
    end
  end
end
144
145
146-- write out Phones pkb
147
--- Encodes the phone properties of a symbol as a bit mask.
-- A property counts as set when it is present in the symbol's property
-- table, whatever its value (note that 0 is a true value in Lua).
-- @param n symbol number used as key into symnrs
-- @return sum of 1 (vowel), 2 (diphth), 4 (glott), 8 (nonsyllvowel) and
--         16 (syllcons) for the properties present; 0 if symnrs has no
--         entry for n
function encodeprops (n)
  -- rv and pl are local so that calling this function does not create or
  -- overwrite global variables of the same name
  local rv = 0
  local pl = symnrs[n]
  if pl then
    if pl["vowel"] then rv = rv + 1 end
    if pl["diphth"] then rv = rv + 2 end
    if pl["glott"] then rv = rv + 4 end
    if pl["nonsyllvowel"] then rv = rv + 8 end
    if pl["syllcons"] then rv = rv + 16 end
  end
  return rv
end
160
161
-- symtab maps a symbol number (the mapval property) to the symbol name
symtab = {}
for name, plist in pairs(syms) do
  local code = tonumber(plist["mapval"])
  symtab[code] = name
end
166
-- One record of RECLEN bytes is written for each symbol number 0..255:
-- the symbol name cut to RECLEN bytes and filled up with NUL bytes.
-- Numbers without a symbol produce a record of NUL bytes only.
do
  local RECLEN = 8
  local padding = string.rep("\0", RECLEN)
  for code = 0, 255 do
    local name = symtab[code] or ""
    -- appending the padding first guarantees at least RECLEN bytes to cut
    outfile:write(string.sub(name .. padding, 1, RECLEN))
  end
end



-- tini

infile:close()
outfile:close()
188
189-- end
190