1/*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/**
17 * @file picokdt.c
18 *
19 * knowledge handling for decision trees
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27 */
28
29#include "picoos.h"
30#include "picodbg.h"
31#include "picobase.h"
32#include "picoknow.h"
33#include "picodata.h"
34#include "picokdt.h"
35
36#ifdef __cplusplus
37extern "C" {
38#endif
39#if 0
40}
41#endif
42
43
44/* ************************************************************/
45/* decision tree */
46/* ************************************************************/
47
48/**
49 * @addtogroup picokdt
50  * ---------------------------------------------------\n
51 * <b> Pico KDT support </b>\n
52 * ---------------------------------------------------\n
53   overview extended binary tree file:
54  - dt consists of optional attribute mapping tables and a non-empty
55    tree part
56  - using the attribute mapping tables an attribute value as used
57    throughout the TTS can be mapped to its smaller representation
58    used in the tree
59  - multi-byte values always little endian
60
61  -------------------------------------------------------------------
62  - bin-file, decision tree knowledge base in binary form
63
64    - dt-kb = header inputmaptables outputmaptables tree
65
66
67    - header = INPMAPTABLEPOS2 OUTMAPTABLEPOS2 TREEPOS2
68
    - INPMAPTABLEPOS2: two bytes, equals offset in number of bytes from
                     the start of kb to the start of input map tables,
                     may not be 0
    - OUTMAPTABLEPOS2: two bytes, equals offset in number of bytes from
                     the start of kb to the start of outtables,
                     may not be 0
    - TREEPOS2: two bytes, equals offset in number of bytes from the
              start of kb to the start of the tree
77
78
79    - inputmaptables = maptables
80    - outputmaptables = maptables
81    - maptables = NRMAPTABLES1 {maptable}=NRMAPTABLES1
82    - maptable = LENTABLE2 TABLETYPE1 (   bytemaptable
83                                      | wordmaptable
84                                      | graphinmaptable
85                                      | bytetovarmaptable )
86    - bytemaptable (in or out, usage varies) =  NRBYTES2   {BYTE1}=NRBYTES2
87    - wordmaptable (in or out, usage varies) =  NRWORDS2   {WORD2}=NRWORDS2
88    - graphinmaptable (in only)              =  NRGRAPHS2  {GRAPH1:4}=NRGRAPHS2
89    - bytetovarmaptable (out only)           =  NRINBYTES2 outvarsearchind
90                                              outvaroutputs
91    - outvarsearchind = {OUTVAROFFSET2}=NRINBYTES2
92    - outvaroutputs = {VARVALID1:}=NRINBYTES2
93
94    - bytemaptable: fixed size, *Map*Fixed \n
95    - wordmaptable: fixed size, *Map*Fixed \n
96    - graphinmaptable: search value is variable size (UTF8 grapheme), \n
97                     value to be mapped to is fixed size, one byte \n
98    - bytetovarmaptable: search value is fixed size, one byte, values \n
99                       to be mapped to are of variable size (e.g. several \n
100                       phones) \n
101
102    - NRMAPTABLES1: one byte representing the number of map tables
103    - LENTABLE2: two bytes, equals offset to the next table (or next
104               part of kb, e.g. tree),
105               if LENTABLE2 = 3, and
106               TABLETYPE1 = EMPTY -> empty table, no mapping to be done
107    - TABLETYPE1: one byte, type of map table (byte, word, or graph=utf8)
108    - NRBYTES2: two bytes, number of bytes following in the table (one
109              would be okay, to simplify some implementation also set
110              to 2)
    - BYTE1: one byte, the sequence is used to determine the values
           being mapped to, starting with 0
    - NRWORDS2: two bytes, number of words (two bytes) following in the table
114    - WORD2: two bytes, the sequence is used to determine the values
115           being mapped to, starting with 0
116    - NRGRAPHS2: two bytes, number of graphemes encoded in UTF8 following
117               in table
118    - GRAPH1:4: one to four bytes, UTF8 representation of a grapheme, the
119              sequence of graphemes is used to determine the value being
120              mapped to, starting with 0, the length information is
121              encoded in UTF8, no need for extra length info
122    - NRINBYTES2: two bytes, number of single byte IDs the tree can produce
123    - OUTVAROFFSET2: two bytes, offset from the start of the
124                   outvaroutputs to the start of the following output
125                   phone ID group, ie. the first outvaroffset is the
126                   offset to the start of the second PHONEID
127                   group. Using the previous outvaroffset (or the start
                   of the outvaroutputs) the start and length of the
129                   PHONEID group can be determined and we can get the
130                   sequence of output values we map the chunk value to
131    - VARVALID1:: one to several bytes, one byte each for an output phone ID
132
133    - tree = treenodeinfos TREEBODYSIZE4 treebody
134    - treenodeinfos = NRVFIELDS1 vfields NRATTRIBUTES1 NRQFIELDS1 qfields
135    - vfields = {VFIELD1}=NRVFIELDS1
136    - qfields = {QFIELD1}=NRATTRIBUTES1xNRQFIELDS1
137    - treebody = "cf. code"
138
139    - TREEBODYSIZE4: four bytes, size of treebody in number of bytes
140    - NRVFIELDS1: one byte, number of node properties in the following
141                vector (predefined and fixed sequence of properties)
142    - VFIELD1: number of bits used to represent a node property
143    - NRATTRIBUTES1: one byte, number of attributes (rows) in the
144                   following matrix
145    - NRQFIELDS1: one byte, number (columns) of question-dependent node
146                properties per attribute in the following matrix
147                (predefined and fixed sequence of properties)
148    - QFIELD1: number of bits used to represent a question-dependent
149             property in the matrix
150
151
152    - Currently,
153        - NRVFIELDS1 is fixed at 2 for all trees, ie.
154        - vfields = 2 aVFIELD1 bVFIELD1
155        - aVFIELD1: nr of bits for questions
156        - bVFIELD1: nr of bits for decisions
157
158        - NRQFIELDS1 is fixed at 5 for all trees, ie. \n
159        - qfields = NRATTRIBUTES1 5 aQFIELD1 bQFIELD1 cQFIELD1 dQFIELD1 eQFIELD1 \n
160            - aQFIELD1: nr of bits for fork count \n
161            - bQFIELD1: nr of bits for start position for subsets \n
162            - cQFIELD1: nr of bits for group size \n
163            - dQFIELD1: nr of bits for offset to reach output \n
164            - eQFIELD1: nr of bits for threshold (if continuous node) \n
165*/
166
167
168/* ************************************************************/
169/* decision tree data defines */
170/* may not be changed with current implementation */
171/* ************************************************************/
172
/* map table section: byte offsets counted from the start of the section */

/* position of the byte holding the number of map tables */
#define PICOKDT_MTSPOS_NRMAPTABLES   0

/* position of the first byte of the first map table (for the output
   map table section this is the only table) */
#define PICOKDT_MTPOS_START          1

/* single map table: byte offsets counted from the start of the table */
#define PICOKDT_MTPOS_LENTABLE       0   /* LENTABLE2 */
#define PICOKDT_MTPOS_TABLETYPE      2   /* TABLETYPE1 */
#define PICOKDT_MTPOS_NUMBER         3   /* NRBYTES2 / NRWORDS2 / ... */
#define PICOKDT_MTPOS_MAPSTART       5   /* first map entry */

/* treenodeinfos: byte offsets counted from the start of the tree part */
#define PICOKDT_NIPOS_NRVFIELDS      0   /* NRVFIELDS1 */
#define PICOKDT_NIPOS_NRATTS         3   /* NRATTRIBUTES1 */
#define PICOKDT_NIPOS_NRQFIELDS      4   /* NRQFIELDS1 */

/* number of vfields / qfields every tree is required to declare */
#define PICOKDT_NODEINFO_NRVFIELDS   2
#define PICOKDT_NODEINFO_NRQFIELDS   5

/* fixed number of bits used */
#define PICOKDT_NODETYPE_NRBITS      2
#define PICOKDT_SUBSETTYPE_NRBITS    2
#define PICOKDT_ISDECIDE_NRBITS      1
198
/* Number of input map tables expected for each tree type.  There is one
   (possibly empty) input map table per attribute, so with the current
   implementation every value has to equal the matching PICOKDT_NRATT_*
   constant. */
typedef enum {
    PICOKDT_NRINPMT_POSP = 12,
    PICOKDT_NRINPMT_POSD = 7,
    PICOKDT_NRINPMT_G2P = 16,
    PICOKDT_NRINPMT_PHR = 8,
    PICOKDT_NRINPMT_ACC = 13,
    PICOKDT_NRINPMT_PAM = 60
} kdt_nrinpmaptables_t;
210
/* Number of output map tables expected for each tree type.  Every tree
   carries at least one output map table, which may be empty. */
typedef enum {
    PICOKDT_NROUTMT_POSP = 1,
    PICOKDT_NROUTMT_POSD = 1,
    PICOKDT_NROUTMT_G2P = 1,
    PICOKDT_NROUTMT_PHR = 1,
    PICOKDT_NROUTMT_ACC = 1,
    PICOKDT_NROUTMT_PAM = 1
} kdt_nroutmaptables_t;
221
/* Values of the TABLETYPE1 byte of a map table. */
typedef enum {
    PICOKDT_MTTYPE_EMPTY = 0,      /* empty table, no mapping to be done */
    PICOKDT_MTTYPE_BYTE = 1,       /* bytemaptable */
    PICOKDT_MTTYPE_WORD = 2,       /* wordmaptable */
    PICOKDT_MTTYPE_GRAPH = 3,      /* graphinmaptable (UTF8 graphemes) */
    PICOKDT_MTTYPE_BYTETOVAR = 4   /* bytetovarmaptable */
} kdt_mttype_t;
230
231
232/* ************************************************************/
233/* decision tree types and loading */
234/* ************************************************************/
235/*  object       : Dt*KnowledgeBase
236 *  shortcut     : kdt*
237 *  derived from : picoknow_KnowledgeBase
238 */
239
240/* subobj shared by all decision trees */
241typedef struct {
242    picokdt_kdttype_t type;
243    picoos_uint8 *inpmaptable;
244    picoos_uint8 *outmaptable;
245    picoos_uint8 *tree;
246    picoos_uint32 beg_offset[128];  /* for efficiency */
247
248    /* tree-internal details for faster processing */
249    picoos_uint8 *vfields;
250    picoos_uint8 *qfields;
251    picoos_uint8  nrattributes;
252    picoos_uint8 *treebody;
253    /*picoos_uint8  nrvfields;*/  /* fix PICOKDT_NODEINFO_NRVFIELDS */
254    /*picoos_uint8  nrqfields;*/  /* fix PICOKDT_NODEINFO_NRQFIELDS */
255
256    /* direct output vector (no output mapping) */
257    picoos_uint8 dset;    /* TRUE if class set, FALSE otherwise */
258    picoos_uint16 dclass;
259} kdt_subobj_t;
260
261/* subobj specific for each decision tree type */
262typedef struct {
263    kdt_subobj_t dt;
264    picoos_uint16 invec[PICOKDT_NRATT_POSP];    /* input vector */
265    picoos_uint8 inveclen;  /* nr of ele set in invec; must be =nrattributes */
266} kdtposp_subobj_t;
267
268typedef struct {
269    kdt_subobj_t dt;
270    picoos_uint16 invec[PICOKDT_NRATT_POSD];    /* input vector */
271    picoos_uint8 inveclen;  /* nr of ele set in invec; must be =nrattributes */
272} kdtposd_subobj_t;
273
274typedef struct {
275    kdt_subobj_t dt;
276    picoos_uint16 invec[PICOKDT_NRATT_G2P];    /* input vector */
277    picoos_uint8 inveclen;  /* nr of ele set in invec; must be =nrattributes */
278} kdtg2p_subobj_t;
279
280typedef struct {
281    kdt_subobj_t dt;
282    picoos_uint16 invec[PICOKDT_NRATT_PHR];    /* input vector */
283    picoos_uint8 inveclen;  /* nr of ele set in invec; must be =nrattributes */
284} kdtphr_subobj_t;
285
286typedef struct {
287    kdt_subobj_t dt;
288    picoos_uint16 invec[PICOKDT_NRATT_ACC];    /* input vector */
289    picoos_uint8 inveclen;  /* nr of ele set in invec; must be =nrattributes */
290} kdtacc_subobj_t;
291
292typedef struct {
293    kdt_subobj_t dt;
294    picoos_uint16 invec[PICOKDT_NRATT_PAM];    /* input vector */
295    picoos_uint8 inveclen;  /* nr of ele set in invec; must be =nrattributes */
296} kdtpam_subobj_t;
297
298
299static pico_status_t kdtDtInitialize(register picoknow_KnowledgeBase this,
300                                     picoos_Common common,
301                                     kdt_subobj_t *dtp) {
302    picoos_uint16 inppos;
303    picoos_uint16 outpos;
304    picoos_uint16 treepos;
305    picoos_uint32 curpos = 0, pos;
306    picoos_uint16 lentable;
307    picoos_uint16 i;
308    picoos_uint8 imtnr;
309
310    PICODBG_DEBUG(("start"));
311
312    /* get inmap, outmap, tree offsets */
313    if ((PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, &inppos))
314        && (PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, &outpos))
315        && (PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos,
316                                                 &treepos))) {
317
318        /* all pos are mandatory, verify */
319        if (inppos && outpos && treepos) {
320            dtp->inpmaptable = this->base + inppos;
321            dtp->outmaptable = this->base + outpos;
322            dtp->tree = this->base + treepos;
323            /* precalc beg offset table */
324            imtnr=dtp->inpmaptable[0];
325            pos=1;
326            dtp->beg_offset[0] = 1;
327            for (i = 0; i < imtnr; i++) {
328                lentable = ((picoos_uint16)(dtp->inpmaptable[pos+1])) << 8 |
329                    dtp->inpmaptable[pos];
330                pos += lentable;
331                dtp->beg_offset[i+1] = pos;
332            }
333        } else {
334            dtp->inpmaptable = NULL;
335            dtp->outmaptable = NULL;
336            dtp->tree = NULL;
337            PICODBG_ERROR(("invalid kb position info"));
338            return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
339                                           NULL, NULL);
340        }
341
342        /* nr of outmaptables is equal 1 for all trees, verify */
343        if (dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != 1) {
344            PICODBG_ERROR(("wrong number of outmaptables"));
345            return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
346                                           NULL, NULL);
347        }
348
349        /* check if this is an empty table, ie. len == 3 */
350        if ((dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_LENTABLE]
351             == 3)
352            && (dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_LENTABLE
353                                 + 1] == 0)) {
354            /* verify that this is supposed to be an empty table and
355               set outmaptable to NULL if so */
356            if (dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_TABLETYPE]
357                == PICOKDT_MTTYPE_EMPTY) {
358                dtp->outmaptable = NULL;
359            } else {
360                PICODBG_ERROR(("table length vs. type problem"));
361                return picoos_emRaiseException(common->em,
362                                               PICO_EXC_FILE_CORRUPT,
363                                               NULL, NULL);
364            }
365        }
366
367        dtp->vfields = dtp->tree + 1;
368        dtp->qfields = dtp->tree + PICOKDT_NODEINFO_NRVFIELDS + 3;
369        dtp->nrattributes = dtp->tree[PICOKDT_NIPOS_NRATTS];
370        dtp->treebody = dtp->qfields + 4 +
371            (dtp->nrattributes * PICOKDT_NODEINFO_NRQFIELDS); /* TREEBODYSIZE4*/
372
373        /*dtp->nrvfields = dtp->tree[PICOKDT_NIPOS_NRVFIELDS]; <- is fix */
374        /*dtp->nrqfields = dtp->tree[PICOKDT_NIPOS_NRQFIELDS]; <- is fix */
375        /* verify that nrvfields ad nrqfields are correct */
376        if ((PICOKDT_NODEINFO_NRVFIELDS != dtp->tree[PICOKDT_NIPOS_NRVFIELDS]) ||
377            (PICOKDT_NODEINFO_NRQFIELDS != dtp->tree[PICOKDT_NIPOS_NRQFIELDS])) {
378            PICODBG_ERROR(("problem with nr of vfields (%d) or qfields (%d)",
379                           dtp->tree[PICOKDT_NIPOS_NRVFIELDS],
380                           dtp->tree[PICOKDT_NIPOS_NRQFIELDS]));
381            return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
382                                           NULL, NULL);
383        }
384        dtp->dset = 0;
385        dtp->dclass = 0;
386        PICODBG_DEBUG(("tree init: nratt: %d, posomt: %d, postree: %d",
387                       dtp->nrattributes, (dtp->outmaptable - dtp->inpmaptable),
388                       (dtp->tree - dtp->inpmaptable)));
389        return PICO_OK;
390    } else {
391        PICODBG_ERROR(("problem reading kb in memory"));
392        return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
393                                       NULL, NULL);
394    }
395}
396
397
398static pico_status_t kdtDtCheck(register picoknow_KnowledgeBase this,
399                                picoos_Common common,
400                                kdt_subobj_t *dtp,
401                                kdt_nratt_t nratt,
402                                kdt_nrinpmaptables_t nrinpmt,
403                                kdt_nroutmaptables_t nroutmt,
404                                kdt_mttype_t mttype) {
405    /* check nr attributes */
406    /* check nr inpmaptables */
407    /* check nr outmaptables */
408    /* check outmaptable is word type */
409    if ((nratt != dtp->nrattributes)
410        || (dtp->inpmaptable == NULL)
411        || (dtp->outmaptable == NULL)
412        || (dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != nrinpmt)
413        || (dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != nroutmt)
414        || (dtp->outmaptable[PICOKDT_MTPOS_START+PICOKDT_MTPOS_TABLETYPE]
415            != mttype)) {
416        PICODBG_ERROR(("check failed, nratt %d, nrimt %d, nromt %d, omttype %d",
417                       dtp->nrattributes,
418                       dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES],
419                       dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES],
420                       dtp->outmaptable[PICOKDT_MTPOS_START +
421                                        PICOKDT_MTPOS_TABLETYPE]));
422        return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
423                                       NULL, NULL);
424    }
425    return PICO_OK;
426}
427
428
429
430static pico_status_t kdtPosPInitialize(register picoknow_KnowledgeBase this,
431                                       picoos_Common common) {
432    pico_status_t status;
433    kdtposp_subobj_t *dtposp;
434    kdt_subobj_t *dt;
435    picoos_uint8 i;
436
437    if (NULL == this || NULL == this->subObj) {
438        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
439                                       NULL, NULL);
440    }
441    dtposp = (kdtposp_subobj_t *)this->subObj;
442    dt = &(dtposp->dt);
443    dt->type = PICOKDT_KDTTYPE_POSP;
444    if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
445        return status;
446    }
447    if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_POSP,
448                             PICOKDT_NRINPMT_POSP, PICOKDT_NROUTMT_POSP,
449                             PICOKDT_MTTYPE_WORD)) != PICO_OK) {
450        return status;
451    }
452
453    /* init specialized subobj part */
454    for (i = 0; i < PICOKDT_NRATT_POSP; i++) {
455        dtposp->invec[i] = 0;
456    }
457    dtposp->inveclen = 0;
458    PICODBG_DEBUG(("posp tree initialized"));
459    return PICO_OK;
460}
461
462
463static pico_status_t kdtPosDInitialize(register picoknow_KnowledgeBase this,
464                                       picoos_Common common) {
465    pico_status_t status;
466    kdtposd_subobj_t *dtposd;
467    kdt_subobj_t *dt;
468    picoos_uint8 i;
469
470    if (NULL == this || NULL == this->subObj) {
471        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
472                                       NULL, NULL);
473    }
474    dtposd = (kdtposd_subobj_t *)this->subObj;
475    dt = &(dtposd->dt);
476    dt->type = PICOKDT_KDTTYPE_POSD;
477    if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
478        return status;
479    }
480    if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_POSD,
481                             PICOKDT_NRINPMT_POSD, PICOKDT_NROUTMT_POSD,
482                             PICOKDT_MTTYPE_WORD)) != PICO_OK) {
483        return status;
484    }
485
486    /* init spezialized subobj part */
487    for (i = 0; i < PICOKDT_NRATT_POSD; i++) {
488        dtposd->invec[i] = 0;
489    }
490    dtposd->inveclen = 0;
491    PICODBG_DEBUG(("posd tree initialized"));
492    return PICO_OK;
493}
494
495
496static pico_status_t kdtG2PInitialize(register picoknow_KnowledgeBase this,
497                                      picoos_Common common) {
498    pico_status_t status;
499    kdtg2p_subobj_t *dtg2p;
500    kdt_subobj_t *dt;
501    picoos_uint8 i;
502
503    if (NULL == this || NULL == this->subObj) {
504        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
505                                       NULL, NULL);
506    }
507    dtg2p = (kdtg2p_subobj_t *)this->subObj;
508    dt = &(dtg2p->dt);
509    dt->type = PICOKDT_KDTTYPE_G2P;
510    if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
511        return status;
512    }
513
514    if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_G2P,
515                             PICOKDT_NRINPMT_G2P, PICOKDT_NROUTMT_G2P,
516                             PICOKDT_MTTYPE_BYTETOVAR)) != PICO_OK) {
517        return status;
518    }
519
520    /* init spezialized subobj part */
521    for (i = 0; i < PICOKDT_NRATT_G2P; i++) {
522        dtg2p->invec[i] = 0;
523    }
524    dtg2p->inveclen = 0;
525    PICODBG_DEBUG(("g2p tree initialized"));
526    return PICO_OK;
527}
528
529
530static pico_status_t kdtPhrInitialize(register picoknow_KnowledgeBase this,
531                                      picoos_Common common) {
532    pico_status_t status;
533    kdtphr_subobj_t *dtphr;
534    kdt_subobj_t *dt;
535    picoos_uint8 i;
536
537    if (NULL == this || NULL == this->subObj) {
538        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
539                                       NULL, NULL);
540    }
541    dtphr = (kdtphr_subobj_t *)this->subObj;
542    dt = &(dtphr->dt);
543    dt->type = PICOKDT_KDTTYPE_PHR;
544    if ((status = kdtDtInitialize(this, common,dt)) != PICO_OK) {
545        return status;
546    }
547
548    if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_PHR,
549                             PICOKDT_NRINPMT_PHR, PICOKDT_NROUTMT_PHR,
550                             PICOKDT_MTTYPE_WORD)) != PICO_OK) {
551        return status;
552    }
553
554    /* init spezialized subobj part */
555    for (i = 0; i < PICOKDT_NRATT_PHR; i++) {
556        dtphr->invec[i] = 0;
557    }
558    dtphr->inveclen = 0;
559    PICODBG_DEBUG(("phr tree initialized"));
560    return PICO_OK;
561}
562
563
564static pico_status_t kdtAccInitialize(register picoknow_KnowledgeBase this,
565                                      picoos_Common common) {
566    pico_status_t status;
567    kdtacc_subobj_t *dtacc;
568    kdt_subobj_t *dt;
569    picoos_uint8 i;
570
571    if (NULL == this || NULL == this->subObj) {
572        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
573                                       NULL, NULL);
574    }
575    dtacc = (kdtacc_subobj_t *)this->subObj;
576    dt = &(dtacc->dt);
577    dt->type = PICOKDT_KDTTYPE_ACC;
578    if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
579        return status;
580    }
581
582    if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_ACC,
583                             PICOKDT_NRINPMT_ACC, PICOKDT_NROUTMT_ACC,
584                             PICOKDT_MTTYPE_WORD)) != PICO_OK) {
585        return status;
586    }
587
588    /* init spezialized subobj part */
589    for (i = 0; i < PICOKDT_NRATT_ACC; i++) {
590        dtacc->invec[i] = 0;
591    }
592    dtacc->inveclen = 0;
593    PICODBG_DEBUG(("acc tree initialized"));
594    return PICO_OK;
595}
596
597
598static pico_status_t kdtPamInitialize(register picoknow_KnowledgeBase this,
599                                      picoos_Common common) {
600    pico_status_t status;
601    kdtpam_subobj_t *dtpam;
602    kdt_subobj_t *dt;
603    picoos_uint8 i;
604
605    if (NULL == this || NULL == this->subObj) {
606        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
607                                       NULL, NULL);
608    }
609    dtpam = (kdtpam_subobj_t *)this->subObj;
610    dt = &(dtpam->dt);
611    dt->type = PICOKDT_KDTTYPE_PAM;
612    if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
613        return status;
614    }
615
616    if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_PAM,
617                             PICOKDT_NRINPMT_PAM, PICOKDT_NROUTMT_PAM,
618                             PICOKDT_MTTYPE_WORD)) != PICO_OK) {
619        return status;
620    }
621
622    /* init spezialized subobj part */
623    for (i = 0; i < PICOKDT_NRATT_PAM; i++) {
624        dtpam->invec[i] = 0;
625    }
626    dtpam->inveclen = 0;
627    PICODBG_DEBUG(("pam tree initialized"));
628    return PICO_OK;
629}
630
631
632static pico_status_t kdtSubObjDeallocate(register picoknow_KnowledgeBase this,
633                                         picoos_MemoryManager mm) {
634    if (NULL != this) {
635        picoos_deallocate(mm, (void *) &this->subObj);
636    }
637    return PICO_OK;
638}
639
640
641/* we don't offer a specialized constructor for a *KnowledgeBase but
642 * instead a "specializer" of an allready existing generic
643 * picoknow_KnowledgeBase */
644
645pico_status_t picokdt_specializeDtKnowledgeBase(picoknow_KnowledgeBase this,
646                                                picoos_Common common,
647                                                const picokdt_kdttype_t kdttype) {
648    pico_status_t status;
649
650    if (NULL == this) {
651        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
652                                       NULL, NULL);
653    }
654    this->subDeallocate = kdtSubObjDeallocate;
655    switch (kdttype) {
656        case PICOKDT_KDTTYPE_POSP:
657            this->subObj = picoos_allocate(common->mm,sizeof(kdtposp_subobj_t));
658            if (NULL == this->subObj) {
659                return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
660                                               NULL, NULL);
661            }
662            status = kdtPosPInitialize(this, common);
663            break;
664        case PICOKDT_KDTTYPE_POSD:
665            this->subObj = picoos_allocate(common->mm,sizeof(kdtposd_subobj_t));
666            if (NULL == this->subObj) {
667                return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
668                                               NULL, NULL);
669            }
670            status = kdtPosDInitialize(this, common);
671            break;
672        case PICOKDT_KDTTYPE_G2P:
673            this->subObj = picoos_allocate(common->mm,sizeof(kdtg2p_subobj_t));
674            if (NULL == this->subObj) {
675                return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
676                                               NULL, NULL);
677            }
678            status = kdtG2PInitialize(this, common);
679            break;
680        case PICOKDT_KDTTYPE_PHR:
681            this->subObj = picoos_allocate(common->mm,sizeof(kdtphr_subobj_t));
682            if (NULL == this->subObj) {
683                return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
684                                               NULL, NULL);
685            }
686            status = kdtPhrInitialize(this, common);
687            break;
688        case PICOKDT_KDTTYPE_ACC:
689            this->subObj = picoos_allocate(common->mm,sizeof(kdtacc_subobj_t));
690            if (NULL == this->subObj) {
691                return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
692                                               NULL, NULL);
693            }
694            status = kdtAccInitialize(this, common);
695            break;
696        case PICOKDT_KDTTYPE_PAM:
697            this->subObj = picoos_allocate(common->mm,sizeof(kdtpam_subobj_t));
698            if (NULL == this->subObj) {
699                return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
700                                               NULL, NULL);
701            }
702            status = kdtPamInitialize(this, common);
703            break;
704        default:
705            return picoos_emRaiseException(common->em, PICO_ERR_OTHER,
706                                           NULL, NULL);
707    }
708
709    if (status != PICO_OK) {
710        picoos_deallocate(common->mm, (void *) &this->subObj);
711        return picoos_emRaiseException(common->em, status, NULL, NULL);
712    }
713    return PICO_OK;
714}
715
716
717/* ************************************************************/
718/* decision tree getDt* */
719/* ************************************************************/
720
721picokdt_DtPosP picokdt_getDtPosP(picoknow_KnowledgeBase this) {
722    return ((NULL == this) ? NULL : ((picokdt_DtPosP) this->subObj));
723}
724
725picokdt_DtPosD picokdt_getDtPosD(picoknow_KnowledgeBase this) {
726    return ((NULL == this) ? NULL : ((picokdt_DtPosD) this->subObj));
727}
728
729picokdt_DtG2P  picokdt_getDtG2P (picoknow_KnowledgeBase this) {
730    return ((NULL == this) ? NULL : ((picokdt_DtG2P) this->subObj));
731}
732
733picokdt_DtPHR  picokdt_getDtPHR (picoknow_KnowledgeBase this) {
734    return ((NULL == this) ? NULL : ((picokdt_DtPHR) this->subObj));
735}
736
737picokdt_DtACC  picokdt_getDtACC (picoknow_KnowledgeBase this) {
738    return ((NULL == this) ? NULL : ((picokdt_DtACC) this->subObj));
739}
740
741picokdt_DtPAM  picokdt_getDtPAM (picoknow_KnowledgeBase this) {
742    return ((NULL == this) ? NULL : ((picokdt_DtPAM) this->subObj));
743}
744
745
746
747/* ************************************************************/
748/* decision tree support functions, tree */
749/* ************************************************************/
750
751
/* Indices into the vfields vector; each entry holds a number of bits. */
typedef enum {
    /* index to #bits to identify question */
    eQuestion = 0,
    /* index to #bits to identify decision */
    eDecide = 1
} kdt_vfields_ind_t;
756
/* Column indices into one attribute's row of the qfields matrix; each
   entry holds a number of bits. */
typedef enum {
    /* index to #bits for number of forks */
    eForkCount = 0,
    /* index to #bits for index of 1st element */
    eBitNo = 1,
    /* index to #bits for size of the group */
    eBitCount = 2,
    /* index to #bits for offset to reach output node */
    eJump = 3,
    /* for contin. node: #bits for threshold checked */
    eCut = 4
} kdt_qfields_ind_t;
764
/* Node type codes. */
typedef enum {
    eNTerminal = 0,
    eNBinary = 1,
    eNContinuous = 2,
    eNDiscrete = 3
} kdt_nodetypes_t;
771
/* Subset type codes. */
typedef enum {
    eOneValue = 0,
    eTwoValues = 1,
    eWithoutBitMask = 2,
    eBitMask = 3
} kdt_subsettypes_t;
778
779
780/* Name    :   kdt_jump
781   Function:   maps the iJump offset to byte + bit coordinates
782   Input   :   iJump   absolute bit offset (0..(nr-bytes-treebody)*8)
783   Output  :   iByteNo the first byte containing the bits to extract
784                       (0..(nr-bytes-treebody))
785               iBitNo  the first bit to be extracted (0..7)
786   Returns :   void
787   Notes   :   updates the iByteNo + iBitNo fields
788*/
789static void kdt_jump(const picoos_uint32 iJump,
790                     picoos_uint32 *iByteNo,
791                     picoos_int8 *iBitNo) {
792    picoos_uint32 iByteSize;
793
794    iByteSize = (iJump / 8 );
795    *iBitNo = (iJump - (iByteSize * 8)) + (7 - *iBitNo);
796    *iByteNo += iByteSize;
797    if (*iBitNo >= 8) {
798        (*iByteNo)++;
799        *iBitNo = 15 - *iBitNo;
800    } else {
801        *iBitNo = 7 - *iBitNo;
802    }
803}
804
805
806/* replaced inline for speedup */
807/* Name    :   kdtIsVal
808   Function:   Returns the binary value of the bit pointed to by iByteNo, iBitNo
   Input   :   iByteNo offset to the byte containing the bits to extract
                       (0..sizeof(treebody))
               iBitNo  offset to the first bit to be extracted (0..7)
812   Returns :   0/1 depending on the bit pointed to
813*/
814/*
815static picoos_uint8 kdtIsVal(register kdt_subobj_t *this,
816                             picoos_uint32 iByteNo,
817                             picoos_int8 iBitNo) {
818    return ((this->treebody[iByteNo] & ((1)<<iBitNo)) > 0);
819}
820*/
821
822
823/* @todo : consider replacing inline for speedup */
824
825/* Name    :   kdtGetQFieldsVal (was: m_QuestDependentFields)
826   Function:   gets a byte from qfields
827   Input   :   this      handle to a dt subobj
828               attind    index of the attribute
829               qind      index of the byte to be read
830   Returns :   the requested byte
831   Notes   :   check that attind < this->nrattributes needed before calling
832               this function!
833*/
834static picoos_uint8 kdtGetQFieldsVal(register kdt_subobj_t *this,
835                                     const picoos_uint8 attind,
836                                     const kdt_qfields_ind_t qind) {
837    /* check of qind done in initialize and (for some compilers) with typing */
838    /* check of attind needed before calling this function */
839    return this->qfields[(attind * PICOKDT_NODEINFO_NRQFIELDS) + qind];
840}
841
842
843/* Name    :   kdtGetShiftVal (was: get_shift_value)
844   Function:   returns the (treebody) value pointed to by iByteNo, iBitNo,
845               and with size iSize
846   Input   :   this    reference to the processing unit struct
847               iSize   number of bits to be extracted (0..N)
               iByteNo offset to the byte containing the bits to extract
                       (0..sizeof(treebody))
               iBitNo  offset to the first bit to be extracted (0..7)
851   Returns :   the value requested (if size==0 --> 0 is returned)
852*/
853/*
854static picoos_uint32 orig_kdtGetShiftVal(register kdt_subobj_t *this,
855                                    const picoos_int16 iSize,
856                                    picoos_uint32 *iByteNo,
857                                    picoos_int8 *iBitNo) {
858    picoos_uint32 iVal;
859    picoos_int16 i;
860
861    iVal = 0;
862    for (i = iSize-1; i >= 0; i--) {
863        if ( (this->treebody[*iByteNo] & ((1)<<(*iBitNo))) > 0) {
864            iVal |= ( (1) << i );
865        }
866        (*iBitNo)--;
867        if (*iBitNo < 0) {
868            *iBitNo = 7;
869            (*iByteNo)++;
870        }
871    }
872    return iVal;
873}
874*/
875/* refactor */
876static picoos_uint32 kdtGetShiftVal(register kdt_subobj_t *this,
877        const picoos_int16 iSize, picoos_uint32 *iByteNo, picoos_int8 *iBitNo)
878{
879    picoos_uint32 v, b, iVal;
880    picoos_int16 i, j, len;
881    picoos_uint8 val;
882
883    if (iSize < 4) {
884        iVal = 0;
885        for (i = iSize - 1; i >= 0; i--) {
886            /* no check that *iByteNo is within valid treebody range */
887            if ((this->treebody[*iByteNo] & ((1) << (*iBitNo))) > 0) {
888                iVal |= ((1) << i);
889            }
890            (*iBitNo)--;
891            if (*iBitNo < 0) {
892                *iBitNo = 7;
893                (*iByteNo)++;
894            }
895        }
896        return iVal;
897    }
898
899    b = *iByteNo;
900    j = *iBitNo;
901    len = iSize;
902    *iBitNo = j - iSize;
903    v = 0;
904    while (*iBitNo < 0) {
905        *iBitNo += 8;
906        (*iByteNo)++;
907    }
908
909    val = this->treebody[b++];
910    if (j < 7) {
911        switch (j) {
912            case 0:
913                val &= 0x01;
914                break;
915            case 1:
916                val &= 0x03;
917                break;
918            case 2:
919                val &= 0x07;
920                break;
921            case 3:
922                val &= 0x0f;
923                break;
924            case 4:
925                val &= 0x1f;
926                break;
927            case 5:
928                val &= 0x3f;
929                break;
930            case 6:
931                val &= 0x7f;
932                break;
933        }
934    }
935    len -= j + 1;
936    if (len < 0) {
937        val >>= -len;
938    }
939    v = val;
940    while (len > 0) {
941        if (len >= 8) {
942            j = 8;
943        } else {
944            j = len;
945        }
946        v <<= j;
947        val = this->treebody[b++];
948        if (j < 8) {
949            switch (j) {
950                case 1:
951                    val &= 0x80;
952                    val >>= 7;
953                    break;
954                case 2:
955                    val &= 0xc0;
956                    val >>= 6;
957                    break;
958                case 3:
959                    val &= 0xe0;
960                    val >>= 5;
961                    break;
962                case 4:
963                    val &= 0xf0;
964                    val >>= 4;
965                    break;
966                case 5:
967                    val &= 0xf8;
968                    val >>= 3;
969                    break;
970                case 6:
971                    val &= 0xfc;
972                    val >>= 2;
973                    break;
974                case 7:
975                    val &= 0xfe;
976                    val >>= 1;
977                    break;
978            }
979        }
980        v |= val;
981        len -= j;
982    }
983    return v;
984}
985
986
987/* Name    :   kdtAskTree
988   Function:   Tree Traversal routine
989   Input   :   iByteNo ofsset to the first byte containing the bits
990               to extract (0..sizeof(treebody))
991               iBitNo  ofsset to the first bit to be extracted (0..7)
992   Returns :   >0    continue, no solution yet found
993               =0    solution found
994               <0    error, no solution found
995   Notes   :
996*/
997static picoos_int8 kdtAskTree(register kdt_subobj_t *this,
998                              picoos_uint16 *invec,
999                              const kdt_nratt_t invecmax,
1000                              picoos_uint32 *iByteNo,
1001                              picoos_int8 *iBitNo) {
1002    picoos_uint32 iNodeType;
1003    picoos_uint8 iQuestion;
1004    picoos_int32 iVal;
1005    picoos_int32 iForks;
1006    picoos_int32 iID;
1007
1008    picoos_int32 iCut, iSubsetType, iBitPos, iBitCount, iPos, iJump, iDecision;
1009    picoos_int32 i;
1010    picoos_char iIsDecide;
1011
1012    PICODBG_TRACE(("start"));
1013
1014    /* get node type, value should be in kdt_nodetype_t range */
1015    iNodeType = kdtGetShiftVal(this, PICOKDT_NODETYPE_NRBITS, iByteNo, iBitNo);
1016    PICODBG_TRACE(("iNodeType: %d", iNodeType));
1017
1018    /* get attribute to be used in question, check if in range, and get val */
1019    /* check of vfields argument done in initialize */
1020    iQuestion = kdtGetShiftVal(this, this->vfields[eQuestion], iByteNo, iBitNo);
1021    if ((iQuestion < this->nrattributes) && (iQuestion < invecmax)) {
1022        iVal = invec[iQuestion];
1023    } else {
1024        this->dset = FALSE;
1025        PICODBG_TRACE(("invalid question"));
1026        return -1;    /* iQuestion invalid */
1027    }
1028    iForks = 0;
1029    iID = -1;
1030    PICODBG_TRACE(("iQuestion: %d", iQuestion));
1031
1032    switch (iNodeType) {
1033        case eNBinary: {
1034            iForks = 2;
1035            iID = iVal;
1036            break;
1037        }
1038        case eNContinuous: {
1039            iForks = 2;
1040            iID = 1;
1041            iCut = kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, eCut),
1042                                  iByteNo, iBitNo); /*read the threshold*/
1043            if (iVal <= iCut) {
1044                iID = 0;
1045            }
1046            break;
1047        }
1048        case eNDiscrete: {
1049            iForks =
1050                kdtGetShiftVal(this,
1051                               kdtGetQFieldsVal(this, iQuestion, eForkCount),
1052                               iByteNo, iBitNo);
1053
1054            for (i = 0; i < iForks-1; i++) {
1055                iSubsetType =
1056                    kdtGetShiftVal(this, PICOKDT_SUBSETTYPE_NRBITS,
1057                                   iByteNo, iBitNo);
1058
1059                switch (iSubsetType) {
1060                    case eOneValue: {
1061                        if (iID > -1) {
1062                            kdt_jump(kdtGetQFieldsVal(this, iQuestion, eBitNo),
1063                                     iByteNo, iBitNo);
1064                            break;
1065                        }
1066                        iBitPos =
1067                            kdtGetShiftVal(this,
1068                                           kdtGetQFieldsVal(this, iQuestion,
1069                                                            eBitNo),
1070                                           iByteNo, iBitNo);
1071                        if (iVal == iBitPos) {
1072                            iID = i;
1073                        }
1074                        break;
1075                    }
1076                    case eTwoValues: {
1077                        if (iID > -1) {
1078                            kdt_jump((kdtGetQFieldsVal(this, iQuestion, eBitNo) +
1079                                      kdtGetQFieldsVal(this, iQuestion, eBitCount)),
1080                                     iByteNo, iBitNo);
1081                            break;
1082                        }
1083
1084                        iBitPos =
1085                            kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
1086                                                                  eBitNo),
1087                                           iByteNo, iBitNo);
1088                        iBitCount =
1089                            kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
1090                                                                  eBitCount),
1091                                           iByteNo, iBitNo);
1092                        if ((iVal == iBitPos) || (iVal == iBitCount)) {
1093                            iID = i;
1094                        }
1095                        break;
1096                    }
1097                    case eWithoutBitMask: {
1098                        if (iID > -1) {
1099                            kdt_jump((kdtGetQFieldsVal(this, iQuestion, eBitNo) +
1100                                      kdtGetQFieldsVal(this, iQuestion, eBitCount)),
1101                                     iByteNo, iBitNo);
1102                            break;
1103                        }
1104
1105                        iBitPos =
1106                            kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
1107                                                                  eBitNo),
1108                                           iByteNo, iBitNo);
1109                        iBitCount =
1110                            kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
1111                                                                  eBitCount),
1112                                           iByteNo, iBitNo);
1113                        if ((iVal >= iBitPos) && (iVal < (iBitPos + iBitCount))) {
1114                            iID = i;
1115                        }
1116                        break;
1117                    }
1118                    case eBitMask: {
1119                        iBitPos = 0;
1120                        if (iID > -1) {
1121                            kdt_jump(kdtGetQFieldsVal(this, iQuestion, eBitNo),
1122                                     iByteNo, iBitNo);
1123                        } else {
1124                            iBitPos =
1125                                kdtGetShiftVal(this,
1126                                               kdtGetQFieldsVal(this, iQuestion,
1127                                                                eBitNo),
1128                                               iByteNo, iBitNo);
1129                        }
1130
1131                        iBitCount =
1132                            kdtGetShiftVal(this,
1133                                           kdtGetQFieldsVal(this, iQuestion,
1134                                                            eBitCount),
1135                                           iByteNo, iBitNo);
1136                        if (iID > -1) {
1137                            kdt_jump(iBitCount, iByteNo, iBitNo);
1138                            break;
1139                        }
1140
1141                        if ((iVal >= iBitPos) && (iVal < (iBitPos + iBitCount))) {
1142                            iPos = iVal - iBitPos;
1143                            kdt_jump((iVal - iBitPos), iByteNo, iBitNo);
1144                         /* if (kdtIsVal(this, *iByteNo, *iBitNo))*/
1145                            if ((this->treebody[*iByteNo] & ((1)<<(*iBitNo))) > 0) {
1146                                iID = i;
1147                            }
1148                            kdt_jump((iBitCount - (iVal-iBitPos)), iByteNo, iBitNo);
1149                        } else {
1150                            kdt_jump(iBitCount, iByteNo, iBitNo);
1151                        }
1152                        break;
1153                    }/*end case eBitMask*/
1154                }/*end switch (iSubsetType)*/
1155            }/*end for ( i = 0; i < iForks-1; i++ ) */
1156
1157            /*default tree branch*/
1158            if (-1 == iID) {
1159                iID = iForks-1;
1160            }
1161            break;
1162        }/*end case eNDiscrete*/
1163    }/*end switch (iNodeType)*/
1164
1165    for (i = 0; i < iForks; i++) {
1166        iIsDecide = kdtGetShiftVal(this, PICOKDT_ISDECIDE_NRBITS, iByteNo, iBitNo);
1167
1168        PICODBG_TRACE(("doing forks: %d", i));
1169
1170        if (!iIsDecide) {
1171            if (iID == i) {
1172                iJump =
1173                    kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, eJump),
1174                                   iByteNo, iBitNo);
1175                kdt_jump(iJump, iByteNo, iBitNo);
1176                this->dset = FALSE;
1177                return 1;    /* to be continued, no solution yet found */
1178            } else {
1179                kdt_jump(kdtGetQFieldsVal(this, iQuestion, eJump),
1180                         iByteNo, iBitNo);
1181            }
1182        } else {
1183            if (iID == i) {
1184                /* check of vfields argument done in initialize */
1185                iDecision = kdtGetShiftVal(this, this->vfields[eDecide],
1186                                           iByteNo, iBitNo);
1187                this->dclass = iDecision;
1188                this->dset = TRUE;
1189                return 0;    /* solution found */
1190            } else {
1191                /* check of vfields argument done in initialize */
1192                kdt_jump(this->vfields[eDecide], iByteNo, iBitNo);
1193            }
1194        }/*end if (!iIsDecide)*/
1195    }/*end for (i = 0; i < iForks; i++ )*/
1196
1197    this->dset = FALSE;
1198    PICODBG_TRACE(("problem determining class"));
1199    return -1; /* solution not found, problem determining a class */
1200}
1201
1202
1203
1204/* ************************************************************/
1205/* decision tree support functions, mappings */
1206/* ************************************************************/
1207
1208
1209/* size==1 -> MapInByte, size==2 -> MapInWord,
1210   size determined from table type contained in kb.
1211   if the inmaptable is empty, outval = inval */
1212
1213static picoos_uint8 kdtMapInFixed(const kdt_subobj_t *dt,
1214                                  const picoos_uint8 imtnr,
1215                                  const picoos_uint16 inval,
1216                                  picoos_uint16 *outval,
1217                                  picoos_uint16 *outfallbackval) {
1218    picoos_uint8 size;
1219    picoos_uint32 pos;
1220    picoos_uint16 lentable;
1221    picoos_uint16 posbound;
1222    picoos_uint16 i;
1223
1224    *outval = 0;
1225    *outfallbackval = 0;
1226
1227    size = 0;
1228    pos = 0;
1229
1230    /* check what can be checked */
1231    if (imtnr >= dt->inpmaptable[pos++]) {   /* outside tablenr range? */
1232        PICODBG_ERROR(("check failed: nrtab: %d, imtnr: %d",
1233                       dt->inpmaptable[pos-1], imtnr));
1234        return FALSE;
1235    }
1236
1237    /* go forward to the needed tablenr */
1238    if (imtnr > 0) {
1239        pos = dt->beg_offset[imtnr];
1240    }
1241
1242    /* get length */
1243    lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
1244        dt->inpmaptable[pos];
1245    posbound = pos + lentable;
1246    pos += 2;
1247
1248    /* check type of table and set size */
1249    if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_EMPTY) {
1250        /* empty table no mapping needed */
1251        PICODBG_TRACE(("empty table: %d", imtnr));
1252        *outval = inval;
1253        return TRUE;
1254    } else if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_BYTE) {
1255        size = 1;
1256    } else if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_WORD) {
1257        size = 2;
1258    } else {
1259        /* wrong table type */
1260        PICODBG_ERROR(("wrong table type %d", dt->inpmaptable[pos]));
1261        return FALSE;
1262    }
1263    pos++;
1264
1265    /* set fallback value in case of failed mapping, and set upper bound pos */
1266    *outfallbackval = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
1267        dt->inpmaptable[pos];
1268    pos += 2;
1269
1270    /* size must be 1 or 2 here, keep 'redundant' so save time */
1271    if (size == 1) {
1272        for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
1273            if (inval == dt->inpmaptable[pos]) {
1274                *outval = i;
1275                PICODBG_TRACE(("s1 %d in %d -> out %d", imtnr, inval, *outval));
1276                return TRUE;
1277            }
1278            pos++;
1279        }
1280    } else if (size == 2) {
1281        posbound--;
1282        for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
1283            if (inval == (((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
1284                          dt->inpmaptable[pos])) {
1285                *outval = i;
1286                PICODBG_TRACE(("s2 %d in %d -> out %d", imtnr, inval, *outval));
1287                return TRUE;
1288            }
1289            pos += 2;
1290        }
1291    } else {
1292        /* impossible size */
1293        PICODBG_ERROR(("wrong size %d", size));
1294        return FALSE;
1295    }
1296
1297    PICODBG_DEBUG(("no mapping found, fallback: %d", *outfallbackval));
1298    return FALSE;
1299}
1300
1301
1302static picoos_uint8 kdtMapInGraph(const kdt_subobj_t *dt,
1303                                  const picoos_uint8 imtnr,
1304                                  const picoos_uint8 *inval,
1305                                  const picoos_uint8 invalmaxlen,
1306                                  picoos_uint16 *outval,
1307                                  picoos_uint16 *outfallbackval) {
1308    picoos_uint8 ilen;
1309    picoos_uint8 tlen;
1310    picoos_uint8 cont;
1311    picoos_uint32 pos;
1312    picoos_uint16 lentable;
1313    picoos_uint16 posbound;
1314    picoos_uint16 i;
1315    picoos_uint8 j;
1316
1317    *outfallbackval = 0;
1318
1319    pos = 0;
1320    /* check what can be checked */
1321    if ((imtnr >= dt->inpmaptable[pos++]) ||     /* outside tablenr range? */
1322        (invalmaxlen == 0) ||                    /* too short? */
1323        ((ilen = picobase_det_utf8_length(inval[0])) == 0) ||   /* invalid? */
1324        (ilen > invalmaxlen)) {                  /* not accessible? */
1325        PICODBG_ERROR(("check failed: nrtab: %d, imtnr: %d, invalmaxlen: %d, "
1326                       "ilen: %d",
1327                       dt->inpmaptable[pos-1], imtnr, invalmaxlen, ilen));
1328        return FALSE;
1329    }
1330
1331    /* go forward to the needed tablenr */
1332    for (i = 0; i < imtnr; i++) {
1333        lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
1334            dt->inpmaptable[pos];
1335        pos += lentable;
1336    }
1337
1338    /* get length and check type of inpmaptable */
1339    lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
1340        dt->inpmaptable[pos];
1341    posbound = pos + lentable;
1342    pos += 2;
1343
1344#if defined(PICO_DEBUG)
1345    if (1) {
1346        int id;
1347        PICODBG_TRACE(("imtnr %d", imtnr));
1348        for (id = pos-2; id < posbound; id++) {
1349            PICODBG_TRACE(("imtbyte pos %d, %c %d", id - (pos-2),
1350                           dt->inpmaptable[id], dt->inpmaptable[id]));
1351        }
1352    }
1353#endif
1354
1355    /* check type of table */
1356    if (dt->inpmaptable[pos] != PICOKDT_MTTYPE_GRAPH) {
1357        /* empty table does not make sense for graph */
1358        /* wrong table type */
1359        PICODBG_ERROR(("wrong table type"));
1360        return FALSE;
1361    }
1362    pos++;
1363
1364    /* set fallback value in case of failed mapping, and set upper bound pos */
1365    *outfallbackval = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
1366        dt->inpmaptable[pos];
1367    pos += 2;
1368
1369    /* sequential search */
1370    for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
1371        tlen = picobase_det_utf8_length(dt->inpmaptable[pos]);
1372        if ((pos + tlen) > posbound) {
1373            PICODBG_ERROR(("trying outside imt, posb: %d, pos: %d, tlen: %d",
1374                           posbound, pos, tlen));
1375            return FALSE;
1376        }
1377        if (ilen == tlen) {
1378            cont = TRUE;
1379            for (j = 0; cont && (j < ilen); j++) {
1380                if (dt->inpmaptable[pos + j] != inval[j]) {
1381                    cont = FALSE;
1382                }
1383            }
1384            if (cont && (j == ilen)) {    /* match found */
1385                *outval = i;
1386                PICODBG_TRACE(("found mapval, posb %d, pos %d, i %d, tlen %d",
1387                               posbound, pos, i, tlen));
1388                return TRUE;
1389            }
1390        }
1391        pos += tlen;
1392    }
1393    PICODBG_DEBUG(("outside imt %d, posb/pos/i: %d/%d/%d, fallback: %d",
1394                   imtnr, posbound, pos, i, *outfallbackval));
1395    return FALSE;
1396}
1397
1398
1399/* size==1 -> MapOutByte,    size==2 -> MapOutWord */
1400static picoos_uint8 kdtMapOutFixed(const kdt_subobj_t *dt,
1401                                   const picoos_uint16 inval,
1402                                   picoos_uint16 *outval) {
1403    picoos_uint8 size;
1404    picoos_uint16 nr;
1405
1406    /* no check of lentable vs. nr in initialize done */
1407
1408    size = 0;
1409
1410    /* type */
1411    nr = dt->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_TABLETYPE];
1412
1413    /* check type of table and set size */
1414    if (nr == PICOKDT_MTTYPE_EMPTY) {
1415        /* empty table no mapping needed */
1416        PICODBG_TRACE(("empty table"));
1417        *outval = inval;
1418        return TRUE;
1419    } else if (nr == PICOKDT_MTTYPE_BYTE) {
1420        size = 1;
1421    } else if (nr == PICOKDT_MTTYPE_WORD) {
1422        size = 2;
1423    } else {
1424        /* wrong table type */
1425        PICODBG_ERROR(("wrong table type %d", nr));
1426        return FALSE;
1427    }
1428
1429    /* number of mapvalues */
1430    nr = ((picoos_uint16)(dt->outmaptable[PICOKDT_MTPOS_START +
1431                                          PICOKDT_MTPOS_NUMBER + 1])) << 8
1432        | dt->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_NUMBER];
1433
1434    if (inval < nr) {
1435        if (size == 1) {
1436            *outval = dt->outmaptable[PICOKDT_MTPOS_START +
1437                                      PICOKDT_MTPOS_MAPSTART + (size * inval)];
1438        } else {
1439            *outval = ((picoos_uint16)(dt->outmaptable[PICOKDT_MTPOS_START +
1440                          PICOKDT_MTPOS_MAPSTART + (size * inval) + 1])) << 8
1441                                     | dt->outmaptable[PICOKDT_MTPOS_START +
1442                          PICOKDT_MTPOS_MAPSTART + (size * inval)];
1443        }
1444        return TRUE;
1445    } else {
1446        *outval = 0;
1447        return FALSE;
1448    }
1449}
1450
1451
1452/* size==1 -> ReverseMapOutByte,    size==2 -> ReverseMapOutWord */
1453/* outmaptable also used to map from decoded tree output domain to
1454   direct tree output domain */
1455static picoos_uint8 kdtReverseMapOutFixed(const kdt_subobj_t *dt,
1456                                          const picoos_uint16 inval,
1457                                          picoos_uint16 *outval,
1458                                          picoos_uint16 *outfallbackval) {
1459    picoos_uint8 size;
1460    picoos_uint32 pos;
1461    picoos_uint16 lentable;
1462    picoos_uint16 posbound;
1463    picoos_uint16 i;
1464
1465    /* no check of lentable vs. nr in initialize done */
1466
1467    size = 0;
1468    pos = 0;
1469    *outval = 0;
1470    *outfallbackval = 0;
1471
1472    if (dt->outmaptable == NULL) {
1473        /* empty table no mapping needed */
1474        PICODBG_TRACE(("empty table"));
1475        *outval = inval;
1476        return TRUE;
1477    }
1478
1479    /* check what can be checked */
1480    if (dt->outmaptable[pos++] != 1) {   /* only one omt possible */
1481        PICODBG_ERROR(("check failed: nrtab: %d", dt->outmaptable[pos-1]));
1482        return FALSE;
1483    }
1484
1485    /* get length */
1486    lentable = ((picoos_uint16)(dt->outmaptable[pos+1])) << 8 |
1487        dt->outmaptable[pos];
1488    posbound = pos + lentable;
1489    pos += 2;
1490
1491    /* check type of table and set size */
1492    /* if (dt->outmaptable[pos] == PICOKDT_MTTYPE_EMPTY), in
1493       ...Initialize the omt is set to NULL if not existing, checked
1494       above */
1495
1496    if (dt->outmaptable[pos] == PICOKDT_MTTYPE_BYTE) {
1497        size = 1;
1498    } else if (dt->outmaptable[pos] == PICOKDT_MTTYPE_WORD) {
1499        size = 2;
1500    } else {
1501        /* wrong table type */
1502        PICODBG_ERROR(("wrong table type %d", dt->outmaptable[pos]));
1503        return FALSE;
1504    }
1505    pos++;
1506
1507    /* set fallback value in case of failed mapping, and set upper bound pos */
1508    *outfallbackval = ((picoos_uint16)(dt->outmaptable[pos+1])) << 8 |
1509        dt->outmaptable[pos];
1510    pos += 2;
1511
1512    /* size must be 1 or 2 here, keep 'redundant' so save time */
1513    if (size == 1) {
1514        for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
1515            if (inval == dt->outmaptable[pos]) {
1516                *outval = i;
1517                PICODBG_TRACE(("s1 inval %d -> outval %d", inval, *outval));
1518                return TRUE;
1519            }
1520            pos++;
1521        }
1522    } else if (size == 2) {
1523        posbound--;
1524        for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
1525            if (inval == (((picoos_uint16)(dt->outmaptable[pos+1])) << 8 |
1526                          dt->outmaptable[pos])) {
1527                *outval = i;
1528                PICODBG_TRACE(("s2 inval %d -> outval %d", inval, *outval));
1529                return TRUE;
1530            }
1531            pos += 2;
1532        }
1533    } else {
1534        /* impossible size */
1535        PICODBG_ERROR(("wrong size %d", size));
1536        return FALSE;
1537    }
1538
1539    PICODBG_DEBUG(("no mapping found, fallback: %d", *outfallbackval));
1540    return FALSE;
1541}
1542
1543
1544picoos_uint8 picokdt_dtPosDreverseMapOutFixed(const picokdt_DtPosD this,
1545                                          const picoos_uint16 inval,
1546                                          picoos_uint16 *outval,
1547                                          picoos_uint16 *outfallbackval) {
1548
1549    kdtposd_subobj_t * dtposd = (kdtposd_subobj_t *)this;
1550    kdt_subobj_t * dt = &(dtposd->dt);
1551    return kdtReverseMapOutFixed(dt,inval, outval, outfallbackval);
1552}
1553
1554/* not yet impl. size==1 -> MapOutByteToVar,
1555   fix:  size==2 -> MapOutWordToVar */
1556static picoos_uint8 kdtMapOutVar(const kdt_subobj_t *dt,
1557                                 const picoos_uint16 inval,
1558                                 picoos_uint8 *nr,
1559                                 picoos_uint16 *outval,
1560                                 const picoos_uint16 outvalmaxlen) {
1561    picoos_uint16 pos;
1562    picoos_uint16 off2ind;
1563    picoos_uint16 lentable;
1564    picoos_uint16 nrinbytes;
1565    picoos_uint8 size;
1566    picoos_uint16 offset1;
1567    picoos_uint16 i;
1568
1569    if (dt->outmaptable == NULL) {
1570        /* empty table not possible */
1571        PICODBG_ERROR(("no table found"));
1572        return FALSE;
1573    }
1574
1575    /* nr of tables == 1 already checked in *Initialize, no need here, go
1576       directly to position 1 */
1577    pos = 1;
1578
1579    /* get length of table */
1580    lentable = (((picoos_uint16)(dt->outmaptable[pos + 1])) << 8 |
1581                dt->outmaptable[pos]);
1582    pos += 2;
1583
1584    /* check table type */
1585    if (dt->outmaptable[pos] != PICOKDT_MTTYPE_BYTETOVAR) {
1586        /* wrong table type */
1587        PICODBG_ERROR(("wrong table type %d", dt->outmaptable[pos]));
1588        return FALSE;
1589    }
1590    size = 2;
1591    pos++;
1592
1593    /* get nr of ele in maptable (= nr of possible invals) */
1594    nrinbytes = (((picoos_uint16)(dt->outmaptable[pos+1])) << 8 |
1595                 dt->outmaptable[pos]);
1596    pos += 2;
1597
1598    /* check what's checkable */
1599    if (nrinbytes == 0) {
1600        PICODBG_ERROR(("table with length zero"));
1601        return FALSE;
1602    } else if (inval >= nrinbytes) {
1603        PICODBG_ERROR(("inval %d outside valid range %d", inval, nrinbytes));
1604        return FALSE;
1605    }
1606
1607    PICODBG_TRACE(("inval %d, lentable %d, nrinbytes %d, pos %d", inval,
1608                   lentable, nrinbytes, pos));
1609
1610    /* set off2ind to the position of the start of offset2-val */
1611    /* offset2 points to start of next ele */
1612    off2ind = pos + (size*inval);
1613
1614    /* get number of output values, offset2 - offset1 */
1615    if (inval == 0) {
1616        offset1 = 0;
1617    } else {
1618        offset1 = (((picoos_uint16)(dt->outmaptable[off2ind - 1])) << 8 |
1619                   dt->outmaptable[off2ind - 2]);
1620    }
1621    *nr = (((picoos_uint16)(dt->outmaptable[off2ind + 1])) << 8 |
1622           dt->outmaptable[off2ind]) - offset1;
1623
1624    PICODBG_TRACE(("offset1 %d, nr %d, pos %d", offset1, *nr, pos));
1625
1626    /* set pos to position of 1st value being mapped to */
1627    pos += (size * nrinbytes) + offset1;
1628
1629    if ((pos + *nr - 1) > lentable) {
1630        /* outside table, should not happen */
1631        PICODBG_ERROR(("problem with table index, pos %d, nr %d, len %d",
1632                       pos, *nr, lentable));
1633        return FALSE;
1634    }
1635    if (*nr > outvalmaxlen) {
1636        /* not enough space in outval */
1637        PICODBG_ERROR(("overflow in outval, %d > %d", *nr, outvalmaxlen));
1638        return FALSE;
1639    }
1640
1641    /* finally, copy outmap result to outval */
1642    for (i = 0; i < *nr; i++) {
1643        outval[i] = dt->outmaptable[pos++];
1644    }
1645    return TRUE;
1646}
1647
1648
1649
1650/* ************************************************************/
1651/* decision tree POS prediction (PosP) functions */
1652/* ************************************************************/
1653
/* number of prefix and suffix graphemes used to construct the input vector;
   KDT_POSP_NRGRAPHATT is the total number of grapheme attributes, i.e. the
   sum of the prefix (4) and suffix (6) counts, occupying invec positions
   0-9 */
#define KDT_POSP_NRGRAPHPREFATT   4
#define KDT_POSP_NRGRAPHSUFFATT   6
#define KDT_POSP_NRGRAPHATT      10

/* positions of specgraph and nrgraphs attributes in the invec; they
   directly follow the grapheme attributes */
#define KDT_POSP_SPECGRAPHATTPOS 10
#define KDT_POSP_NRGRAPHSATTPOS  11
1662
1663
1664/* construct PosP input vector
1665
1666   PosP invec: 12 elements
1667
1668   prefix        0-3  prefix graphemes (encoded using tree inpmaptable 0-3)
1669   suffix        4-9  suffix graphemes (encoded using tree inpmaptable 4-9)
1670   isspecchar    10   is a special grapheme (e.g. hyphen) inside the word (0/1)?
1671   nr-utf-graphs 11   number of graphemes (ie. UTF8 chars)
1672
   if there are fewer than 10 graphemes, each grapheme is used only
   once, with the suffix having higher priority, i.e. elements 0-9 are
   filled as follows:
1676
1677    #graph
1678    1        0 0 0 0  0 0 0 0 0 1
1679    2        0 0 0 0  0 0 0 0 1 2
1680    3        0 0 0 0  0 0 0 1 2 3
1681    4        0 0 0 0  0 0 1 2 3 4
1682    5        0 0 0 0  0 1 2 3 4 5
1683    6        0 0 0 0  1 2 3 4 5 6
1684    7        1 0 0 0  2 3 4 5 6 7
1685    8        1 2 0 0  3 4 5 6 7 8
1686    9        1 2 3 0  4 5 6 7 8 9
1687    10       1 2 3 4  5 6 7 8 9 10
1688    11       1 2 3 4  6 7 8 9 10 11
1689    ...
1690
1691    1-6: Fill chbuf
1692    7-10: front to invec 1st part, remove front, add rear
1693    >10: remove front, add rear
1694    no more graph ->
1695    while chbuflen>0:
1696      add rear to the last empty slot in 2nd part of invec, remove rear
1697*/
1698
1699
1700picoos_uint8 picokdt_dtPosPconstructInVec(const picokdt_DtPosP this,
1701                                          const picoos_uint8 *graph,
1702                                          const picoos_uint16 graphlen,
1703                                          const picoos_uint8 specgraphflag) {
1704    kdtposp_subobj_t *dtposp;
1705
1706    /* utf8 circular char buffer, used as restricted input deque */
1707    /* 2nd part of graph invec has KDT_POSP_NRGRAPHSUFFATT elements, */
1708    /* max of UTF8_MAXLEN bytes per utf8 char */
1709    picoos_uint8 chbuf[KDT_POSP_NRGRAPHSUFFATT][PICOBASE_UTF8_MAXLEN];
1710    picoos_uint8 chbrear;   /* next free pos */
1711    picoos_uint8 chbfront;  /* next read pos */
1712    picoos_uint8 chblen;    /* empty=0; full=KDT_POSP_NRGRAPHSUFFATT */
1713
1714    picoos_uint16 poscg;    /* position of current graph (= utf8 char) */
1715    picoos_uint16 lencg = 0;    /* length of current grapheme */
1716    picoos_uint16 nrutfg;   /* number of utf graphemes */
1717    picoos_uint8 invecpos;  /* next element to add in invec */
1718    picoos_uint16 fallback; /* fallback value for failed graph encodings */
1719    picoos_uint8 i;
1720
1721    dtposp = (kdtposp_subobj_t *)this;
1722    chbrear = 0;
1723    chbfront = 0;
1724    chblen = 0;
1725    poscg = 0;
1726    nrutfg = 0;
1727    invecpos = 0;
1728
1729    PICODBG_DEBUG(("graphlen %d", graphlen));
1730
1731    /* not needed, since all elements are set
1732    for (i = 0; i < PICOKDT_NRATT_POSP; i++) {
1733        dtposp->invec[i] = '\x63';
1734    }
1735    */
1736
1737    dtposp->inveclen = 0;
1738
1739    while ((poscg < graphlen) &&
1740           ((lencg = picobase_det_utf8_length(graph[poscg])) > 0)) {
1741        if (chblen >= KDT_POSP_NRGRAPHSUFFATT) {      /* chbuf full */
1742            if (invecpos < KDT_POSP_NRGRAPHPREFATT) { /* prefix not full */
1743                /* att-encode front utf graph and add in invec */
1744                if (!kdtMapInGraph(&(dtposp->dt), invecpos,
1745                                   chbuf[chbfront], PICOBASE_UTF8_MAXLEN,
1746                                   &(dtposp->invec[invecpos]),
1747                                   &fallback)) {
1748                    if (fallback) {
1749                        dtposp->invec[invecpos] = fallback;
1750                    } else {
1751                        return FALSE;
1752                    }
1753                }
1754                invecpos++;
1755            }
1756            /* remove front utf graph */
1757            chbfront++;
1758            chbfront %= KDT_POSP_NRGRAPHSUFFATT;
1759            chblen--;
1760        }
1761        /* add current utf graph to chbuf */
1762        for (i=0; i<lencg; i++) {
1763            chbuf[chbrear][i] = graph[poscg++];
1764        }
1765        if (i < PICOBASE_UTF8_MAXLEN) {
1766            chbuf[chbrear][i] = '\0';
1767        }
1768        chbrear++;
1769        chbrear %= KDT_POSP_NRGRAPHSUFFATT;
1770        chblen++;
1771        /* increase utf graph count */
1772        nrutfg++;
1773    }
1774
1775    if ((lencg == 0) || (chblen == 0)) {
1776        return FALSE;
1777    } else if (chblen > 0) {
1778
1779        while (invecpos < KDT_POSP_NRGRAPHPREFATT) { /* fill up prefix */
1780            if (!kdtMapInGraph(&(dtposp->dt), invecpos,
1781                               PICOKDT_OUTSIDEGRAPH_DEFSTR,
1782                               PICOKDT_OUTSIDEGRAPH_DEFLEN,
1783                               &(dtposp->invec[invecpos]), &fallback)) {
1784                if (fallback) {
1785                    dtposp->invec[invecpos] = fallback;
1786                } else {
1787                    return FALSE;
1788                }
1789            }
1790            invecpos++;
1791        }
1792
1793        for (i = (KDT_POSP_NRGRAPHATT - 1);
1794             i >= KDT_POSP_NRGRAPHPREFATT; i--) {
1795            if (chblen > 0) {
1796                if (chbrear == 0) {
1797                    chbrear = KDT_POSP_NRGRAPHSUFFATT - 1;
1798                } else {
1799                    chbrear--;
1800                }
1801                if (!kdtMapInGraph(&(dtposp->dt), i, chbuf[chbrear],
1802                                   PICOBASE_UTF8_MAXLEN,
1803                                   &(dtposp->invec[i]), &fallback)) {
1804                    if (fallback) {
1805                        dtposp->invec[i] = fallback;
1806                    } else {
1807                        return FALSE;
1808                    }
1809                }
1810                chblen--;
1811            } else {
1812                if (!kdtMapInGraph(&(dtposp->dt), i,
1813                                   PICOKDT_OUTSIDEGRAPH_DEFSTR,
1814                                   PICOKDT_OUTSIDEGRAPH_DEFLEN,
1815                                   &(dtposp->invec[i]), &fallback)) {
1816                    if (fallback) {
1817                        dtposp->invec[i] = fallback;
1818                    } else {
1819                        return FALSE;
1820                    }
1821                }
1822            }
1823        }
1824
1825        /* set isSpecChar attribute, reuse var i */
1826        i = (specgraphflag ? 1 : 0);
1827        if (!kdtMapInFixed(&(dtposp->dt), KDT_POSP_SPECGRAPHATTPOS, i,
1828                           &(dtposp->invec[KDT_POSP_SPECGRAPHATTPOS]),
1829                           &fallback)) {
1830            if (fallback) {
1831                dtposp->invec[KDT_POSP_SPECGRAPHATTPOS] = fallback;
1832            } else {
1833                return FALSE;
1834            }
1835        }
1836
1837        /* set nrGraphs attribute */
1838        if (!kdtMapInFixed(&(dtposp->dt), KDT_POSP_NRGRAPHSATTPOS, nrutfg,
1839                           &(dtposp->invec[KDT_POSP_NRGRAPHSATTPOS]),
1840                           &fallback)) {
1841            if (fallback) {
1842                dtposp->invec[KDT_POSP_NRGRAPHSATTPOS] = fallback;
1843            } else {
1844                return FALSE;
1845            }
1846        }
1847        PICODBG_DEBUG(("posp-invec: [%d,%d,%d,%d|%d,%d,%d,%d,%d,%d|%d|%d]",
1848                       dtposp->invec[0], dtposp->invec[1], dtposp->invec[2],
1849                       dtposp->invec[3], dtposp->invec[4], dtposp->invec[5],
1850                       dtposp->invec[6], dtposp->invec[7], dtposp->invec[8],
1851                       dtposp->invec[9], dtposp->invec[10],
1852                       dtposp->invec[11], dtposp->invec[12]));
1853        dtposp->inveclen = PICOKDT_NRINPMT_POSP;
1854        return TRUE;
1855    }
1856
1857    return FALSE;
1858}
1859
1860
1861picoos_uint8 picokdt_dtPosPclassify(const picokdt_DtPosP this) {
1862    picoos_uint32 iByteNo;
1863    picoos_int8 iBitNo;
1864    picoos_int8 rv;
1865    kdtposp_subobj_t *dtposp;
1866    kdt_subobj_t *dt;
1867
1868    dtposp = (kdtposp_subobj_t *)this;
1869    dt = &(dtposp->dt);
1870    iByteNo = 0;
1871    iBitNo = 7;
1872    while ((rv = kdtAskTree(dt, dtposp->invec, PICOKDT_NRATT_POSP,
1873                            &iByteNo, &iBitNo)) > 0) {
1874        PICODBG_TRACE(("asking tree"));
1875    }
1876    PICODBG_DEBUG(("done: %d", dt->dclass));
1877    return ((rv == 0) && dt->dset);
1878}
1879
1880
1881picoos_uint8 picokdt_dtPosPdecomposeOutClass(const picokdt_DtPosP this,
1882                                             picokdt_classify_result_t *dtres) {
1883    kdtposp_subobj_t *dtposp;
1884    picoos_uint16 val;
1885
1886    dtposp = (kdtposp_subobj_t *)this;
1887
1888    if (dtposp->dt.dset &&
1889        kdtMapOutFixed(&(dtposp->dt), dtposp->dt.dclass, &val)) {
1890        dtres->set = TRUE;
1891        dtres->class = val;
1892        return TRUE;
1893    } else {
1894        dtres->set = FALSE;
1895        return FALSE;
1896    }
1897}
1898
1899
1900
1901/* ************************************************************/
1902/* decision tree POS disambiguation (PosD) functions */
1903/* ************************************************************/
1904
1905
1906picoos_uint8 picokdt_dtPosDconstructInVec(const picokdt_DtPosD this,
1907                                          const picoos_uint16 * input) {
1908    kdtposd_subobj_t *dtposd;
1909    picoos_uint8 i;
1910    picoos_uint16 fallback = 0;
1911
1912    dtposd = (kdtposd_subobj_t *)this;
1913    dtposd->inveclen = 0;
1914
1915    PICODBG_DEBUG(("in: [%d,%d,%d|%d|%d,%d,%d]",
1916                   input[0], input[1], input[2],
1917                   input[3], input[4], input[5],
1918                   input[6]));
1919    for (i = 0; i < PICOKDT_NRATT_POSD; i++) {
1920
1921        /* do the imt mapping for all inval */
1922        if (!kdtMapInFixed(&(dtposd->dt), i, input[i],
1923                           &(dtposd->invec[i]), &fallback)) {
1924            if (fallback) {
1925                PICODBG_DEBUG(("*** using fallback for input mapping: %i -> %i", input[i], fallback));
1926                dtposd->invec[i] = fallback;
1927            } else {
1928                PICODBG_ERROR(("problem doing input mapping"));
1929                return FALSE;
1930            }
1931        }
1932    }
1933
1934    PICODBG_DEBUG(("out: [%d,%d,%d|%d|%d,%d,%d]",
1935                   dtposd->invec[0], dtposd->invec[1], dtposd->invec[2],
1936                   dtposd->invec[3], dtposd->invec[4], dtposd->invec[5],
1937                   dtposd->invec[6]));
1938    dtposd->inveclen = PICOKDT_NRINPMT_POSD;
1939    return TRUE;
1940}
1941
1942
1943picoos_uint8 picokdt_dtPosDclassify(const picokdt_DtPosD this,
1944                                    picoos_uint16 *treeout) {
1945    picoos_uint32 iByteNo;
1946    picoos_int8 iBitNo;
1947    picoos_int8 rv;
1948    kdtposd_subobj_t *dtposd;
1949    kdt_subobj_t *dt;
1950
1951    dtposd = (kdtposd_subobj_t *)this;
1952    dt = &(dtposd->dt);
1953    iByteNo = 0;
1954    iBitNo = 7;
1955    while ((rv = kdtAskTree(dt, dtposd->invec, PICOKDT_NRATT_POSD,
1956                            &iByteNo, &iBitNo)) > 0) {
1957        PICODBG_TRACE(("asking tree"));
1958    }
1959    PICODBG_DEBUG(("done: %d", dt->dclass));
1960    if ((rv == 0) && dt->dset) {
1961        *treeout = dt->dclass;
1962        return TRUE;
1963    } else {
1964        return FALSE;
1965    }
1966}
1967
1968
1969/* decompose the tree output and return the class in dtres
1970   dtres:         POS classification result
1971   returns:       TRUE if okay, FALSE otherwise
1972*/
1973picoos_uint8 picokdt_dtPosDdecomposeOutClass(const picokdt_DtPosD this,
1974                                             picokdt_classify_result_t *dtres) {
1975    kdtposd_subobj_t *dtposd;
1976    picoos_uint16 val;
1977
1978    dtposd = (kdtposd_subobj_t *)this;
1979
1980    if (dtposd->dt.dset &&
1981        kdtMapOutFixed(&(dtposd->dt), dtposd->dt.dclass, &val)) {
1982        dtres->set = TRUE;
1983        dtres->class = val;
1984        return TRUE;
1985    } else {
1986        dtres->set = FALSE;
1987        return FALSE;
1988    }
1989}
1990
1991
1992
1993/* ************************************************************/
1994/* decision tree grapheme-to-phoneme (G2P) functions */
1995/* ************************************************************/
1996
1997
1998/* get the nr'th (starting at 0) utf char in utfgraph */
1999static picoos_uint8 kdtGetUTF8char(const picoos_uint8 *utfgraph,
2000                                   const picoos_uint16 graphlen,
2001                                   const picoos_uint16 nr,
2002                                   picoos_uint8 *utf8char) {
2003    picoos_uint16 i;
2004    picoos_uint32 pos;
2005
2006    pos = 0;
2007    for (i = 0; i < nr; i++) {
2008        if (!picobase_get_next_utf8charpos(utfgraph, graphlen, &pos)) {
2009            return FALSE;
2010        }
2011    }
2012    return picobase_get_next_utf8char(utfgraph, graphlen, &pos, utf8char);
2013}
2014
2015/* determine the utfchar count (starting at 1) of the utfchar starting at pos */
2016static picoos_uint16 kdtGetUTF8Nr(const picoos_uint8 *utfgraph,
2017                                  const picoos_uint16 graphlen,
2018                                  const picoos_uint16 pos) {
2019    picoos_uint32 postmp;
2020    picoos_uint16 count;
2021
2022    count = 0;
2023    postmp = 0;
2024    while ((postmp <= pos) && (count < graphlen)) {
2025        if (!picobase_get_next_utf8charpos(utfgraph, graphlen, &postmp)) {
2026            PICODBG_ERROR(("invalid utf8 string, count: %d, pos: %d, post: %d",
2027                           count, pos, postmp));
2028            return count + 1;
2029        }
2030        count++;
2031    }
2032    return count;
2033}
2034
2035
2036picoos_uint8 picokdt_dtG2PconstructInVec(const picokdt_DtG2P this,
2037                                         const picoos_uint8 *graph,
2038                                         const picoos_uint16 graphlen,
2039                                         const picoos_uint8 count,
2040                                         const picoos_uint8 pos,
2041                                         const picoos_uint8 nrvow,
2042                                         const picoos_uint8 ordvow,
2043                                         picoos_uint8 *primstressflag,
2044                                         const picoos_uint16 phonech1,
2045                                         const picoos_uint16 phonech2,
2046                                         const picoos_uint16 phonech3) {
2047    kdtg2p_subobj_t *dtg2p;
2048    picoos_uint16 fallback = 0;
2049    picoos_uint8 iAttr;
2050    picoos_uint8 utf8char[PICOBASE_UTF8_MAXLEN + 1];
2051    picoos_uint16 inval;
2052    picoos_int16 cinv;
2053    picoos_uint8 retval;
2054    picoos_int32 utfgraphlen;
2055    picoos_uint16 utfcount;
2056
2057    dtg2p = (kdtg2p_subobj_t *)this;
2058    retval = TRUE;
2059    inval = 0;
2060
2061    PICODBG_TRACE(("in:  [%d,%d,%d|%d,%d|%d|%d,%d,%d]", graphlen, count, pos,
2062                   nrvow, ordvow, *primstressflag, phonech1, phonech2,
2063                   phonech3));
2064
2065    dtg2p->inveclen = 0;
2066
2067    /* many speed-ups possible */
2068
2069    /* graph attributes */
2070    /*   count   >     =         <=     count
2071       iAttr lowbound eow     upbound  delta
2072         0     4      4       graphlen    5
2073         1     3      3       graphlen    4
2074         2     2      2       graphlen    3
2075         3     1      1       graphlen    2
2076         4     0      -       graphlen    1
2077
2078         5     0  graphlen    graphlen-1  0
2079         6     0  graphlen-1  graphlen-2 -1
2080         7     0  graphlen-2  graphlen-3 -2
2081         8     0  graphlen-3  graphlen-4 -3
2082     */
2083
2084    /* graph attributes left (context -4/-3/-2/-1) and current, MapInGraph */
2085
2086    utfgraphlen = picobase_utf8_length(graph, graphlen);
2087    if (utfgraphlen <= 0) {
2088        utfgraphlen = 0;
2089    }
2090    utfcount = kdtGetUTF8Nr(graph, graphlen, count);
2091
2092    cinv = 4;
2093    for (iAttr = 0; iAttr < 5; iAttr++) {
2094        if ((utfcount > cinv) && (utfcount <= utfgraphlen)) {
2095
2096/*            utf8char[0] = graph[count - cinv - 1];*/
2097            if (!kdtGetUTF8char(graph, graphlen, utfcount-cinv-1,
2098                                utf8char)) {
2099                PICODBG_WARN(("problem getting UTF char %d", utfcount-cinv-1));
2100                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
2101                utf8char[1] = '\0';
2102            }
2103        } else {
2104            if ((utfcount == cinv) && (iAttr != 4)) {
2105                utf8char[0] = PICOKDT_OUTSIDEGRAPH_EOW_DEFCH;
2106            } else {
2107                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
2108            }
2109            utf8char[1] = '\0';
2110        }
2111
2112        if (!kdtMapInGraph(&(dtg2p->dt), iAttr,
2113                           utf8char, PICOBASE_UTF8_MAXLEN,
2114                           &(dtg2p->invec[iAttr]),
2115                           &fallback)) {
2116            if (fallback) {
2117                dtg2p->invec[iAttr] = fallback;
2118            } else {
2119                PICODBG_WARN(("setting attribute %d to zero", iAttr));
2120                dtg2p->invec[iAttr] = 0;
2121                retval = FALSE;
2122            }
2123        }
2124        PICODBG_TRACE(("invec %d %c", iAttr, utf8char[0]));
2125        cinv--;
2126    }
2127
2128    /* graph attributes right (context 1/2/3/4), MapInGraph */
2129    cinv = utfgraphlen;
2130    for (iAttr = 5; iAttr < 9; iAttr++) {
2131        if ((utfcount > 0) && (utfcount <= (cinv - 1))) {
2132/*            utf8char[0] = graph[count + graphlen - cinv];*/
2133            if (!kdtGetUTF8char(graph, graphlen, utfcount+utfgraphlen-cinv,
2134                                utf8char)) {
2135                PICODBG_WARN(("problem getting UTF char %d",
2136                              utfcount+utfgraphlen-cinv-1));
2137                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
2138                utf8char[1] = '\0';
2139            }
2140        } else {
2141            if (utfcount == cinv) {
2142                utf8char[0] = PICOKDT_OUTSIDEGRAPH_EOW_DEFCH;
2143                utf8char[1] = '\0';
2144            } else {
2145                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
2146                utf8char[1] = '\0';
2147            }
2148        }
2149        if (!kdtMapInGraph(&(dtg2p->dt), iAttr,
2150                           utf8char, PICOBASE_UTF8_MAXLEN,
2151                           &(dtg2p->invec[iAttr]),
2152                           &fallback)) {
2153            if (fallback) {
2154                dtg2p->invec[iAttr] = fallback;
2155            } else {
2156                PICODBG_WARN(("setting attribute %d to zero", iAttr));
2157                dtg2p->invec[iAttr] = 0;
2158                retval = FALSE;
2159            }
2160        }
2161        PICODBG_TRACE(("invec %d %c", iAttr, utf8char[0]));
2162        cinv--;
2163    }
2164
2165    /* other attributes, MapInFixed */
2166    for (iAttr = 9; iAttr < PICOKDT_NRATT_G2P; iAttr++) {
2167        switch (iAttr) {
2168            case 9:     /* word POS, Fix1 */
2169                inval = pos;
2170                break;
2171            case 10:    /* nr of vowel-like graphs in word, if vowel, Fix2  */
2172                inval = nrvow;
2173                break;
2174            case 11:    /* order of current vowel-like graph in word, Fix2 */
2175                inval = ordvow;
2176                break;
2177            case 12:    /* primary stress mark, Fix2 */
2178                if (*primstressflag == 1) {
2179                    /*already set previously*/
2180                    inval = 1;
2181                } else {
2182                    inval = 0;
2183                }
2184                break;
2185            case 13:    /* phone chunk right context +1, Hist */
2186                inval = phonech1;
2187                break;
2188            case 14:    /* phone chunk right context +2, Hist */
2189                inval = phonech2;
2190                break;
2191            case 15:    /* phone chunk right context +3, Hist */
2192                inval = phonech3;
2193                break;
2194        }
2195
2196        PICODBG_TRACE(("invec %d %d", iAttr, inval));
2197
2198        if (!kdtMapInFixed(&(dtg2p->dt), iAttr, inval,
2199                           &(dtg2p->invec[iAttr]), &fallback)) {
2200            if (fallback) {
2201                dtg2p->invec[iAttr] = fallback;
2202            } else {
2203                PICODBG_WARN(("setting attribute %d to zero", iAttr));
2204                dtg2p->invec[iAttr] = 0;
2205                retval = FALSE;
2206            }
2207        }
2208    }
2209
2210    PICODBG_TRACE(("out: [%d,%d%,%d,%d|%d|%d,%d,%d,%d|%d,%d,%d,%d|"
2211                   "%d,%d,%d]", dtg2p->invec[0], dtg2p->invec[1],
2212                   dtg2p->invec[2], dtg2p->invec[3], dtg2p->invec[4],
2213                   dtg2p->invec[5], dtg2p->invec[6], dtg2p->invec[7],
2214                   dtg2p->invec[8], dtg2p->invec[9], dtg2p->invec[10],
2215                   dtg2p->invec[11], dtg2p->invec[12], dtg2p->invec[13],
2216                   dtg2p->invec[14], dtg2p->invec[15]));
2217
2218    dtg2p->inveclen = PICOKDT_NRINPMT_G2P;
2219    return retval;
2220}
2221
2222
2223
2224
2225picoos_uint8 picokdt_dtG2Pclassify(const picokdt_DtG2P this,
2226                                   picoos_uint16 *treeout) {
2227    picoos_uint32 iByteNo;
2228    picoos_int8 iBitNo;
2229    picoos_int8 rv;
2230    kdtg2p_subobj_t *dtg2p;
2231    kdt_subobj_t *dt;
2232
2233    dtg2p = (kdtg2p_subobj_t *)this;
2234    dt = &(dtg2p->dt);
2235    iByteNo = 0;
2236    iBitNo = 7;
2237    while ((rv = kdtAskTree(dt, dtg2p->invec, PICOKDT_NRATT_G2P,
2238                            &iByteNo, &iBitNo)) > 0) {
2239        PICODBG_TRACE(("asking tree"));
2240    }
2241    PICODBG_TRACE(("done: %d", dt->dclass));
2242    if ((rv == 0) && dt->dset) {
2243        *treeout = dt->dclass;
2244        return TRUE;
2245    } else {
2246        return FALSE;
2247    }
2248}
2249
2250
2251
2252picoos_uint8 picokdt_dtG2PdecomposeOutClass(const picokdt_DtG2P this,
2253                                  picokdt_classify_vecresult_t *dtvres) {
2254    kdtg2p_subobj_t *dtg2p;
2255
2256    dtg2p = (kdtg2p_subobj_t *)this;
2257
2258    if (dtg2p->dt.dset &&
2259        kdtMapOutVar(&(dtg2p->dt), dtg2p->dt.dclass, &(dtvres->nr),
2260                     dtvres->classvec, PICOKDT_MAXSIZE_OUTVEC)) {
2261        return TRUE;
2262    } else {
2263        dtvres->nr = 0;
2264        return FALSE;
2265    }
2266    return TRUE;
2267}
2268
2269
2270
2271/* ************************************************************/
2272/* decision tree phrasing (PHR) functions */
2273/* ************************************************************/
2274
2275picoos_uint8 picokdt_dtPHRconstructInVec(const picokdt_DtPHR this,
2276                                         const picoos_uint8 pre2,
2277                                         const picoos_uint8 pre1,
2278                                         const picoos_uint8 src,
2279                                         const picoos_uint8 fol1,
2280                                         const picoos_uint8 fol2,
2281                                         const picoos_uint16 nrwordspre,
2282                                         const picoos_uint16 nrwordsfol,
2283                                         const picoos_uint16 nrsyllsfol) {
2284    kdtphr_subobj_t *dtphr;
2285    picoos_uint8 i;
2286    picoos_uint16 inval = 0;
2287    picoos_uint16 fallback = 0;
2288
2289    dtphr = (kdtphr_subobj_t *)this;
2290    PICODBG_DEBUG(("in:  [%d,%d|%d|%d,%d|%d,%d,%d]",
2291                   pre2, pre1, src, fol1, fol2,
2292                   nrwordspre, nrwordsfol, nrsyllsfol));
2293    dtphr->inveclen = 0;
2294
2295    for (i = 0; i < PICOKDT_NRATT_PHR; i++) {
2296        switch (i) {
2297            case 0: inval = pre2; break;
2298            case 1: inval = pre1; break;
2299            case 2: inval = src; break;
2300            case 3: inval = fol1;  break;
2301            case 4: inval = fol2; break;
2302            case 5: inval = nrwordspre; break;
2303            case 6: inval = nrwordsfol; break;
2304            case 7: inval = nrsyllsfol; break;
2305            default:
2306                PICODBG_ERROR(("size mismatch"));
2307                return FALSE;
2308                break;
2309        }
2310
2311        /* do the imt mapping for all inval */
2312        if (!kdtMapInFixed(&(dtphr->dt), i, inval,
2313                           &(dtphr->invec[i]), &fallback)) {
2314            if (fallback) {
2315                dtphr->invec[i] = fallback;
2316            } else {
2317                PICODBG_ERROR(("problem doing input mapping"));
2318                return FALSE;
2319            }
2320        }
2321    }
2322
2323    PICODBG_DEBUG(("out: [%d,%d|%d|%d,%d|%d,%d,%d]",
2324                   dtphr->invec[0], dtphr->invec[1], dtphr->invec[2],
2325                   dtphr->invec[3], dtphr->invec[4], dtphr->invec[5],
2326                   dtphr->invec[6], dtphr->invec[7]));
2327    dtphr->inveclen = PICOKDT_NRINPMT_PHR;
2328    return TRUE;
2329}
2330
2331
2332picoos_uint8 picokdt_dtPHRclassify(const picokdt_DtPHR this) {
2333    picoos_uint32 iByteNo;
2334    picoos_int8 iBitNo;
2335    picoos_int8 rv;
2336    kdtphr_subobj_t *dtphr;
2337    kdt_subobj_t *dt;
2338
2339    dtphr = (kdtphr_subobj_t *)this;
2340    dt = &(dtphr->dt);
2341    iByteNo = 0;
2342    iBitNo = 7;
2343    while ((rv = kdtAskTree(dt, dtphr->invec, PICOKDT_NRATT_PHR,
2344                            &iByteNo, &iBitNo)) > 0) {
2345        PICODBG_TRACE(("asking tree"));
2346    }
2347    PICODBG_DEBUG(("done: %d", dt->dclass));
2348    return ((rv == 0) && dt->dset);
2349}
2350
2351
2352picoos_uint8 picokdt_dtPHRdecomposeOutClass(const picokdt_DtPHR this,
2353                                            picokdt_classify_result_t *dtres) {
2354    kdtphr_subobj_t *dtphr;
2355    picoos_uint16 val;
2356
2357    dtphr = (kdtphr_subobj_t *)this;
2358
2359    if (dtphr->dt.dset &&
2360        kdtMapOutFixed(&(dtphr->dt), dtphr->dt.dclass, &val)) {
2361        dtres->set = TRUE;
2362        dtres->class = val;
2363        return TRUE;
2364    } else {
2365        dtres->set = FALSE;
2366        return FALSE;
2367    }
2368}
2369
2370
2371
2372/* ************************************************************/
2373/* decision tree phono-acoustical model (PAM) functions */
2374/* ************************************************************/
2375
2376picoos_uint8 picokdt_dtPAMconstructInVec(const picokdt_DtPAM this,
2377                                         const picoos_uint8 *vec,
2378                                         const picoos_uint8 veclen) {
2379    kdtpam_subobj_t *dtpam;
2380    picoos_uint8 i;
2381    picoos_uint16 fallback = 0;
2382
2383    dtpam = (kdtpam_subobj_t *)this;
2384
2385    PICODBG_TRACE(("in0:  %d %d %d %d %d %d %d %d %d %d",
2386                   vec[0], vec[1], vec[2], vec[3], vec[4],
2387                   vec[5], vec[6], vec[7], vec[8], vec[9]));
2388    PICODBG_TRACE(("in1:  %d %d %d %d %d %d %d %d %d %d",
2389                   vec[10], vec[11], vec[12], vec[13], vec[14],
2390                   vec[15], vec[16], vec[17], vec[18], vec[19]));
2391    PICODBG_TRACE(("in2:  %d %d %d %d %d %d %d %d %d %d",
2392                   vec[20], vec[21], vec[22], vec[23], vec[24],
2393                   vec[25], vec[26], vec[27], vec[28], vec[29]));
2394    PICODBG_TRACE(("in3:  %d %d %d %d %d %d %d %d %d %d",
2395                   vec[30], vec[31], vec[32], vec[33], vec[34],
2396                   vec[35], vec[36], vec[37], vec[38], vec[39]));
2397    PICODBG_TRACE(("in4:  %d %d %d %d %d %d %d %d %d %d",
2398                   vec[40], vec[41], vec[42], vec[43], vec[44],
2399                   vec[45], vec[46], vec[47], vec[48], vec[49]));
2400    PICODBG_TRACE(("in5:  %d %d %d %d %d %d %d %d %d %d",
2401                   vec[50], vec[51], vec[52], vec[53], vec[54],
2402                   vec[55], vec[56], vec[57], vec[58], vec[59]));
2403
2404    dtpam->inveclen = 0;
2405
2406    /* check veclen */
2407    if (veclen != PICOKDT_NRINPMT_PAM) {
2408        PICODBG_ERROR(("wrong number of input vector elements"));
2409        return FALSE;
2410    }
2411
2412    for (i = 0; i < PICOKDT_NRATT_PAM; i++) {
2413
2414        /* do the imt mapping for all vec eles */
2415        if (!kdtMapInFixed(&(dtpam->dt), i, vec[i],
2416                           &(dtpam->invec[i]), &fallback)) {
2417            if (fallback) {
2418                dtpam->invec[i] = fallback;
2419            } else {
2420                PICODBG_ERROR(("problem doing input mapping, %d %d", i,vec[i]));
2421                return FALSE;
2422            }
2423        }
2424    }
2425
2426    PICODBG_TRACE(("in0:  %d %d %d %d %d %d %d %d %d %d",
2427                   dtpam->invec[0], dtpam->invec[1], dtpam->invec[2],
2428                   dtpam->invec[3], dtpam->invec[4], dtpam->invec[5],
2429                   dtpam->invec[6], dtpam->invec[7], dtpam->invec[8],
2430                   dtpam->invec[9]));
2431    PICODBG_TRACE(("in1:  %d %d %d %d %d %d %d %d %d %d",
2432                   dtpam->invec[10], dtpam->invec[11], dtpam->invec[12],
2433                   dtpam->invec[13], dtpam->invec[14], dtpam->invec[15],
2434                   dtpam->invec[16], dtpam->invec[17], dtpam->invec[18],
2435                   dtpam->invec[19]));
2436    PICODBG_TRACE(("in2:  %d %d %d %d %d %d %d %d %d %d",
2437                   dtpam->invec[20], dtpam->invec[21], dtpam->invec[22],
2438                   dtpam->invec[23], dtpam->invec[24], dtpam->invec[25],
2439                   dtpam->invec[26], dtpam->invec[27], dtpam->invec[28],
2440                   dtpam->invec[29]));
2441    PICODBG_TRACE(("in3:  %d %d %d %d %d %d %d %d %d %d",
2442                   dtpam->invec[30], dtpam->invec[31], dtpam->invec[32],
2443                   dtpam->invec[33], dtpam->invec[34], dtpam->invec[35],
2444                   dtpam->invec[36], dtpam->invec[37], dtpam->invec[38],
2445                   dtpam->invec[39]));
2446    PICODBG_TRACE(("in4:  %d %d %d %d %d %d %d %d %d %d",
2447                   dtpam->invec[40], dtpam->invec[41], dtpam->invec[42],
2448                   dtpam->invec[43], dtpam->invec[44], dtpam->invec[45],
2449                   dtpam->invec[46], dtpam->invec[47], dtpam->invec[48],
2450                   dtpam->invec[49]));
2451    PICODBG_TRACE(("in5:  %d %d %d %d %d %d %d %d %d %d",
2452                   dtpam->invec[50], dtpam->invec[51], dtpam->invec[52],
2453                   dtpam->invec[53], dtpam->invec[54], dtpam->invec[55],
2454                   dtpam->invec[56], dtpam->invec[57], dtpam->invec[58],
2455                   dtpam->invec[59]));
2456
2457    dtpam->inveclen = PICOKDT_NRINPMT_PAM;
2458    return TRUE;
2459}
2460
2461
2462picoos_uint8 picokdt_dtPAMclassify(const picokdt_DtPAM this) {
2463    picoos_uint32 iByteNo;
2464    picoos_int8 iBitNo;
2465    picoos_int8 rv;
2466    kdtpam_subobj_t *dtpam;
2467    kdt_subobj_t *dt;
2468
2469    dtpam = (kdtpam_subobj_t *)this;
2470    dt = &(dtpam->dt);
2471    iByteNo = 0;
2472    iBitNo = 7;
2473    while ((rv = kdtAskTree(dt, dtpam->invec, PICOKDT_NRATT_PAM,
2474                            &iByteNo, &iBitNo)) > 0) {
2475        PICODBG_TRACE(("asking tree"));
2476    }
2477    PICODBG_DEBUG(("done: %d", dt->dclass));
2478    return ((rv == 0) && dt->dset);
2479}
2480
2481
2482picoos_uint8 picokdt_dtPAMdecomposeOutClass(const picokdt_DtPAM this,
2483                                            picokdt_classify_result_t *dtres) {
2484    kdtpam_subobj_t *dtpam;
2485    picoos_uint16 val;
2486
2487    dtpam = (kdtpam_subobj_t *)this;
2488
2489    if (dtpam->dt.dset &&
2490        kdtMapOutFixed(&(dtpam->dt), dtpam->dt.dclass, &val)) {
2491        dtres->set = TRUE;
2492        dtres->class = val;
2493        return TRUE;
2494    } else {
2495        dtres->set = FALSE;
2496        return FALSE;
2497    }
2498}
2499
2500
2501
2502/* ************************************************************/
2503/* decision tree accentuation (ACC) functions */
2504/* ************************************************************/
2505
2506picoos_uint8 picokdt_dtACCconstructInVec(const picokdt_DtACC this,
2507                                         const picoos_uint8 pre2,
2508                                         const picoos_uint8 pre1,
2509                                         const picoos_uint8 src,
2510                                         const picoos_uint8 fol1,
2511                                         const picoos_uint8 fol2,
2512                                         const picoos_uint16 hist1,
2513                                         const picoos_uint16 hist2,
2514                                         const picoos_uint16 nrwordspre,
2515                                         const picoos_uint16 nrsyllspre,
2516                                         const picoos_uint16 nrwordsfol,
2517                                         const picoos_uint16 nrsyllsfol,
2518                                         const picoos_uint16 footwordsfol,
2519                                         const picoos_uint16 footsyllsfol) {
2520    kdtacc_subobj_t *dtacc;
2521    picoos_uint8 i;
2522    picoos_uint16 inval = 0;
2523    picoos_uint16 fallback = 0;
2524
2525    dtacc = (kdtacc_subobj_t *)this;
2526    PICODBG_DEBUG(("in:  [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]",
2527                   pre2, pre1, src, fol1, fol2, hist1, hist2,
2528                   nrwordspre, nrsyllspre, nrwordsfol, nrsyllsfol,
2529                   footwordsfol, footsyllsfol));
2530    dtacc->inveclen = 0;
2531
2532    for (i = 0; i < PICOKDT_NRATT_ACC; i++) {
2533        switch (i) {
2534            case 0: inval = pre2; break;
2535            case 1: inval = pre1; break;
2536            case 2: inval = src; break;
2537            case 3: inval = fol1;  break;
2538            case 4: inval = fol2; break;
2539            case 5: inval = hist1; break;
2540            case 6: inval = hist2; break;
2541            case 7: inval = nrwordspre; break;
2542            case 8: inval = nrsyllspre; break;
2543            case 9: inval = nrwordsfol; break;
2544            case 10: inval = nrsyllsfol; break;
2545            case 11: inval = footwordsfol; break;
2546            case 12: inval = footsyllsfol; break;
2547            default:
2548                PICODBG_ERROR(("size mismatch"));
2549                return FALSE;
2550                break;
2551        }
2552
2553        if (((i == 5) || (i == 6)) && (inval == PICOKDT_HISTORY_ZERO)) {
2554            /* in input to this function the HISTORY_ZERO is used to
2555               mark the no-value-available case. For sparsity reasons
2556               this was not used in the training. For
2557               no-value-available cases, instead, do reverse out
2558               mapping of ACC0 to get tree domain for ACC0  */
2559            if (!kdtReverseMapOutFixed(&(dtacc->dt), PICODATA_ACC0,
2560                                       &inval, &fallback)) {
2561                if (fallback) {
2562                    inval = fallback;
2563                } else {
2564                    PICODBG_ERROR(("problem doing reverse output mapping"));
2565                    return FALSE;
2566                }
2567            }
2568        }
2569
2570        /* do the imt mapping for all inval */
2571        if (!kdtMapInFixed(&(dtacc->dt), i, inval,
2572                           &(dtacc->invec[i]), &fallback)) {
2573            if (fallback) {
2574                dtacc->invec[i] = fallback;
2575            } else {
2576                PICODBG_ERROR(("problem doing input mapping"));
2577                return FALSE;
2578            }
2579        }
2580    }
2581
2582    PICODBG_DEBUG(("out: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]",
2583                   dtacc->invec[0], dtacc->invec[1], dtacc->invec[2],
2584                   dtacc->invec[3], dtacc->invec[4], dtacc->invec[5],
2585                   dtacc->invec[6], dtacc->invec[7], dtacc->invec[8],
2586                   dtacc->invec[9], dtacc->invec[10], dtacc->invec[11],
2587                   dtacc->invec[12]));
2588    dtacc->inveclen = PICOKDT_NRINPMT_ACC;
2589    return TRUE;
2590}
2591
2592
2593picoos_uint8 picokdt_dtACCclassify(const picokdt_DtACC this,
2594                                   picoos_uint16 *treeout) {
2595    picoos_uint32 iByteNo;
2596    picoos_int8 iBitNo;
2597    picoos_int8 rv;
2598    kdtacc_subobj_t *dtacc;
2599    kdt_subobj_t *dt;
2600
2601    dtacc = (kdtacc_subobj_t *)this;
2602    dt = &(dtacc->dt);
2603    iByteNo = 0;
2604    iBitNo = 7;
2605    while ((rv = kdtAskTree(dt, dtacc->invec, PICOKDT_NRATT_ACC,
2606                            &iByteNo, &iBitNo)) > 0) {
2607        PICODBG_TRACE(("asking tree"));
2608    }
2609    PICODBG_TRACE(("done: %d", dt->dclass));
2610    if ((rv == 0) && dt->dset) {
2611        *treeout = dt->dclass;
2612        return TRUE;
2613    } else {
2614        return FALSE;
2615    }
2616}
2617
2618
2619picoos_uint8 picokdt_dtACCdecomposeOutClass(const picokdt_DtACC this,
2620                                            picokdt_classify_result_t *dtres) {
2621    kdtacc_subobj_t *dtacc;
2622    picoos_uint16 val;
2623
2624    dtacc = (kdtacc_subobj_t *)this;
2625
2626    if (dtacc->dt.dset &&
2627        kdtMapOutFixed(&(dtacc->dt), dtacc->dt.dclass, &val)) {
2628        dtres->set = TRUE;
2629        dtres->class = val;
2630        return TRUE;
2631    } else {
2632        dtres->set = FALSE;
2633        return FALSE;
2634    }
2635}
2636
2637#ifdef __cplusplus
2638}
2639#endif
2640
2641
2642/* end */
2643