1/*---------------------------------------------------------------------------*
2 *  parseStringTest.c  *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20
21
22#include "pstdio.h"
23#include "pmemory.h"
24#include "plog.h"
25
26
27#include "HashMap.h"
28#include "SR_Grammar.h"
29#include "SR_SemanticResult.h"
30#include "ESR_Session.h"
31#include "ESR_Locale.h"
32#include "LCHAR.h"
33
34#include "PFileSystem.h"
35#include "PANSIFileSystem.h"
36
37/* for testing RecognizerImpl.c, see below */
38#include"buildopt.h"
39#include"setting.h"
40#include"srec_sizes.h"
41#include"SR_GrammarImpl.h"
42
/* defines: fixed capacities used by the buffers and arrays in this file */

/* capacity, in characters, of the buffers that hold one input line */
#define MAX_LINE_LENGTH 256

/* capacity, in characters, of the buffers that receive a semantic value */
#define MAX_STR_LENGTH 512

/* number of semantic result holders created for every parse */
#define MAX_SEM_RESULTS 3

/* capacity of the key arrays handed to getKeyList() */
#define MAX_KEYS 30
48
49/* protos */
50ESR_ReturnCode process_single_key_line(SR_Grammar* grammar, PFile* fin, PFile* fout);
51ESR_ReturnCode process_multi_key_line(SR_Grammar* grammar, const LCHAR* rootrule, PFile* fin, PFile* fout);
52
/* struct: command-line switches that select how Parse() works */
struct Opts
{
  /* non-zero: look the words up in the grammar's word map and parse by word ID
     (set by -ids and by -allids) */
  int use_parse_by_string_ids;
  /* non-zero: ignore the transcription and parse every word of the word map
     (set by -allids) */
  int do_check_all_ids;
};

typedef struct Opts Opts;
60
61int usage(LCHAR* exename)
62{
63  pfprintf(PSTDOUT, "usage: %s -base <basefilename> [-in <input file>] [-out <output file>] [-itest <testfilename>]\n", exename);
64  return 1;
65}
66
67void lstr_strip_multiple_spaces(LCHAR* trans)
68{
69  char *src=trans, *dst=trans;
70  for( ;(*dst = *src)!=L('\0'); src++) {
71    if(*dst != ' ') dst++;
72    else if(src[1] != ' ') dst++;
73  }
74}
75
76/**
77 * Display the Semantic Result
78 */
79void display_results(SR_SemanticResult *result, PFile* fout)
80{
81  size_t i, size, len;
82  LCHAR* keys[MAX_KEYS]; /* array of pointers to strings */
83  LCHAR  value[MAX_STR_LENGTH];
84  ESR_ReturnCode rc;
85
86  size = MAX_KEYS;
87  rc = result->getKeyList(result, (LCHAR**) & keys, &size); /* get the key list */
88  if (rc == ESR_SUCCESS)
89  {
90    for (i = 0; i < size; i++)
91    {
92      len = MAX_STR_LENGTH;
93      if ((rc = result->getValue(result, keys[i], value, &len)) == ESR_SUCCESS)
94        pfprintf(fout, "{%s : %s}\n", keys[i], value);
95      else
96        pfprintf(fout, "Error: %s\n", ESR_rc2str(rc));
97    }
98    pfprintf(fout, "--Done--\n");
99  }
100  else
101    pfprintf(fout, "Error: %s\n", ESR_rc2str(rc));
102}
103
104ESR_ReturnCode Parse(SR_Grammar* grammar, LCHAR* trans, PFile* fout, Opts* opts)
105{
106  ESR_ReturnCode rc = ESR_SUCCESS;
107  size_t i, result_count, key_count;
108  SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
109  wordID wordIDs[32], *wordIDptr;
110  SR_GrammarImpl* pgrammar = (SR_GrammarImpl*)grammar;
111  wordmap* wmap;
112
113  if (opts->do_check_all_ids)
114  {
115    wordID id;
116    Opts myopts;
117    memcpy(&myopts, opts, sizeof(myopts));
118    myopts.do_check_all_ids = 0;
119    wmap = pgrammar->syntax->synx->olabels;
120    /* start at word 4 because "eps, -pau- -pau2- @root */
121    for (id = 4; id < wmap->num_words; id++)
122    {
123      trans = wmap->words[id];
124      Parse(grammar, trans, fout, &myopts);
125    }
126    return 0;
127  }
128
129  result_count = MAX_SEM_RESULTS; /* initially not greater than MAX */
130  for (i = 0; i < result_count; i++)
131    SR_SemanticResultCreate(&semanticResults[i]); /* create the result holders */
132  lstrtrim(trans);
133  /* check for multiple space separators! */
134  lstr_strip_multiple_spaces(trans);
135
136  if (!opts->use_parse_by_string_ids)
137  {
138    rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) & result_count);
139  }
140  else
141  {
142    char copy_of_trans[256], *p;
143    strcpy(copy_of_trans, trans);
144    wmap = pgrammar->syntax->synx->olabels;
145    wordIDs[0] = wordIDs[1] = MAXwordID;
146    wordIDptr = &wordIDs[0];
147    for (p = strtok(copy_of_trans, " "); p; p = strtok(NULL, " "))
148    {
149      for (i = 0; i < wmap->num_words; i++)
150        if (!strcmp(wmap->words[i], p))
151        {
152          *wordIDptr++ = (wordID)i;
153          break;
154        }
155      if (i == wmap->num_words)
156      {
157        wordIDs[0] = MAXwordID;
158        break;
159      }
160    }
161    *wordIDptr++ = MAXwordID;
162
163    /* printf("wordids:");
164       for(wordIDptr=&wordIDs[0]; *wordIDptr!=MAXwordID; wordIDptr++)
165       printf(" %d/%s", *wordIDptr, wmap->words[*wordIDptr]);
166       printf("\n"); */
167
168    if (wordIDs[0] == MAXwordID)
169    {
170      result_count = 0;
171      rc = ESR_SUCCESS;
172    }
173    else
174    {
175      rc = pgrammar->semproc->flush(pgrammar->semproc);
176      rc = pgrammar->semproc->setParam(pgrammar->semproc, L("literal"), trans);
177      rc = pgrammar->semproc->checkParseByWordID(pgrammar->semproc, pgrammar->semgraph,
178           wordIDs, semanticResults, &result_count);
179    }
180  }
181  if (rc != ESR_SUCCESS)
182  {
183    pfprintf(fout, "error (%s)\n\n", trans);
184    return rc;
185  }
186
187  if (result_count < 1)
188  {
189    pfprintf(fout, "no parse (%s)\n\n", trans);
190  }
191  else
192  {
193    key_count = 0xffff;
194    rc = SR_SemanticResultGetKeyCount(semanticResults[0], &key_count);
195    pfprintf(fout, "parse ok (%d results) (%s) (%d)\n", result_count, trans, key_count);
196    for (i = 0; i < result_count; i++)
197      display_results(semanticResults[i], fout);
198
199    for (i = 0; i < MAX_SEM_RESULTS; i++)
200    {
201      rc = semanticResults[i]->destroy(semanticResults[i]);
202      if (rc != ESR_SUCCESS)
203        return rc;
204    }
205  }
206  return ESR_SUCCESS;
207}
208
209/* tests the transcription against the grammar and then decided based on what was expected of the test
210whether or not is it considered a pass or fail */
211ESR_ReturnCode ParseTestSet(SR_Grammar* grammar, LCHAR* trans, LCHAR* key, LCHAR* ref, LCHAR* result, PFile* fout)
212{
213  size_t len;
214  ESR_ReturnCode rc;
215  int i, result_count;
216  SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
217  LCHAR  value[MAX_STR_LENGTH];
218
219  result_count = MAX_SEM_RESULTS;
220  for (i = 0; i < result_count; i++)
221    SR_SemanticResultCreate(&semanticResults[i]);
222
223  lstrtrim(trans);
224  /* check for multiple space separators! */
225  lstr_strip_multiple_spaces(trans);
226
227  pfprintf(fout, "checking (%s) ref(%s) res(%s)\n", trans, ref, result);
228  rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) & result_count);
229  if (rc != ESR_SUCCESS)
230    return rc;
231
232  /*result file will contain
233  transcription | key | reference | result | PASSESD/FAILED */
234
235  if (result_count < 1) /*failed to parse, but this could still be a pass if you expected a failure*/
236  {
237    pfprintf(fout, "NO PARSE FOR: %s|%s|%s|  |", trans, key, ref);
238    if (strcmp("FAIL", result) == 0)
239      pfprintf(fout, "PASSED (%s)\n", trans);
240    else
241      pfprintf(fout, "FAILED (%s)\n", trans);
242  }
243  else /*parsed, look at what was expected, what was returned and which of PASS/FAIL is expected */
244  {
245    for (i = 0; i < result_count; i++)
246    {
247      len = MAX_STR_LENGTH;
248      if ((rc = semanticResults[i]->getValue(semanticResults[i], key, value, &len)) == ESR_SUCCESS)
249      {
250        pfprintf(fout, "%s|%s|%s|%s|", trans, key, ref, value);
251
252        if (strcmp(value, ref) == 0 && strcmp("PASS", result) == 0)
253          pfprintf(fout, "PASSED\n");
254        else
255          pfprintf(fout, "FAILED\n");
256      }
257      else
258      {
259        pfprintf(fout, "ERROR: %s, while checking key='%s'\n", ESR_rc2str(rc), key);
260      }
261    }
262
263    /*deallocate semantic results*/
264    for (i = 0; i < MAX_SEM_RESULTS; i++)
265    {
266      rc = semanticResults[i]->destroy(semanticResults[i]);
267      if (rc != ESR_SUCCESS)
268        return rc;
269    }
270  }
271  return ESR_SUCCESS;
272}
273
274int main(int argc, char **argv)
275{
276  LCHAR trans[MAX_LINE_LENGTH];
277  SR_Grammar* grammar = NULL;
278  ESR_ReturnCode rc;
279  LCHAR base[P_PATH_MAX] = L("");
280  LCHAR infilename[P_PATH_MAX] = L("");
281  LCHAR inRTfilename[P_PATH_MAX] = L("");
282  LCHAR outfilename[P_PATH_MAX] = L("");
283  PFile *fin = NULL, *fout = NULL;
284  int i;
285  LCHAR *rootrule = L("myRoot"), *p;
286  Opts opts = { 0, 0 };
287
288  /*
289   * Initialize portable library.
290   */
291  CHKLOG(rc, PMemInit());
292
293  fin = PSTDIN;
294  fout = PSTDOUT;
295
296  if (argc < 3)
297  {
298    usage(argv[0]);
299    exit(EXIT_FAILURE);
300  }
301  for (i = 1; i < argc; ++i)
302  {
303    if (!LSTRCMP(argv[i], L("-base")))
304    {
305      ++i;
306      LSTRCPY(base, argv[i]);
307    }
308    else if (!LSTRCMP(argv[i], L("-in")))
309    {
310      ++i;
311      LSTRCPY(infilename, argv[i]);
312    }
313    else if (!LSTRCMP(argv[i], L("-out")))
314    {
315      ++i;
316      LSTRCPY(outfilename, argv[i]);
317    }
318    else if (!LSTRCMP(argv[i], L("-itest")))
319    {
320      ++i;
321      LSTRCPY(inRTfilename, argv[i]);
322    }
323    else if (!LSTRCMP(argv[i], L("-ids")))
324    {
325      opts.use_parse_by_string_ids = 1;
326    }
327    else if (!LSTRCMP(argv[i], L("-allids")))
328    {
329      opts.do_check_all_ids = 1;
330      opts.use_parse_by_string_ids = 1;
331    }
332    else
333      return usage(argv[0]);
334  }
335
336  CHK(rc, PLogInit(NULL, 0));
337
338  rc = SR_GrammarLoad(base, &grammar);
339  if (rc != ESR_SUCCESS)
340    goto CLEANUP;
341
342  if (*outfilename)
343  {
344    if ((fout = pfopen(outfilename, "w")) == NULL)
345    {
346      pfprintf(PSTDOUT, "Could not open file: %s\n", outfilename);
347      rc = 1;
348      goto CLEANUP;
349    }
350  }
351
352  if (opts.do_check_all_ids)
353  {
354    rc = Parse(grammar, NULL, fout, &opts);
355  }
356  else if (*infilename)
357  {
358    if (LSTRCMP(infilename, "-") == 0)
359    {
360      fin = PSTDIN;
361    }
362    else if ((fin = pfopen(infilename, "r")) == NULL)
363    {
364      pfprintf(PSTDOUT, "Could not open file: %s\n", infilename);
365      rc = 1;
366      goto CLEANUP;
367    }
368    for (;;)
369    {
370      if (pfgets(trans, MAX_LINE_LENGTH, fin) == NULL)
371      {
372        if (!pfeof(fin))
373        {
374          rc = ESR_READ_ERROR;
375          PLogError(ESR_rc2str(rc));
376        }
377        break;
378      }
379      if (trans[0] == '#') continue;
380      lstrtrim(trans);
381      /* check for multiple space separators! */
382      lstr_strip_multiple_spaces(trans);
383      pfprintf(fout, "Transcription: %s\n", trans);
384      if ((rc = Parse(grammar, trans, fout, &opts)) != ESR_SUCCESS)
385        goto CLEANUP;
386      pfprintf(fout, "\n");
387    }
388  }
389  else if (*inRTfilename) /*using a test file*/
390  {
391    if ((fin = pfopen(inRTfilename, "r")) == NULL)
392    {
393      pfprintf(PSTDOUT, "Could not open test file: %s\n", inRTfilename);
394      rc = 1;
395      goto CLEANUP;
396    }
397
398    /*read through the test file parsing it into the variables
399     FORMAT: "the transciption" key "value"
400    */
401    while (ESR_TRUE)
402    {
403      if (0) rc = process_single_key_line(grammar, fin, fout);
404      else  rc = process_multi_key_line(grammar, rootrule, fin, fout);
405      if (rc == ESR_READ_ERROR)
406      {
407        rc = ESR_SUCCESS;
408        break;
409      }
410    }
411  }
412  else
413  {
414    /* get some transcriptions from the user */
415    pfprintf(PSTDOUT, "\nSemantic Parser Test Program for esr (Nuance Communicaitions, 2007)\n");
416    pfprintf(PSTDOUT, "'qqq' to quit\n");
417
418    while (ESR_TRUE)
419    {
420      pfprintf(PSTDOUT, "> ");
421
422      if (!fgets(trans, MAX_LINE_LENGTH, PSTDIN))
423        break;
424      // remove trailing whitespace
425      for(p=&trans[0]; *p!=0 && *p!='\n' && *p!='\r'; p++) {}
426      *p=0;
427
428      if (!LSTRCMP("qqq", trans))
429        break;
430      else
431        if ((rc = Parse(grammar, trans, fout, &opts)) != ESR_SUCCESS)
432          goto CLEANUP;
433    }
434  }
435CLEANUP:
436  if (fin && fin != PSTDIN)
437    pfclose(fin);
438  if (fout && fout != PSTDOUT)
439    pfclose(fout);
440  if (grammar) grammar->destroy(grammar);
441  PLogShutdown();
442/*  PANSIFileSystemDestroy();
443  PFileSystemDestroy();*/
444  PMemShutdown();
445  return rc;
446}
447
448ESR_ReturnCode process_single_key_line(SR_Grammar* grammar, PFile* fin, PFile* fout)
449{
450  LCHAR* position;
451  LCHAR line[MAX_LINE_LENGTH];
452  LCHAR trans[MAX_LINE_LENGTH];
453  LCHAR key[MAX_LINE_LENGTH];
454  LCHAR refValue[MAX_LINE_LENGTH];
455  LCHAR result[MAX_LINE_LENGTH];
456  ESR_ReturnCode rc;
457
458  position = pfgets(line, MAX_LINE_LENGTH, fin);
459  if (line[0] == '#')
460    return ESR_SUCCESS;
461  if (!strncmp(line, "__END__", 7))
462    return ESR_READ_ERROR;
463  if (position == NULL)
464  {
465    if (pfeof(fin))
466      return ESR_READ_ERROR;
467    else
468    {
469      PLogError(L("ESR_READ_ERROR"));
470      return ESR_READ_ERROR;
471    }
472  }
473
474  //get the transcription to test
475  if ((position = strtok(line, "\"")) != NULL)
476  {
477    LSTRCPY(trans, position);
478  }
479  else
480  {
481    pfprintf(fout, "INVALID FORMAT for input line 1 \n");
482    rc = ESR_INVALID_ARGUMENT;
483    goto CLEANUP;
484  }
485
486  //get the key (meaning)
487  if ((position = strtok(NULL, " \t")) != NULL)
488  {
489    LSTRCPY(key, position);
490  }
491  else
492  {
493    pfprintf(fout, "INVALID FORMAT for input line 2\n");
494    rc = ESR_INVALID_ARGUMENT;
495    goto CLEANUP;
496  }
497
498  //get the expected return string
499  if ((position = strtok(NULL, "\"")) != NULL)
500  {
501    LSTRCPY(refValue, position);
502  }
503  else
504  {
505    pfprintf(fout, "INVALID FORMAT for input line 3\n");
506    rc = ESR_INVALID_ARGUMENT;
507    goto CLEANUP;
508  }
509
510  //get the expected result PASS/FAIL
511  //there is no need to write PASS, if nothing is written PASS is assumed
512  if ((position = strtok(NULL, " \t\r\n\"")) != NULL)
513  {
514    LSTRCPY(result, position);
515
516    if (strcmp(result, "PASS") != 0 && strcmp(result, "FAIL") != 0)
517    {
518      pfprintf(fout, "INVALID FORMAT for input line, use either PASS or FAIL\n");
519      rc = ESR_INVALID_ARGUMENT;
520      goto CLEANUP;
521    }
522
523    if ((rc = ParseTestSet(grammar, trans, key, refValue, result, fout)) != ESR_SUCCESS)
524      goto CLEANUP;
525  }
526  else
527  {
528    if ((rc = ParseTestSet(grammar, trans, key, refValue, "PASS", fout)) != ESR_SUCCESS)
529      goto CLEANUP;
530  }
531  rc = ESR_SUCCESS;
532CLEANUP:
533  return rc;
534}
535
536ESR_ReturnCode process_multi_key_line(SR_Grammar* grammar, const LCHAR* rootrule, PFile* fin, PFile* fout)
537{
538  LCHAR *position, *p;
539  LCHAR line[MAX_LINE_LENGTH];
540  LCHAR trans[MAX_LINE_LENGTH];
541  LCHAR keyvals[MAX_LINE_LENGTH];
542  ESR_ReturnCode rc;
543  SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
544  LCHAR refkey[MAX_LINE_LENGTH];
545  LCHAR refval[MAX_LINE_LENGTH], value[MAX_STR_LENGTH];
546  size_t i, j, len;
547  size_t result_count;
548
549  position = pfgets(line, MAX_LINE_LENGTH, fin);
550  if (line[0] == '#')
551    return ESR_SUCCESS;
552  if (!strncmp(line, "__END__", 7))
553    return ESR_READ_ERROR;
554  if (position == NULL)
555  {
556    if (pfeof(fin))
557      return ESR_READ_ERROR;
558    else
559    {
560      PLogError(L("ESR_READ_ERROR"));
561      return ESR_READ_ERROR;
562    }
563  }
564
565  /* we're trying to parse
566    Hello there : BONJOUR
567   */
568  p = strtok(line, ":");
569  LSTRCPY(trans, p);
570  /* strip trailing spaces */
571  for (len = strlen(trans); len > 0 && trans[len-1] == ' '; len--)
572    trans[len-1] = 0;
573
574  p = strtok(NULL, "\n\r");
575  /* strip leading spaces */
576  while (*p == ' ' || *p == '\t')  p++;
577  LSTRCPY(keyvals, p);
578
579  result_count = MAX_SEM_RESULTS;
580  for (i = 0; i < result_count; i++)
581    SR_SemanticResultCreate(&semanticResults[i]);
582
583  /* pfprintf(fout,"checking (%s) ref(%s)\n", trans, keyvals); */
584  rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) & result_count);
585  if (rc != ESR_SUCCESS)
586    return rc;
587
588  /*result file will contain
589  transcription | key | reference | result | PASSESD/FAILED */
590
591  if (result_count < 1) /*failed to parse, but this could still be a pass if you expected a failure*/
592  {
593    pfprintf(fout, "%s|%s|  |", trans, keyvals);
594    if (!strcmp("FAIL", keyvals) || !strcmp(keyvals, "-"))
595      pfprintf(fout, "PASSED\n");
596    else
597      pfprintf(fout, "FAILED\n");
598  }
599  else /*parsed, look at what was expected, what was returned and which of PASS/FAIL is expected */
600  {
601    size_t size, len;
602    LCHAR* keys_available[MAX_KEYS]; /* array of pointers to strings */
603    size = MAX_KEYS;
604    rc = semanticResults[0]->getKeyList(semanticResults[0], (LCHAR**) & keys_available, &size);
605
606    for (p = strtok(keyvals, ";"); p; p = strtok(NULL, ";"))
607    {
608      sprintf(refkey, "%s.%s", rootrule, p);
609      p = strchr(refkey, '=');
610      assert(p);
611      *p = 0;
612      p++;
613      if (*p == '\'') p++;
614      LSTRCPY(refval, p);
615      if (refval[ strlen(refval)-1] == '\'') refval[strlen(refval)-1] = 0;
616
617      for (i = 0; i < result_count; i++)
618      {
619        len = MAX_STR_LENGTH;
620        for (j = 0; j < size; j++)
621          if (!strcmp(keys_available[j], refkey)) break;
622        if (j < size)
623          rc = semanticResults[i]->getValue(semanticResults[i], refkey, value, &len);
624        else
625        {
626          LSTRCPY(value, "<NOSUCHKEY>");
627          rc = ESR_NO_MATCH_ERROR;
628        }
629        pfprintf(fout, "%s|%s|%s|%s|", trans, refkey, refval, value);
630        if (strcmp(value, refval) == 0)
631          pfprintf(fout, "PASSED\n");
632        else
633          pfprintf(fout, "FAILED\n");
634      }
635    }
636
637    /*deallocate semantic results*/
638    for (i = 0; i < MAX_SEM_RESULTS; i++)
639    {
640      rc = semanticResults[i]->destroy(semanticResults[i]);
641      if (rc != ESR_SUCCESS)
642        PLogError("%s while destroying", ESR_rc2str(rc));
643    }
644  }
645  return ESR_SUCCESS;
646}
647
648