1/*---------------------------------------------------------------------------*
2 *  LexicalAnalyzer.c  *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20#include "SR_LexicalAnalyzer.h"
21#include "plog.h"
22#include "pmemory.h"
23
24
25static const char* MTAG = __FILE__;
26
27ESR_BOOL isIdentifierChar(LCHAR p);
28
29ESR_ReturnCode LA_Init(LexicalAnalyzer** self)
30{
31  LexicalAnalyzer* Interface;
32
33  if (self == NULL)
34  {
35    PLogError(L("ESR_INVALID_ARGUMENT"));
36    return ESR_INVALID_ARGUMENT;
37  }
38
39  Interface = NEW(LexicalAnalyzer, MTAG);
40  if (Interface == NULL)
41  {
42    PLogError(L("ESR_OUT_OF_MEMORY"));
43    return ESR_OUT_OF_MEMORY;
44  }
45  *self = Interface;
46  return ESR_SUCCESS;
47}
48
49ESR_ReturnCode LA_Analyze(LexicalAnalyzer *lex, LCHAR *script)
50{
51  if (lex == NULL || script == NULL)
52  {
53    PLogError(L("ESR_INVALID_ARGUMENT"));
54    return ESR_INVALID_ARGUMENT;
55  }
56
57  /* point to the first char */
58  lex->nextToken = lex->script = script;
59  return ESR_SUCCESS;
60}
61
62ESR_ReturnCode LA_Free(LexicalAnalyzer *lex)
63{
64  if (lex == NULL)
65  {
66    PLogError(L("ESR_INVALID_ARGUMENT"));
67    return ESR_INVALID_ARGUMENT;
68  }
69  FREE(lex);
70  return ESR_SUCCESS;
71}
72
73
74ESR_ReturnCode LA_nextToken(LexicalAnalyzer *lex, LCHAR *tokenBuf, size_t* tokenLen)
75{
76  LCHAR *p;
77  LCHAR *q;
78
79  while (LISSPACE(*lex->nextToken))
80    ++lex->nextToken;
81
82  switch (*lex->nextToken)
83  {
84    case OP_ASSIGN:
85    case OP_CONCAT:
86    case LBRACKET:
87    case PARAM_DELIM:
88    case RBRACKET:
89    case OP_CONDITION_IFTRUE:
90    case OP_CONDITION_ELSE:
91    case EO_STATEMENT:
92      tokenBuf[0] = *lex->nextToken;
93      tokenBuf[1] = EO_STRING;
94      *tokenLen = 1;
95      break;
96    case STRING_DELIM:
97      p = lex->nextToken;
98      q = tokenBuf;
99      *q++ = *p++;
100/* finds the end of the constant string also protects against going past end of string
101 * The parser above will handle the incomplete string. SteveR
102 */
103      while ( ( *p != STRING_DELIM ) && ( *p != '\0' ) )
104      {
105        if (*p == ESC_CHAR)
106          *q++ = *p++;
107        *q++ = *p++;
108      }
109
110      *q++ = *p++;
111      *tokenLen = q - tokenBuf;
112      tokenBuf[*tokenLen] = EO_STRING; /* make sure its there */
113      break;
114    default:
115      p = lex->nextToken;
116      while (isIdentifierChar(*p))  /* finds the end of the name of this identifier */
117        ++p;
118      *tokenLen = p - lex->nextToken;
119      LSTRNCPY(tokenBuf, lex->nextToken, *tokenLen);
120      tokenBuf[*tokenLen] = EO_STRING; /* make sure its there */
121  }
122  lex->nextToken += *tokenLen;
123  return ESR_SUCCESS;
124}
125
126/**
127 * Indicates if character is in range [a-z] or [A-Z] or [0-9] or ['.'].
128 **/
129ESR_BOOL isIdentifierChar(LCHAR p)
130{
131  return (p == DOT ||                     /* the dot */
132         p == USCORE ||                  /* the underscore */
133         (p <= L('z') && p >= L('a')) || /* lowercase alpha */
134         (p <= L('Z') && p >= L('A')) || /* uppercase alpha */
135         (p <= L('9') && p >= L('0'))) ? ESR_TRUE : ESR_FALSE;   /* numbers */
136}
137