1/*
2** 2001 September 15
3**
4** The author disclaims copyright to this source code.  In place of
5** a legal notice, here is a blessing:
6**
7**    May you do good and not evil.
8**    May you find forgiveness for yourself and forgive others.
9**    May you share freely, never taking more than you give.
10**
11*************************************************************************
** A tokenizer for SQL
13**
14** This file contains C code that implements the sqlite3_complete() API.
15** This code used to be part of the tokenizer.c source file.  But by
16** separating it out, the code will be automatically omitted from
17** static links that do not use it.
18*/
19#include "sqliteInt.h"
20#ifndef SQLITE_OMIT_COMPLETE
21
22/*
23** This is defined in tokenize.c.  We just have to import the definition.
24*/
#ifndef SQLITE_AMALGAMATION
#ifdef SQLITE_ASCII
/* Non-zero if C is a character that can be part of a keyword or an
** unquoted identifier.  The argument is parenthesized so that any
** expression may be passed safely.
*/
#define IdChar(C)  ((sqlite3CtypeMap[(unsigned char)(C)]&0x46)!=0)
#endif
#ifdef SQLITE_EBCDIC
extern const char sqlite3IsEbcdicIdChar[];
/* EBCDIC variant of the test above.  NOTE: this macro assigns its
** argument to a variable named "c", so a suitable "c" must be in scope
** wherever the macro is used.
*/
#define IdChar(C)  (((c=(C))>=0x42 && sqlite3IsEbcdicIdChar[c-0x40]))
#endif
#endif /* SQLITE_AMALGAMATION */
34
35
36/*
37** Token types used by the sqlite3_complete() routine.  See the header
38** comments on that procedure for additional information.
39*/
/* Tokens that are recognized in every build. */
#define tkSEMI    0   /* A semicolon */
#define tkWS      1   /* Whitespace or a comment */
#define tkOTHER   2   /* Any other SQL token */
#ifndef SQLITE_OMIT_TRIGGER
/* Keyword tokens.  These are only needed to find the end of a
** CREATE TRIGGER statement, so they are omitted along with triggers.
*/
#define tkEXPLAIN 3   /* The "explain" keyword */
#define tkCREATE  4   /* The "create" keyword */
#define tkTEMP    5   /* The "temp" or "temporary" keyword */
#define tkTRIGGER 6   /* The "trigger" keyword */
#define tkEND     7   /* The "end" keyword */
#endif
50
51/*
52** Return TRUE if the given SQL string ends in a semicolon.
53**
** Special handling is required for CREATE TRIGGER statements.
55** Whenever the CREATE TRIGGER keywords are seen, the statement
56** must end with ";END;".
57**
58** This implementation uses a state machine with 8 states:
59**
60**   (0) INVALID   We have not yet seen a non-whitespace character.
61**
62**   (1) START     At the beginning or end of an SQL statement.  This routine
63**                 returns 1 if it ends in the START state and 0 if it ends
64**                 in any other state.
65**
66**   (2) NORMAL    We are in the middle of statement which ends with a single
67**                 semicolon.
68**
69**   (3) EXPLAIN   The keyword EXPLAIN has been seen at the beginning of
70**                 a statement.
71**
72**   (4) CREATE    The keyword CREATE has been seen at the beginning of a
**                 statement, possibly preceded by EXPLAIN and/or followed by
74**                 TEMP or TEMPORARY
75**
76**   (5) TRIGGER   We are in the middle of a trigger definition that must be
77**                 ended by a semicolon, the keyword END, and another semicolon.
78**
79**   (6) SEMI      We've seen the first semicolon in the ";END;" that occurs at
80**                 the end of a trigger definition.
81**
82**   (7) END       We've seen the ";END" of the ";END;" that occurs at the end
**                 of a trigger definition.
84**
85** Transitions between states above are determined by tokens extracted
86** from the input.  The following tokens are significant:
87**
88**   (0) tkSEMI      A semicolon.
89**   (1) tkWS        Whitespace.
90**   (2) tkOTHER     Any other SQL token.
91**   (3) tkEXPLAIN   The "explain" keyword.
92**   (4) tkCREATE    The "create" keyword.
93**   (5) tkTEMP      The "temp" or "temporary" keyword.
94**   (6) tkTRIGGER   The "trigger" keyword.
95**   (7) tkEND       The "end" keyword.
96**
97** Whitespace never causes a state transition and is always ignored.
98** This means that a SQL string of all whitespace is invalid.
99**
100** If we compile with SQLITE_OMIT_TRIGGER, all of the computation needed
101** to recognize the end of a trigger can be omitted.  All we have to do
** is look for a semicolon that is not part of a string or comment.
103*/
104int sqlite3_complete(const char *zSql){
105  u8 state = 0;   /* Current state, using numbers defined in header comment */
106  u8 token;       /* Value of the next token */
107
108#ifndef SQLITE_OMIT_TRIGGER
109  /* A complex statement machine used to detect the end of a CREATE TRIGGER
110  ** statement.  This is the normal case.
111  */
112  static const u8 trans[8][8] = {
113                     /* Token:                                                */
114     /* State:       **  SEMI  WS  OTHER  EXPLAIN  CREATE  TEMP  TRIGGER  END */
115     /* 0 INVALID: */ {    1,  0,     2,       3,      4,    2,       2,   2, },
116     /* 1   START: */ {    1,  1,     2,       3,      4,    2,       2,   2, },
117     /* 2  NORMAL: */ {    1,  2,     2,       2,      2,    2,       2,   2, },
118     /* 3 EXPLAIN: */ {    1,  3,     3,       2,      4,    2,       2,   2, },
119     /* 4  CREATE: */ {    1,  4,     2,       2,      2,    4,       5,   2, },
120     /* 5 TRIGGER: */ {    6,  5,     5,       5,      5,    5,       5,   5, },
121     /* 6    SEMI: */ {    6,  6,     5,       5,      5,    5,       5,   7, },
122     /* 7     END: */ {    1,  7,     5,       5,      5,    5,       5,   5, },
123  };
124#else
125  /* If triggers are not supported by this compile then the statement machine
126  ** used to detect the end of a statement is much simplier
127  */
128  static const u8 trans[3][3] = {
129                     /* Token:           */
130     /* State:       **  SEMI  WS  OTHER */
131     /* 0 INVALID: */ {    1,  0,     2, },
132     /* 1   START: */ {    1,  1,     2, },
133     /* 2  NORMAL: */ {    1,  2,     2, },
134  };
135#endif /* SQLITE_OMIT_TRIGGER */
136
137  while( *zSql ){
138    switch( *zSql ){
139      case ';': {  /* A semicolon */
140        token = tkSEMI;
141        break;
142      }
143      case ' ':
144      case '\r':
145      case '\t':
146      case '\n':
147      case '\f': {  /* White space is ignored */
148        token = tkWS;
149        break;
150      }
151      case '/': {   /* C-style comments */
152        if( zSql[1]!='*' ){
153          token = tkOTHER;
154          break;
155        }
156        zSql += 2;
157        while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; }
158        if( zSql[0]==0 ) return 0;
159        zSql++;
160        token = tkWS;
161        break;
162      }
163      case '-': {   /* SQL-style comments from "--" to end of line */
164        if( zSql[1]!='-' ){
165          token = tkOTHER;
166          break;
167        }
168        while( *zSql && *zSql!='\n' ){ zSql++; }
169        if( *zSql==0 ) return state==1;
170        token = tkWS;
171        break;
172      }
173      case '[': {   /* Microsoft-style identifiers in [...] */
174        zSql++;
175        while( *zSql && *zSql!=']' ){ zSql++; }
176        if( *zSql==0 ) return 0;
177        token = tkOTHER;
178        break;
179      }
180      case '`':     /* Grave-accent quoted symbols used by MySQL */
181      case '"':     /* single- and double-quoted strings */
182      case '\'': {
183        int c = *zSql;
184        zSql++;
185        while( *zSql && *zSql!=c ){ zSql++; }
186        if( *zSql==0 ) return 0;
187        token = tkOTHER;
188        break;
189      }
190      default: {
191#ifdef SQLITE_EBCDIC
192        unsigned char c;
193#endif
194        if( IdChar((u8)*zSql) ){
195          /* Keywords and unquoted identifiers */
196          int nId;
197          for(nId=1; IdChar(zSql[nId]); nId++){}
198#ifdef SQLITE_OMIT_TRIGGER
199          token = tkOTHER;
200#else
201          switch( *zSql ){
202            case 'c': case 'C': {
203              if( nId==6 && sqlite3StrNICmp(zSql, "create", 6)==0 ){
204                token = tkCREATE;
205              }else{
206                token = tkOTHER;
207              }
208              break;
209            }
210            case 't': case 'T': {
211              if( nId==7 && sqlite3StrNICmp(zSql, "trigger", 7)==0 ){
212                token = tkTRIGGER;
213              }else if( nId==4 && sqlite3StrNICmp(zSql, "temp", 4)==0 ){
214                token = tkTEMP;
215              }else if( nId==9 && sqlite3StrNICmp(zSql, "temporary", 9)==0 ){
216                token = tkTEMP;
217              }else{
218                token = tkOTHER;
219              }
220              break;
221            }
222            case 'e':  case 'E': {
223              if( nId==3 && sqlite3StrNICmp(zSql, "end", 3)==0 ){
224                token = tkEND;
225              }else
226#ifndef SQLITE_OMIT_EXPLAIN
227              if( nId==7 && sqlite3StrNICmp(zSql, "explain", 7)==0 ){
228                token = tkEXPLAIN;
229              }else
230#endif
231              {
232                token = tkOTHER;
233              }
234              break;
235            }
236            default: {
237              token = tkOTHER;
238              break;
239            }
240          }
241#endif /* SQLITE_OMIT_TRIGGER */
242          zSql += nId-1;
243        }else{
244          /* Operators and special symbols */
245          token = tkOTHER;
246        }
247        break;
248      }
249    }
250    state = trans[state][token];
251    zSql++;
252  }
253  return state==1;
254}
255
256#ifndef SQLITE_OMIT_UTF16
257/*
258** This routine is the same as the sqlite3_complete() routine described
259** above, except that the parameter is required to be UTF-16 encoded, not
260** UTF-8.
261*/
262int sqlite3_complete16(const void *zSql){
263  sqlite3_value *pVal;
264  char const *zSql8;
265  int rc = SQLITE_NOMEM;
266
267#ifndef SQLITE_OMIT_AUTOINIT
268  rc = sqlite3_initialize();
269  if( rc ) return rc;
270#endif
271  pVal = sqlite3ValueNew(0);
272  sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC);
273  zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8);
274  if( zSql8 ){
275    rc = sqlite3_complete(zSql8);
276  }else{
277    rc = SQLITE_NOMEM;
278  }
279  sqlite3ValueFree(pVal);
280  return sqlite3ApiExit(0, rc);
281}
282#endif /* SQLITE_OMIT_UTF16 */
283#endif /* SQLITE_OMIT_COMPLETE */
284