// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_DATEPARSER_H_
29#define V8_DATEPARSER_H_
30
31#include "allocation.h"
32#include "char-predicates-inl.h"
33
34namespace v8 {
35namespace internal {
36
37class DateParser : public AllStatic {
38 public:
39  // Parse the string as a date. If parsing succeeds, return true after
40  // filling out the output array as follows (all integers are Smis):
41  // [0]: year
42  // [1]: month (0 = Jan, 1 = Feb, ...)
43  // [2]: day
44  // [3]: hour
45  // [4]: minute
46  // [5]: second
47  // [6]: millisecond
48  // [7]: UTC offset in seconds, or null value if no timezone specified
49  // If parsing fails, return false (content of output array is not defined).
50  template <typename Char>
51  static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache);
52
53  enum {
54    YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE
55  };
56
57 private:
58  // Range testing
59  static inline bool Between(int x, int lo, int hi) {
60    return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
61  }
62
63  // Indicates a missing value.
64  static const int kNone = kMaxInt;
65
66  // Maximal number of digits used to build the value of a numeral.
67  // Remaining digits are ignored.
68  static const int kMaxSignificantDigits = 9;
69
70  // InputReader provides basic string parsing and character classification.
71  template <typename Char>
72  class InputReader BASE_EMBEDDED {
73   public:
74    InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
75        : index_(0),
76          buffer_(s),
77          unicode_cache_(unicode_cache) {
78      Next();
79    }
80
81    int position() { return index_; }
82
83    // Advance to the next character of the string.
84    void Next() {
85      ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
86      index_++;
87    }
88
89    // Read a string of digits as an unsigned number. Cap value at
90    // kMaxSignificantDigits, but skip remaining digits if the numeral
91    // is longer.
92    int ReadUnsignedNumeral() {
93      int n = 0;
94      int i = 0;
95      while (IsAsciiDigit()) {
96        if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
97        i++;
98        Next();
99      }
100      return n;
101    }
102
103    // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
104    // lower-case prefix, and pad any remainder of the buffer with zeroes.
105    // Return word length.
106    int ReadWord(uint32_t* prefix, int prefix_size) {
107      int len;
108      for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
109        if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
110      }
111      for (int i = len; i < prefix_size; i++) prefix[i] = 0;
112      return len;
113    }
114
115    // The skip methods return whether they actually skipped something.
116    bool Skip(uint32_t c) {
117      if (ch_ == c) {
118        Next();
119        return true;
120      }
121      return false;
122    }
123
124    bool SkipWhiteSpace() {
125      if (unicode_cache_->IsWhiteSpace(ch_)) {
126        Next();
127        return true;
128      }
129      return false;
130    }
131
132    bool SkipParentheses() {
133      if (ch_ != '(') return false;
134      int balance = 0;
135      do {
136        if (ch_ == ')') --balance;
137        else if (ch_ == '(') ++balance;
138        Next();
139      } while (balance > 0 && ch_);
140      return true;
141    }
142
143    // Character testing/classification. Non-ASCII digits are not supported.
144    bool Is(uint32_t c) const { return ch_ == c; }
145    bool IsEnd() const { return ch_ == 0; }
146    bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
147    bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
148    bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
149
150    // Return 1 for '+' and -1 for '-'.
151    int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
152
153   private:
154    int index_;
155    Vector<Char> buffer_;
156    uint32_t ch_;
157    UnicodeCache* unicode_cache_;
158  };
159
160  enum KeywordType {
161      INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM
162  };
163
164  struct DateToken {
165   public:
166    bool IsInvalid() { return tag_ == kInvalidTokenTag; }
167    bool IsUnknown() { return tag_ == kUnknownTokenTag; }
168    bool IsNumber() { return tag_ == kNumberTag; }
169    bool IsSymbol() { return tag_ == kSymbolTag; }
170    bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
171    bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
172    bool IsKeyword() { return tag_ >= kKeywordTagStart; }
173
174    int length() { return length_; }
175
176    int number() {
177      ASSERT(IsNumber());
178      return value_;
179    }
180    KeywordType keyword_type() {
181      ASSERT(IsKeyword());
182      return static_cast<KeywordType>(tag_);
183    }
184    int keyword_value() {
185      ASSERT(IsKeyword());
186      return value_;
187    }
188    char symbol() {
189      ASSERT(IsSymbol());
190      return static_cast<char>(value_);
191    }
192    bool IsSymbol(char symbol) {
193      return IsSymbol() && this->symbol() == symbol;
194    }
195    bool IsKeywordType(KeywordType tag) {
196      return tag_ == tag;
197    }
198    bool IsFixedLengthNumber(int length) {
199      return IsNumber() && length_ == length;
200    }
201    bool IsAsciiSign() {
202      return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
203    }
204    int ascii_sign() {
205      ASSERT(IsAsciiSign());
206      return 44 - value_;
207    }
208    bool IsKeywordZ() {
209      return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
210    }
211    bool IsUnknown(int character) {
212      return IsUnknown() && value_ == character;
213    }
214    // Factory functions.
215    static DateToken Keyword(KeywordType tag, int value, int length) {
216      return DateToken(tag, length, value);
217    }
218    static DateToken Number(int value, int length) {
219      return DateToken(kNumberTag, length, value);
220    }
221    static DateToken Symbol(char symbol) {
222      return DateToken(kSymbolTag, 1, symbol);
223    }
224    static DateToken EndOfInput() {
225      return DateToken(kEndOfInputTag, 0, -1);
226    }
227    static DateToken WhiteSpace(int length) {
228      return DateToken(kWhiteSpaceTag, length, -1);
229    }
230    static DateToken Unknown() {
231      return DateToken(kUnknownTokenTag, 1, -1);
232    }
233    static DateToken Invalid() {
234      return DateToken(kInvalidTokenTag, 0, -1);
235    }
236
237   private:
238    enum TagType {
239      kInvalidTokenTag = -6,
240      kUnknownTokenTag = -5,
241      kWhiteSpaceTag = -4,
242      kNumberTag = -3,
243      kSymbolTag = -2,
244      kEndOfInputTag = -1,
245      kKeywordTagStart = 0
246    };
247    DateToken(int tag, int length, int value)
248        : tag_(tag),
249          length_(length),
250          value_(value) { }
251
252    int tag_;
253    int length_;  // Number of characters.
254    int value_;
255  };
256
257  template <typename Char>
258  class DateStringTokenizer {
259   public:
260    explicit DateStringTokenizer(InputReader<Char>* in)
261        : in_(in), next_(Scan()) { }
262    DateToken Next() {
263      DateToken result = next_;
264      next_ = Scan();
265      return result;
266    }
267
268    DateToken Peek() {
269      return next_;
270    }
271    bool SkipSymbol(char symbol) {
272      if (next_.IsSymbol(symbol)) {
273        next_ = Scan();
274        return true;
275      }
276      return false;
277    }
278
279   private:
280    DateToken Scan();
281
282    InputReader<Char>* in_;
283    DateToken next_;
284  };
285
286  static int ReadMilliseconds(DateToken number);
287
288  // KeywordTable maps names of months, time zones, am/pm to numbers.
289  class KeywordTable : public AllStatic {
290   public:
291    // Look up a word in the keyword table and return an index.
292    // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
293    // and 'len' is the word length.
294    static int Lookup(const uint32_t* pre, int len);
295    // Get the type of the keyword at index i.
296    static KeywordType GetType(int i) {
297      return static_cast<KeywordType>(array[i][kTypeOffset]);
298    }
299    // Get the value of the keyword at index i.
300    static int GetValue(int i) { return array[i][kValueOffset]; }
301
302    static const int kPrefixLength = 3;
303    static const int kTypeOffset = kPrefixLength;
304    static const int kValueOffset = kTypeOffset + 1;
305    static const int kEntrySize = kValueOffset + 1;
306    static const int8_t array[][kEntrySize];
307  };
308
309  class TimeZoneComposer BASE_EMBEDDED {
310   public:
311    TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
312    void Set(int offset_in_hours) {
313      sign_ = offset_in_hours < 0 ? -1 : 1;
314      hour_ = offset_in_hours * sign_;
315      minute_ = 0;
316    }
317    void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
318    void SetAbsoluteHour(int hour) { hour_ = hour; }
319    void SetAbsoluteMinute(int minute) { minute_ = minute; }
320    bool IsExpecting(int n) const {
321      return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
322    }
323    bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
324    bool Write(FixedArray* output);
325    bool IsEmpty() { return hour_ == kNone; }
326   private:
327    int sign_;
328    int hour_;
329    int minute_;
330  };
331
332  class TimeComposer BASE_EMBEDDED {
333   public:
334    TimeComposer() : index_(0), hour_offset_(kNone) {}
335    bool IsEmpty() const { return index_ == 0; }
336    bool IsExpecting(int n) const {
337      return (index_ == 1 && IsMinute(n)) ||
338             (index_ == 2 && IsSecond(n)) ||
339             (index_ == 3 && IsMillisecond(n));
340    }
341    bool Add(int n) {
342      return index_ < kSize ? (comp_[index_++] = n, true) : false;
343    }
344    bool AddFinal(int n) {
345      if (!Add(n)) return false;
346      while (index_ < kSize) comp_[index_++] = 0;
347      return true;
348    }
349    void SetHourOffset(int n) { hour_offset_ = n; }
350    bool Write(FixedArray* output);
351
352    static bool IsMinute(int x) { return Between(x, 0, 59); }
353    static bool IsHour(int x) { return Between(x, 0, 23); }
354    static bool IsSecond(int x) { return Between(x, 0, 59); }
355
356   private:
357    static bool IsHour12(int x) { return Between(x, 0, 12); }
358    static bool IsMillisecond(int x) { return Between(x, 0, 999); }
359
360    static const int kSize = 4;
361    int comp_[kSize];
362    int index_;
363    int hour_offset_;
364  };
365
366  class DayComposer BASE_EMBEDDED {
367   public:
368    DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
369    bool IsEmpty() const { return index_ == 0; }
370    bool Add(int n) {
371      if (index_ < kSize) {
372        comp_[index_] = n;
373        index_++;
374        return true;
375      }
376      return false;
377    }
378    void SetNamedMonth(int n) { named_month_ = n; }
379    bool Write(FixedArray* output);
380    void set_iso_date() { is_iso_date_ = true; }
381    static bool IsMonth(int x) { return Between(x, 1, 12); }
382    static bool IsDay(int x) { return Between(x, 1, 31); }
383
384   private:
385    static const int kSize = 3;
386    int comp_[kSize];
387    int index_;
388    int named_month_;
389    // If set, ensures that data is always parsed in year-month-date order.
390    bool is_iso_date_;
391  };
392
393  // Tries to parse an ES5 Date Time String. Returns the next token
394  // to continue with in the legacy date string parser. If parsing is
395  // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
396  // returns DateToken::Invalid(). Otherwise parsing continues in the
397  // legacy parser.
398  template <typename Char>
399  static DateParser::DateToken ParseES5DateTime(
400      DateStringTokenizer<Char>* scanner,
401      DayComposer* day,
402      TimeComposer* time,
403      TimeZoneComposer* tz);
404};
405
406
407} }  // namespace v8::internal
408
409#endif  // V8_DATEPARSER_H_
410