1// Copyright 2011 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_DATEPARSER_H_
6#define V8_DATEPARSER_H_
7
8#include "src/allocation.h"
9#include "src/char-predicates-inl.h"
10
11namespace v8 {
12namespace internal {
13
14class DateParser : public AllStatic {
15 public:
  // Parse the string as a date. If parsing succeeds, return true after
  // filling out the output array as follows (all integers are Smis):
  // [0]: year
  // [1]: month (0 = Jan, 1 = Feb, ...)
  // [2]: day
  // [3]: hour
  // [4]: minute
  // [5]: second
  // [6]: millisecond
  // [7]: UTC offset in seconds, or null value if no timezone specified
  // If parsing fails, return false (content of output array is not defined).
  //
  // The slot numbers above are in the same order as the YEAR ... UTC_OFFSET
  // enumerators declared in this class. The template parameter Char is the
  // character type of the input vector.
  // NOTE(review): 'cache' is handed to InputReader for white-space
  // classification; the definition of Parse is not in this header - confirm.
  template <typename Char>
  static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache);
29
  // Symbolic names for the slots of the output array, in the order listed in
  // the comment on Parse (YEAR == 0 ... UTC_OFFSET == 7). OUTPUT_SIZE is the
  // number of slots.
  enum {
    YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE
  };
33
34 private:
35  // Range testing
36  static inline bool Between(int x, int lo, int hi) {
37    return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
38  }
39
  // Indicates a missing value. The composer classes in this header initialize
  // their optional fields to this sentinel and compare against it.
  static const int kNone = kMaxInt;

  // Maximal number of digits used to build the value of a numeral.
  // Remaining digits are ignored (they are still consumed from the input by
  // InputReader::ReadUnsignedNumeral, they just do not change the value).
  // NOTE(review): nine decimal digits presumably chosen so the value always
  // fits in a 32-bit int - confirm.
  static const int kMaxSignificantDigits = 9;
46
47  // InputReader provides basic string parsing and character classification.
48  template <typename Char>
49  class InputReader BASE_EMBEDDED {
50   public:
51    InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
52        : index_(0),
53          buffer_(s),
54          unicode_cache_(unicode_cache) {
55      Next();
56    }
57
58    int position() { return index_; }
59
60    // Advance to the next character of the string.
61    void Next() {
62      ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
63      index_++;
64    }
65
66    // Read a string of digits as an unsigned number. Cap value at
67    // kMaxSignificantDigits, but skip remaining digits if the numeral
68    // is longer.
69    int ReadUnsignedNumeral() {
70      int n = 0;
71      int i = 0;
72      while (IsAsciiDigit()) {
73        if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
74        i++;
75        Next();
76      }
77      return n;
78    }
79
80    // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
81    // lower-case prefix, and pad any remainder of the buffer with zeroes.
82    // Return word length.
83    int ReadWord(uint32_t* prefix, int prefix_size) {
84      int len;
85      for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
86        if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
87      }
88      for (int i = len; i < prefix_size; i++) prefix[i] = 0;
89      return len;
90    }
91
92    // The skip methods return whether they actually skipped something.
93    bool Skip(uint32_t c) {
94      if (ch_ == c) {
95        Next();
96        return true;
97      }
98      return false;
99    }
100
101    bool SkipWhiteSpace() {
102      if (unicode_cache_->IsWhiteSpaceOrLineTerminator(ch_)) {
103        Next();
104        return true;
105      }
106      return false;
107    }
108
109    bool SkipParentheses() {
110      if (ch_ != '(') return false;
111      int balance = 0;
112      do {
113        if (ch_ == ')') --balance;
114        else if (ch_ == '(') ++balance;
115        Next();
116      } while (balance > 0 && ch_);
117      return true;
118    }
119
120    // Character testing/classification. Non-ASCII digits are not supported.
121    bool Is(uint32_t c) const { return ch_ == c; }
122    bool IsEnd() const { return ch_ == 0; }
123    bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
124    bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
125    bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
126
127    // Return 1 for '+' and -1 for '-'.
128    int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
129
130   private:
131    int index_;
132    Vector<Char> buffer_;
133    uint32_t ch_;
134    UnicodeCache* unicode_cache_;
135  };
136
  // Kinds of words recognized by the parser. DateToken stores a KeywordType
  // directly as the tag of a keyword token (see DateToken::Keyword and
  // DateToken::keyword_type), so these values are non-negative while the
  // tags of all other token kinds are negative.
  enum KeywordType {
      INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM
  };
140
141  struct DateToken {
142   public:
143    bool IsInvalid() { return tag_ == kInvalidTokenTag; }
144    bool IsUnknown() { return tag_ == kUnknownTokenTag; }
145    bool IsNumber() { return tag_ == kNumberTag; }
146    bool IsSymbol() { return tag_ == kSymbolTag; }
147    bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
148    bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
149    bool IsKeyword() { return tag_ >= kKeywordTagStart; }
150
151    int length() { return length_; }
152
153    int number() {
154      DCHECK(IsNumber());
155      return value_;
156    }
157    KeywordType keyword_type() {
158      DCHECK(IsKeyword());
159      return static_cast<KeywordType>(tag_);
160    }
161    int keyword_value() {
162      DCHECK(IsKeyword());
163      return value_;
164    }
165    char symbol() {
166      DCHECK(IsSymbol());
167      return static_cast<char>(value_);
168    }
169    bool IsSymbol(char symbol) {
170      return IsSymbol() && this->symbol() == symbol;
171    }
172    bool IsKeywordType(KeywordType tag) {
173      return tag_ == tag;
174    }
175    bool IsFixedLengthNumber(int length) {
176      return IsNumber() && length_ == length;
177    }
178    bool IsAsciiSign() {
179      return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
180    }
181    int ascii_sign() {
182      DCHECK(IsAsciiSign());
183      return 44 - value_;
184    }
185    bool IsKeywordZ() {
186      return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
187    }
188    bool IsUnknown(int character) {
189      return IsUnknown() && value_ == character;
190    }
191    // Factory functions.
192    static DateToken Keyword(KeywordType tag, int value, int length) {
193      return DateToken(tag, length, value);
194    }
195    static DateToken Number(int value, int length) {
196      return DateToken(kNumberTag, length, value);
197    }
198    static DateToken Symbol(char symbol) {
199      return DateToken(kSymbolTag, 1, symbol);
200    }
201    static DateToken EndOfInput() {
202      return DateToken(kEndOfInputTag, 0, -1);
203    }
204    static DateToken WhiteSpace(int length) {
205      return DateToken(kWhiteSpaceTag, length, -1);
206    }
207    static DateToken Unknown() {
208      return DateToken(kUnknownTokenTag, 1, -1);
209    }
210    static DateToken Invalid() {
211      return DateToken(kInvalidTokenTag, 0, -1);
212    }
213
214   private:
215    enum TagType {
216      kInvalidTokenTag = -6,
217      kUnknownTokenTag = -5,
218      kWhiteSpaceTag = -4,
219      kNumberTag = -3,
220      kSymbolTag = -2,
221      kEndOfInputTag = -1,
222      kKeywordTagStart = 0
223    };
224    DateToken(int tag, int length, int value)
225        : tag_(tag),
226          length_(length),
227          value_(value) { }
228
229    int tag_;
230    int length_;  // Number of characters.
231    int value_;
232  };
233
234  template <typename Char>
235  class DateStringTokenizer {
236   public:
237    explicit DateStringTokenizer(InputReader<Char>* in)
238        : in_(in), next_(Scan()) { }
239    DateToken Next() {
240      DateToken result = next_;
241      next_ = Scan();
242      return result;
243    }
244
245    DateToken Peek() {
246      return next_;
247    }
248    bool SkipSymbol(char symbol) {
249      if (next_.IsSymbol(symbol)) {
250        next_ = Scan();
251        return true;
252      }
253      return false;
254    }
255
256   private:
257    DateToken Scan();
258
259    InputReader<Char>* in_;
260    DateToken next_;
261  };
262
  // Takes a token by value and returns an int; defined out of line.
  // NOTE(review): presumably converts a number token holding the digits of a
  // fractional second into a millisecond count - confirm against the
  // definition.
  static int ReadMilliseconds(DateToken number);
264
265  // KeywordTable maps names of months, time zones, am/pm to numbers.
266  class KeywordTable : public AllStatic {
267   public:
268    // Look up a word in the keyword table and return an index.
269    // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
270    // and 'len' is the word length.
271    static int Lookup(const uint32_t* pre, int len);
272    // Get the type of the keyword at index i.
273    static KeywordType GetType(int i) {
274      return static_cast<KeywordType>(array[i][kTypeOffset]);
275    }
276    // Get the value of the keyword at index i.
277    static int GetValue(int i) { return array[i][kValueOffset]; }
278
279    static const int kPrefixLength = 3;
280    static const int kTypeOffset = kPrefixLength;
281    static const int kValueOffset = kTypeOffset + 1;
282    static const int kEntrySize = kValueOffset + 1;
283    static const int8_t array[][kEntrySize];
284  };
285
286  class TimeZoneComposer BASE_EMBEDDED {
287   public:
288    TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
289    void Set(int offset_in_hours) {
290      sign_ = offset_in_hours < 0 ? -1 : 1;
291      hour_ = offset_in_hours * sign_;
292      minute_ = 0;
293    }
294    void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
295    void SetAbsoluteHour(int hour) { hour_ = hour; }
296    void SetAbsoluteMinute(int minute) { minute_ = minute; }
297    bool IsExpecting(int n) const {
298      return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
299    }
300    bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
301    bool Write(FixedArray* output);
302    bool IsEmpty() { return hour_ == kNone; }
303   private:
304    int sign_;
305    int hour_;
306    int minute_;
307  };
308
309  class TimeComposer BASE_EMBEDDED {
310   public:
311    TimeComposer() : index_(0), hour_offset_(kNone) {}
312    bool IsEmpty() const { return index_ == 0; }
313    bool IsExpecting(int n) const {
314      return (index_ == 1 && IsMinute(n)) ||
315             (index_ == 2 && IsSecond(n)) ||
316             (index_ == 3 && IsMillisecond(n));
317    }
318    bool Add(int n) {
319      return index_ < kSize ? (comp_[index_++] = n, true) : false;
320    }
321    bool AddFinal(int n) {
322      if (!Add(n)) return false;
323      while (index_ < kSize) comp_[index_++] = 0;
324      return true;
325    }
326    void SetHourOffset(int n) { hour_offset_ = n; }
327    bool Write(FixedArray* output);
328
329    static bool IsMinute(int x) { return Between(x, 0, 59); }
330    static bool IsHour(int x) { return Between(x, 0, 23); }
331    static bool IsSecond(int x) { return Between(x, 0, 59); }
332
333   private:
334    static bool IsHour12(int x) { return Between(x, 0, 12); }
335    static bool IsMillisecond(int x) { return Between(x, 0, 999); }
336
337    static const int kSize = 4;
338    int comp_[kSize];
339    int index_;
340    int hour_offset_;
341  };
342
343  class DayComposer BASE_EMBEDDED {
344   public:
345    DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
346    bool IsEmpty() const { return index_ == 0; }
347    bool Add(int n) {
348      if (index_ < kSize) {
349        comp_[index_] = n;
350        index_++;
351        return true;
352      }
353      return false;
354    }
355    void SetNamedMonth(int n) { named_month_ = n; }
356    bool Write(FixedArray* output);
357    void set_iso_date() { is_iso_date_ = true; }
358    static bool IsMonth(int x) { return Between(x, 1, 12); }
359    static bool IsDay(int x) { return Between(x, 1, 31); }
360
361   private:
362    static const int kSize = 3;
363    int comp_[kSize];
364    int index_;
365    int named_month_;
366    // If set, ensures that data is always parsed in year-month-date order.
367    bool is_iso_date_;
368  };
369
  // Tries to parse an ES5 Date Time String. Returns the next token
  // to continue with in the legacy date string parser. If parsing is
  // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
  // returns DateToken::Invalid(). Otherwise parsing continues in the
  // legacy parser.
  //
  // 'scanner' supplies the tokens to parse.
  // NOTE(review): 'day', 'time' and 'tz' are presumably filled in with the
  // date, time and time zone components recognized along the way - the
  // definition is not in this header; confirm there.
  template <typename Char>
  static DateParser::DateToken ParseES5DateTime(
      DateStringTokenizer<Char>* scanner,
      DayComposer* day,
      TimeComposer* time,
      TimeZoneComposer* tz);
381};
382
383
384} }  // namespace v8::internal
385
386#endif  // V8_DATEPARSER_H_
387