// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5"use strict";
6
// This file relies on the fact that the following declaration has been made
// in runtime.js:
// var $Array = global.Array;
10
11// -------------------------------------------------------------------
12
// This file contains support for URI manipulations written in
// JavaScript.
15
16
17(function() {
18
  // -------------------------------------------------------------------
  // Define internal helper functions.
21
22  function HexValueOf(code) {
23    // 0-9
24    if (code >= 48 && code <= 57) return code - 48;
25    // A-F
26    if (code >= 65 && code <= 70) return code - 55;
27    // a-f
28    if (code >= 97 && code <= 102) return code - 87;
29
30    return -1;
31  }
32
33  // Does the char code correspond to an alpha-numeric char.
34  function isAlphaNumeric(cc) {
35    // a - z
36    if (97 <= cc && cc <= 122) return true;
37    // A - Z
38    if (65 <= cc && cc <= 90) return true;
39    // 0 - 9
40    if (48 <= cc && cc <= 57) return true;
41
42    return false;
43  }
44
45  //Lazily initialized.
46  var hexCharCodeArray = 0;
47
48  function URIAddEncodedOctetToBuffer(octet, result, index) {
49    result[index++] = 37; // Char code of '%'.
50    result[index++] = hexCharCodeArray[octet >> 4];
51    result[index++] = hexCharCodeArray[octet & 0x0F];
52    return index;
53  }
54
55  function URIEncodeOctets(octets, result, index) {
56    if (hexCharCodeArray === 0) {
57      hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58                          65, 66, 67, 68, 69, 70];
59    }
60    index = URIAddEncodedOctetToBuffer(octets[0], result, index);
61    if (octets[1]) index = URIAddEncodedOctetToBuffer(octets[1], result, index);
62    if (octets[2]) index = URIAddEncodedOctetToBuffer(octets[2], result, index);
63    if (octets[3]) index = URIAddEncodedOctetToBuffer(octets[3], result, index);
64    return index;
65  }
66
67  function URIEncodeSingle(cc, result, index) {
68    var x = (cc >> 12) & 0xF;
69    var y = (cc >> 6) & 63;
70    var z = cc & 63;
71    var octets = new $Array(3);
72    if (cc <= 0x007F) {
73      octets[0] = cc;
74    } else if (cc <= 0x07FF) {
75      octets[0] = y + 192;
76      octets[1] = z + 128;
77    } else {
78      octets[0] = x + 224;
79      octets[1] = y + 128;
80      octets[2] = z + 128;
81    }
82    return URIEncodeOctets(octets, result, index);
83  }
84
85  function URIEncodePair(cc1 , cc2, result, index) {
86    var u = ((cc1 >> 6) & 0xF) + 1;
87    var w = (cc1 >> 2) & 0xF;
88    var x = cc1 & 3;
89    var y = (cc2 >> 6) & 0xF;
90    var z = cc2 & 63;
91    var octets = new $Array(4);
92    octets[0] = (u >> 2) + 240;
93    octets[1] = (((u & 3) << 4) | w) + 128;
94    octets[2] = ((x << 4) | y) + 128;
95    octets[3] = z + 128;
96    return URIEncodeOctets(octets, result, index);
97  }
98
99  function URIHexCharsToCharCode(highChar, lowChar) {
100    var highCode = HexValueOf(highChar);
101    var lowCode = HexValueOf(lowChar);
102    if (highCode == -1 || lowCode == -1) {
103      throw new $URIError("URI malformed");
104    }
105    return (highCode << 4) | lowCode;
106  }
107
108  // Callers must ensure that |result| is a sufficiently long sequential
109  // two-byte string!
110  function URIDecodeOctets(octets, result, index) {
111    var value;
112    var o0 = octets[0];
113    if (o0 < 0x80) {
114      value = o0;
115    } else if (o0 < 0xc2) {
116      throw new $URIError("URI malformed");
117    } else {
118      var o1 = octets[1];
119      if (o0 < 0xe0) {
120        var a = o0 & 0x1f;
121        if ((o1 < 0x80) || (o1 > 0xbf)) {
122          throw new $URIError("URI malformed");
123        }
124        var b = o1 & 0x3f;
125        value = (a << 6) + b;
126        if (value < 0x80 || value > 0x7ff) {
127          throw new $URIError("URI malformed");
128        }
129      } else {
130        var o2 = octets[2];
131        if (o0 < 0xf0) {
132          var a = o0 & 0x0f;
133          if ((o1 < 0x80) || (o1 > 0xbf)) {
134            throw new $URIError("URI malformed");
135          }
136          var b = o1 & 0x3f;
137          if ((o2 < 0x80) || (o2 > 0xbf)) {
138            throw new $URIError("URI malformed");
139          }
140          var c = o2 & 0x3f;
141          value = (a << 12) + (b << 6) + c;
142          if ((value < 0x800) || (value > 0xffff)) {
143            throw new $URIError("URI malformed");
144          }
145        } else {
146          var o3 = octets[3];
147          if (o0 < 0xf8) {
148            var a = (o0 & 0x07);
149            if ((o1 < 0x80) || (o1 > 0xbf)) {
150              throw new $URIError("URI malformed");
151            }
152            var b = (o1 & 0x3f);
153            if ((o2 < 0x80) || (o2 > 0xbf)) {
154              throw new $URIError("URI malformed");
155            }
156            var c = (o2 & 0x3f);
157            if ((o3 < 0x80) || (o3 > 0xbf)) {
158              throw new $URIError("URI malformed");
159            }
160            var d = (o3 & 0x3f);
161            value = (a << 18) + (b << 12) + (c << 6) + d;
162            if ((value < 0x10000) || (value > 0x10ffff)) {
163              throw new $URIError("URI malformed");
164            }
165          } else {
166            throw new $URIError("URI malformed");
167          }
168        }
169      }
170    }
171    if (0xD800 <= value && value <= 0xDFFF) {
172      throw new $URIError("URI malformed");
173    }
174    if (value < 0x10000) {
175      %_TwoByteSeqStringSetChar(index++, value, result);
176    } else {
177      %_TwoByteSeqStringSetChar(index++, (value >> 10) + 0xd7c0, result);
178      %_TwoByteSeqStringSetChar(index++, (value & 0x3ff) + 0xdc00, result);
179    }
180    return index;
181  }
182
183  // ECMA-262, section 15.1.3
184  function Encode(uri, unescape) {
185    var uriLength = uri.length;
186    var array = new InternalArray(uriLength);
187    var index = 0;
188    for (var k = 0; k < uriLength; k++) {
189      var cc1 = uri.charCodeAt(k);
190      if (unescape(cc1)) {
191        array[index++] = cc1;
192      } else {
193        if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed");
194        if (cc1 < 0xD800 || cc1 > 0xDBFF) {
195          index = URIEncodeSingle(cc1, array, index);
196        } else {
197          k++;
198          if (k == uriLength) throw new $URIError("URI malformed");
199          var cc2 = uri.charCodeAt(k);
200          if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed");
201          index = URIEncodePair(cc1, cc2, array, index);
202        }
203      }
204    }
205
206    var result = %NewString(array.length, NEW_ONE_BYTE_STRING);
207    for (var i = 0; i < array.length; i++) {
208      %_OneByteSeqStringSetChar(i, array[i], result);
209    }
210    return result;
211  }
212
213  // ECMA-262, section 15.1.3
214  function Decode(uri, reserved) {
215    var uriLength = uri.length;
216    var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING);
217    var index = 0;
218    var k = 0;
219
220    // Optimistically assume one-byte string.
221    for ( ; k < uriLength; k++) {
222      var code = uri.charCodeAt(k);
223      if (code == 37) {  // '%'
224        if (k + 2 >= uriLength) throw new $URIError("URI malformed");
225        var cc = URIHexCharsToCharCode(uri.charCodeAt(k+1), uri.charCodeAt(k+2));
226        if (cc >> 7) break;  // Assumption wrong, two-byte string.
227        if (reserved(cc)) {
228          %_OneByteSeqStringSetChar(index++, 37, one_byte);  // '%'.
229          %_OneByteSeqStringSetChar(index++, uri.charCodeAt(k+1), one_byte);
230          %_OneByteSeqStringSetChar(index++, uri.charCodeAt(k+2), one_byte);
231        } else {
232          %_OneByteSeqStringSetChar(index++, cc, one_byte);
233        }
234        k += 2;
235      } else {
236        if (code > 0x7f) break;  // Assumption wrong, two-byte string.
237        %_OneByteSeqStringSetChar(index++, code, one_byte);
238      }
239    }
240
241    one_byte = %TruncateString(one_byte, index);
242    if (k == uriLength) return one_byte;
243
244    // Write into two byte string.
245    var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING);
246    index = 0;
247
248    for ( ; k < uriLength; k++) {
249      var code = uri.charCodeAt(k);
250      if (code == 37) {  // '%'
251        if (k + 2 >= uriLength) throw new $URIError("URI malformed");
252        var cc = URIHexCharsToCharCode(uri.charCodeAt(++k), uri.charCodeAt(++k));
253        if (cc >> 7) {
254          var n = 0;
255          while (((cc << ++n) & 0x80) != 0) { }
256          if (n == 1 || n > 4) throw new $URIError("URI malformed");
257          var octets = new $Array(n);
258          octets[0] = cc;
259          if (k + 3 * (n - 1) >= uriLength) throw new $URIError("URI malformed");
260          for (var i = 1; i < n; i++) {
261            if (uri.charAt(++k) != '%') throw new $URIError("URI malformed");
262            octets[i] = URIHexCharsToCharCode(uri.charCodeAt(++k),
263                                              uri.charCodeAt(++k));
264          }
265          index = URIDecodeOctets(octets, two_byte, index);
266        } else  if (reserved(cc)) {
267          %_TwoByteSeqStringSetChar(index++, 37, two_byte);  // '%'.
268          %_TwoByteSeqStringSetChar(index++, uri.charCodeAt(k - 1), two_byte);
269          %_TwoByteSeqStringSetChar(index++, uri.charCodeAt(k), two_byte);
270        } else {
271          %_TwoByteSeqStringSetChar(index++, cc, two_byte);
272        }
273      } else {
274        %_TwoByteSeqStringSetChar(index++, code, two_byte);
275      }
276    }
277
278    two_byte = %TruncateString(two_byte, index);
279    return one_byte + two_byte;
280  }
281
  // -------------------------------------------------------------------
  // Define exported functions.
284
285  // ECMA-262 - B.2.1.
286  function URIEscapeJS(str) {
287    var s = ToString(str);
288    return %URIEscape(s);
289  }
290
291  // ECMA-262 - B.2.2.
292  function URIUnescapeJS(str) {
293    var s = ToString(str);
294    return %URIUnescape(s);
295  }
296
297  // ECMA-262 - 15.1.3.1.
298  function URIDecode(uri) {
299    var reservedPredicate = function(cc) {
300      // #$
301      if (35 <= cc && cc <= 36) return true;
302      // &
303      if (cc == 38) return true;
304      // +,
305      if (43 <= cc && cc <= 44) return true;
306      // /
307      if (cc == 47) return true;
308      // :;
309      if (58 <= cc && cc <= 59) return true;
310      // =
311      if (cc == 61) return true;
312      // ?@
313      if (63 <= cc && cc <= 64) return true;
314
315      return false;
316    };
317    var string = ToString(uri);
318    return Decode(string, reservedPredicate);
319  }
320
321  // ECMA-262 - 15.1.3.2.
322  function URIDecodeComponent(component) {
323    var reservedPredicate = function(cc) { return false; };
324    var string = ToString(component);
325    return Decode(string, reservedPredicate);
326  }
327
328  // ECMA-262 - 15.1.3.3.
329  function URIEncode(uri) {
330    var unescapePredicate = function(cc) {
331      if (isAlphaNumeric(cc)) return true;
332      // !
333      if (cc == 33) return true;
334      // #$
335      if (35 <= cc && cc <= 36) return true;
336      // &'()*+,-./
337      if (38 <= cc && cc <= 47) return true;
338      // :;
339      if (58 <= cc && cc <= 59) return true;
340      // =
341      if (cc == 61) return true;
342      // ?@
343      if (63 <= cc && cc <= 64) return true;
344      // _
345      if (cc == 95) return true;
346      // ~
347      if (cc == 126) return true;
348
349      return false;
350    };
351    var string = ToString(uri);
352    return Encode(string, unescapePredicate);
353  }
354
355  // ECMA-262 - 15.1.3.4
356  function URIEncodeComponent(component) {
357    var unescapePredicate = function(cc) {
358      if (isAlphaNumeric(cc)) return true;
359      // !
360      if (cc == 33) return true;
361      // '()*
362      if (39 <= cc && cc <= 42) return true;
363      // -.
364      if (45 <= cc && cc <= 46) return true;
365      // _
366      if (cc == 95) return true;
367      // ~
368      if (cc == 126) return true;
369
370      return false;
371    };
372    var string = ToString(component);
373    return Encode(string, unescapePredicate);
374  }
375
  // -------------------------------------------------------------------
  // Install exported functions.
378
379  %CheckIsBootstrapping();
380
381  // Set up non-enumerable URI functions on the global object and set
382  // their names.
383  InstallFunctions(global, DONT_ENUM, $Array(
384      "escape", URIEscapeJS,
385      "unescape", URIUnescapeJS,
386      "decodeURI", URIDecode,
387      "decodeURIComponent", URIDecodeComponent,
388      "encodeURI", URIEncode,
389      "encodeURIComponent", URIEncodeComponent
390  ));
391
392})();
393