1/*
2 * libjingle
3 * Copyright 2004--2005, Google Inc.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 *  1. Redistributions of source code must retain the above copyright notice,
9 *     this list of conditions and the following disclaimer.
10 *  2. Redistributions in binary form must reproduce the above copyright notice,
11 *     this list of conditions and the following disclaimer in the documentation
12 *     and/or other materials provided with the distribution.
13 *  3. The name of the author may not be used to endorse or promote products
14 *     derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "talk/xmpp/jid.h"
29
30#include <ctype.h>
31
32#include <algorithm>
33#include <string>
34
35#include "talk/base/common.h"
36#include "talk/base/logging.h"
37#include "talk/xmpp/constants.h"
38
39namespace buzz {
40
41Jid::Jid() : data_(NULL) {
42}
43
44Jid::Jid(bool is_special, const std::string & special) {
45  data_ = is_special ? new Data(special, STR_EMPTY, STR_EMPTY) : NULL;
46}
47
48Jid::Jid(const std::string & jid_string) {
49  if (jid_string == STR_EMPTY) {
50    data_ = NULL;
51    return;
52  }
53
54  // First find the slash and slice of that part
55  size_t slash = jid_string.find('/');
56  std::string resource_name = (slash == std::string::npos ? STR_EMPTY :
57                    jid_string.substr(slash + 1));
58
59  // Now look for the node
60  std::string node_name;
61  size_t at = jid_string.find('@');
62  size_t domain_begin;
63  if (at < slash && at != std::string::npos) {
64    node_name = jid_string.substr(0, at);
65    domain_begin = at + 1;
66  } else {
67    domain_begin = 0;
68  }
69
70  // Now take what is left as the domain
71  size_t domain_length =
72    (  slash == std::string::npos
73     ? jid_string.length() - domain_begin
74     : slash - domain_begin);
75
76  // avoid allocating these constants repeatedly
77  std::string domain_name;
78
79  if (domain_length == 9  && jid_string.find("gmail.com", domain_begin) == domain_begin) {
80    domain_name = STR_GMAIL_COM;
81  }
82  else if (domain_length == 14 && jid_string.find("googlemail.com", domain_begin) == domain_begin) {
83    domain_name = STR_GOOGLEMAIL_COM;
84  }
85  else if (domain_length == 10 && jid_string.find("google.com", domain_begin) == domain_begin) {
86    domain_name = STR_GOOGLE_COM;
87  }
88  else {
89    domain_name = jid_string.substr(domain_begin, domain_length);
90  }
91
92  // If the domain is empty we have a non-valid jid and we should empty
93  // everything else out
94  if (domain_name.empty()) {
95    data_ = NULL;
96    return;
97  }
98
99  bool valid_node;
100  std::string validated_node = prepNode(node_name,
101      node_name.begin(), node_name.end(), &valid_node);
102  bool valid_domain;
103  std::string validated_domain = prepDomain(domain_name,
104      domain_name.begin(), domain_name.end(), &valid_domain);
105  bool valid_resource;
106  std::string validated_resource = prepResource(resource_name,
107      resource_name.begin(), resource_name.end(), &valid_resource);
108
109  if (!valid_node || !valid_domain || !valid_resource) {
110    data_ = NULL;
111    return;
112  }
113
114  data_ = new Data(validated_node, validated_domain, validated_resource);
115}
116
117Jid::Jid(const std::string & node_name,
118         const std::string & domain_name,
119         const std::string & resource_name) {
120  if (domain_name.empty()) {
121    data_ = NULL;
122    return;
123  }
124
125  bool valid_node;
126  std::string validated_node = prepNode(node_name,
127      node_name.begin(), node_name.end(), &valid_node);
128  bool valid_domain;
129  std::string validated_domain = prepDomain(domain_name,
130      domain_name.begin(), domain_name.end(), &valid_domain);
131  bool valid_resource;
132  std::string validated_resource = prepResource(resource_name,
133      resource_name.begin(), resource_name.end(), &valid_resource);
134
135  if (!valid_node || !valid_domain || !valid_resource) {
136    data_ = NULL;
137    return;
138  }
139
140  data_ = new Data(validated_node, validated_domain, validated_resource);
141}
142
143std::string Jid::Str() const {
144  if (!IsValid())
145    return STR_EMPTY;
146
147  std::string ret;
148
149  if (!data_->node_name_.empty())
150    ret = data_->node_name_ + "@";
151
152  ASSERT(data_->domain_name_ != STR_EMPTY);
153  ret += data_->domain_name_;
154
155  if (!data_->resource_name_.empty())
156    ret += "/" + data_->resource_name_;
157
158  return ret;
159}
160
161bool
162Jid::IsValid() const {
163  return data_ != NULL && !data_->domain_name_.empty();
164}
165
166bool
167Jid::IsBare() const {
168  if (Compare(JID_EMPTY) == 0) {
169    LOG(LS_VERBOSE) << "Warning: Calling IsBare() on the empty jid";
170    return true;
171  }
172  return IsValid() &&
173         data_->resource_name_.empty();
174}
175
176bool
177Jid::IsFull() const {
178  return IsValid() &&
179         !data_->resource_name_.empty();
180}
181
182Jid
183Jid::BareJid() const {
184  if (!IsValid())
185    return Jid();
186  if (!IsFull())
187    return *this;
188  return Jid(data_->node_name_, data_->domain_name_, STR_EMPTY);
189}
190
#if 0
// Disabled mutators: excluded from compilation by the surrounding "#if 0".
// As written they store the new value without validation or normalization and
// dereference data_ without checking it for NULL.
void Jid::set_node(const std::string & node_name) {
  data_->node_name_ = node_name;
}

void Jid::set_domain(const std::string & domain_name) {
  data_->domain_name_ = domain_name;
}

void Jid::set_resource(const std::string & res_name) {
  data_->resource_name_ = res_name;
}
#endif
205
206bool
207Jid::BareEquals(const Jid & other) const {
208  return (other.data_ == data_ ||
209          (data_ != NULL &&
210          other.data_ != NULL &&
211          other.data_->node_name_ == data_->node_name_ &&
212          other.data_->domain_name_ == data_->domain_name_));
213}
214
215bool
216Jid::operator==(const Jid & other) const {
217  return (other.data_ == data_ ||
218          (data_ != NULL &&
219          other.data_ != NULL &&
220          other.data_->node_name_ == data_->node_name_ &&
221          other.data_->domain_name_ == data_->domain_name_ &&
222          other.data_->resource_name_ == data_->resource_name_));
223}
224
225int
226Jid::Compare(const Jid & other) const {
227  if (other.data_ == data_)
228    return 0;
229  if (data_ == NULL)
230    return -1;
231  if (other.data_ == NULL)
232    return 1;
233
234  int compare_result;
235  compare_result = data_->node_name_.compare(other.data_->node_name_);
236  if (0 != compare_result)
237    return compare_result;
238  compare_result = data_->domain_name_.compare(other.data_->domain_name_);
239  if (0 != compare_result)
240    return compare_result;
241  compare_result = data_->resource_name_.compare(other.data_->resource_name_);
242  return compare_result;
243}
244
245uint32 Jid::ComputeLameHash() const {
246  uint32 hash = 0;
247  // Hash the node portion
248  {
249    const std::string &str = node();
250    for (int i = 0; i < static_cast<int>(str.size()); ++i) {
251      hash = ((hash << 2) + hash) + str[i];
252    }
253  }
254
255  // Hash the domain portion
256  {
257    const std::string &str = domain();
258    for (int i = 0; i < static_cast<int>(str.size()); ++i)
259      hash = ((hash << 2) + hash) + str[i];
260  }
261
262  // Hash the resource portion
263  {
264    const std::string &str = resource();
265    for (int i = 0; i < static_cast<int>(str.size()); ++i)
266      hash = ((hash << 2) + hash) + str[i];
267  }
268
269  return hash;
270}
271
272// --- JID parsing code: ---
273
274// Checks and normalizes the node part of a JID.
275std::string
276Jid::prepNode(const std::string str, std::string::const_iterator start,
277    std::string::const_iterator end, bool *valid) {
278  *valid = false;
279  std::string result;
280
281  for (std::string::const_iterator i = start; i < end; i++) {
282    bool char_valid = true;
283    unsigned char ch = *i;
284    if (ch <= 0x7F) {
285      result += prepNodeAscii(ch, &char_valid);
286    }
287    else {
288      // TODO: implement the correct stringprep protocol for these
289      result += tolower(ch);
290    }
291    if (!char_valid) {
292      return STR_EMPTY;
293    }
294  }
295
296  if (result.length() > 1023) {
297    return STR_EMPTY;
298  }
299  *valid = true;
300  return result;
301}
302
303
304// Returns the appropriate mapping for an ASCII character in a node.
305char
306Jid::prepNodeAscii(char ch, bool *valid) {
307  *valid = true;
308  switch (ch) {
309    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
310    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
311    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
312    case 'V': case 'W': case 'X': case 'Y': case 'Z':
313      return (char)(ch + ('a' - 'A'));
314
315    case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05:
316    case 0x06: case 0x07: case 0x08: case 0x09: case 0x0A: case 0x0B:
317    case 0x0C: case 0x0D: case 0x0E: case 0x0F: case 0x10: case 0x11:
318    case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
319    case ' ': case '&': case '/': case ':': case '<': case '>': case '@':
320    case '\"': case '\'':
321    case 0x7F:
322      *valid = false;
323      return 0;
324
325    default:
326      return ch;
327  }
328}
329
330
331// Checks and normalizes the resource part of a JID.
332std::string
333Jid::prepResource(const std::string str, std::string::const_iterator start,
334    std::string::const_iterator end, bool *valid) {
335  *valid = false;
336  std::string result;
337
338  for (std::string::const_iterator i = start; i < end; i++) {
339    bool char_valid = true;
340    unsigned char ch = *i;
341    if (ch <= 0x7F) {
342      result += prepResourceAscii(ch, &char_valid);
343    }
344    else {
345      // TODO: implement the correct stringprep protocol for these
346      result += ch;
347    }
348  }
349
350  if (result.length() > 1023) {
351    return STR_EMPTY;
352  }
353  *valid = true;
354  return result;
355}
356
357// Returns the appropriate mapping for an ASCII character in a resource.
358char
359Jid::prepResourceAscii(char ch, bool *valid) {
360  *valid = true;
361  switch (ch) {
362    case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05:
363    case 0x06: case 0x07: case 0x08: case 0x09: case 0x0A: case 0x0B:
364    case 0x0C: case 0x0D: case 0x0E: case 0x0F: case 0x10: case 0x11:
365    case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
366    case 0x7F:
367      *valid = false;
368      return 0;
369
370    default:
371      return ch;
372  }
373}
374
375// Checks and normalizes the domain part of a JID.
376std::string
377Jid::prepDomain(const std::string str, std::string::const_iterator start,
378    std::string::const_iterator end, bool *valid) {
379  *valid = false;
380  std::string result;
381
382  // TODO: if the domain contains a ':', then we should parse it
383  // as an IPv6 address rather than giving an error about illegal domain.
384  prepDomain(str, start, end, &result, valid);
385  if (!*valid) {
386    return STR_EMPTY;
387  }
388
389  if (result.length() > 1023) {
390    return STR_EMPTY;
391  }
392  *valid = true;
393  return result;
394}
395
396
397// Checks and normalizes an IDNA domain.
398void
399Jid::prepDomain(const std::string str, std::string::const_iterator start,
400    std::string::const_iterator end, std::string *buf, bool *valid) {
401  *valid = false;
402  std::string::const_iterator last = start;
403  for (std::string::const_iterator i = start; i < end; i++) {
404    bool label_valid = true;
405    char ch = *i;
406    switch (ch) {
407      case 0x002E:
408#if 0 // FIX: This isn't UTF-8-aware.
409      case 0x3002:
410      case 0xFF0E:
411      case 0xFF61:
412#endif
413        prepDomainLabel(str, last, i, buf, &label_valid);
414        *buf += '.';
415        last = i + 1;
416        break;
417    }
418    if (!label_valid) {
419      return;
420    }
421  }
422  prepDomainLabel(str, last, end, buf, valid);
423}
424
425// Checks and normalizes a domain label.
426void
427Jid::prepDomainLabel(const std::string str, std::string::const_iterator start,
428    std::string::const_iterator end, std::string *buf, bool *valid) {
429  *valid = false;
430
431  int startLen = buf->length();
432  for (std::string::const_iterator i = start; i < end; i++) {
433    bool char_valid = true;
434    unsigned char ch = *i;
435    if (ch <= 0x7F) {
436      *buf += prepDomainLabelAscii(ch, &char_valid);
437    }
438    else {
439      // TODO: implement ToASCII for these
440      *buf += ch;
441    }
442    if (!char_valid) {
443      return;
444    }
445  }
446
447  int count = buf->length() - startLen;
448  if (count == 0) {
449    return;
450  }
451  else if (count > 63) {
452    return;
453  }
454
455  // Is this check needed? See comment in prepDomainLabelAscii.
456  if ((*buf)[startLen] == '-') {
457    return;
458  }
459  if ((*buf)[buf->length() - 1] == '-') {
460    return;
461  }
462  *valid = true;
463}
464
465
466// Returns the appropriate mapping for an ASCII character in a domain label.
467char
468Jid::prepDomainLabelAscii(char ch, bool *valid) {
469  *valid = true;
470  // TODO: A literal reading of the spec seems to say that we do
471  // not need to check for these illegal characters (an "internationalized
472  // domain label" runs ToASCII with UseSTD3... set to false).  But that
473  // can't be right.  We should at least be checking that there are no '/'
474  // or '@' characters in the domain.  Perhaps we should see what others
475  // do in this case.
476
477  switch (ch) {
478    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
479    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
480    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
481    case 'V': case 'W': case 'X': case 'Y': case 'Z':
482      return (char)(ch + ('a' - 'A'));
483
484    case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05:
485    case 0x06: case 0x07: case 0x08: case 0x09: case 0x0A: case 0x0B:
486    case 0x0C: case 0x0D: case 0x0E: case 0x0F: case 0x10: case 0x11:
487    case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
488    case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D:
489    case 0x1E: case 0x1F: case 0x20: case 0x21: case 0x22: case 0x23:
490    case 0x24: case 0x25: case 0x26: case 0x27: case 0x28: case 0x29:
491    case 0x2A: case 0x2B: case 0x2C: case 0x2E: case 0x2F: case 0x3A:
492    case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F: case 0x40:
493    case 0x5B: case 0x5C: case 0x5D: case 0x5E: case 0x5F: case 0x60:
494    case 0x7B: case 0x7C: case 0x7D: case 0x7E: case 0x7F:
495      *valid = false;
496      return 0;
497
498    default:
499      return ch;
500  }
501}
502
503}
504