12d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines// Copyright 1999-2005 The RE2 Authors.  All Rights Reserved.
22d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines// Use of this source code is governed by a BSD-style
32d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines// license that can be found in the LICENSE file.
42d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines
52d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines#include "util/util.h"
62d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines#include "re2/stringpiece.h"
72d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines
82d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hinesnamespace re2 {
92d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines
// ----------------------------------------------------------------------
// CEscapeString()
//    Copies the 'src_len' bytes at 'src' to 'dest', escaping dangerous
//    characters using C-style escape sequences:
//      - newline, carriage return, tab, double quote, single quote and
//        backslash become two-character backslash escapes;
//      - any other byte outside the printable ASCII range ' '..'~'
//        becomes a four-character octal escape (\NNN);
//      - every other byte is copied unchanged.
//    'src' and 'dest' should not overlap.  'dest_len' is the total
//    capacity of 'dest' in bytes and must include room for the
//    terminating \0.
//    Returns the number of bytes written to 'dest' (not including the \0)
//    or -1 if there was insufficient space.  When -1 is returned 'dest'
//    holds a partial, unterminated result; nothing is ever written at or
//    beyond dest + dest_len.
// ----------------------------------------------------------------------
int CEscapeString(const char* src, int src_len, char* dest,
                  int dest_len) {
  const char* src_end = src + src_len;
  int used = 0;

  for (; src < src_end; src++) {
    if (dest_len - used < 2)   // Need space for two letter escape
      return -1;

    unsigned char c = *src;
    switch (c) {
      case '\n': dest[used++] = '\\'; dest[used++] = 'n';  break;
      case '\r': dest[used++] = '\\'; dest[used++] = 'r';  break;
      case '\t': dest[used++] = '\\'; dest[used++] = 't';  break;
      case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
      case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
      case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
      default:
        if (c < ' ' || c > '~') {
          // Non-printable byte: emit a fixed-width, three-digit octal
          // escape.  A C octal escape consumes at most three digits, so
          // a digit that follows in the output cannot be absorbed into it.
          if (dest_len - used < 4) // need space for 4 letter escape
            return -1;
          // The digits are written by hand rather than with sprintf():
          // sprintf() appends a terminating \0, i.e. a fifth byte, which
          // lands past the end of 'dest' when exactly four bytes remain.
          dest[used++] = '\\';
          dest[used++] = static_cast<char>('0' + ((c >> 6) & 7));
          dest[used++] = static_cast<char>('0' + ((c >> 3) & 7));
          dest[used++] = static_cast<char>('0' + (c & 7));
        } else {
          dest[used++] = c;
        }
        break;
    }
  }

  if (dest_len - used < 1)   // make sure that there is room for \0
    return -1;

  dest[used] = '\0';   // doesn't count towards return value though
  return used;
}
552d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines
562d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines
572d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines// ----------------------------------------------------------------------
582d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines// CEscape()
592d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines//    Copies 'src' to result, escaping dangerous characters using
602d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines//    C-style escape sequences.  'src' and 'dest' should not overlap.
612d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines// ----------------------------------------------------------------------
622d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hinesstring CEscape(const StringPiece& src) {
632d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines  const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
642d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines  char* dest = new char[dest_length];
652d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines  const int len = CEscapeString(src.data(), src.size(),
662d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines                                dest, dest_length);
672d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines  string s = string(dest, len);
682d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines  delete[] dest;
692d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines  return s;
702d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines}
712d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines
722d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hinesstring PrefixSuccessor(const StringPiece& prefix) {
73  // We can increment the last character in the string and be done
74  // unless that character is 255, in which case we have to erase the
75  // last character and increment the previous character, unless that
76  // is 255, etc. If the string is empty or consists entirely of
77  // 255's, we just return the empty string.
78  bool done = false;
79  string limit(prefix.data(), prefix.size());
80  int index = limit.length() - 1;
81  while (!done && index >= 0) {
82    if ((limit[index]&255) == 255) {
83      limit.erase(index);
84      index--;
85    } else {
86      limit[index]++;
87      done = true;
88    }
89  }
90  if (!done) {
91    return "";
92  } else {
93    return limit;
94  }
95}
96
97}  // namespace re2
98