15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 1999-2005 The RE2 Authors.  All Rights Reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/util.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/stringpiece.h"
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 {
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ----------------------------------------------------------------------
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// CEscapeString()
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//    Copies 'src' to 'dest', escaping dangerous characters using
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//    C-style escape sequences.  'src' and 'dest' should not overlap.
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//    Returns the number of bytes written to 'dest' (not including the \0)
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//    or -1 if there was insufficient space.
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ----------------------------------------------------------------------
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int CEscapeString(const char* src, int src_len, char* dest,
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  int dest_len) {
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* src_end = src + src_len;
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int used = 0;
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (; src < src_end; src++) {
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (dest_len - used < 2)   // Need space for two letter escape
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return -1;
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unsigned char c = *src;
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    switch (c) {
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      case '\n': dest[used++] = '\\'; dest[used++] = 'n';  break;
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      case '\r': dest[used++] = '\\'; dest[used++] = 'r';  break;
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      case '\t': dest[used++] = '\\'; dest[used++] = 't';  break;
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      default:
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Note that if we emit \xNN and the src character after that is a hex
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // digit then that digit must be escaped too to prevent it being
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // interpreted as part of the character code by C.
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (c < ' ' || c > '~') {
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (dest_len - used < 4) // need space for 4 letter escape
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return -1;
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          sprintf(dest + used, "\\%03o", c);
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          used += 4;
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          dest[used++] = c; break;
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (dest_len - used < 1)   // make sure that there is room for \0
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return -1;
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  dest[used] = '\0';   // doesn't count towards return value though
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return used;
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ----------------------------------------------------------------------
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// CEscape()
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//    Copies 'src' to result, escaping dangerous characters using
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//    C-style escape sequences.  'src' and 'dest' should not overlap.
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ----------------------------------------------------------------------
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)string CEscape(const StringPiece& src) {
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char* dest = new char[dest_length];
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int len = CEscapeString(src.data(), src.size(),
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                dest, dest_length);
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string s = string(dest, len);
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete[] dest;
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return s;
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)string PrefixSuccessor(const StringPiece& prefix) {
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We can increment the last character in the string and be done
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // unless that character is 255, in which case we have to erase the
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // last character and increment the previous character, unless that
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // is 255, etc. If the string is empty or consists entirely of
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // 255's, we just return the empty string.
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool done = false;
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string limit(prefix.data(), prefix.size());
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int index = limit.length() - 1;
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (!done && index >= 0) {
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((limit[index]&255) == 255) {
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      limit.erase(index);
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      index--;
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      limit[index]++;
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      done = true;
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!done) {
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return "";
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return limit;
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace re2
98