strutil.cc revision d26caadec345d7f19d63f894a0b8320693543ea6
1// Copyright 2015 Google Inc. All rights reserved
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// +build ignore
16
17#include "strutil.h"
18
19#include <ctype.h>
20#include <limits.h>
21#include <unistd.h>
22
23#include <algorithm>
24#include <functional>
25#include <stack>
26#include <utility>
27
28#ifdef __SSE4_2__
29#include <smmintrin.h>
30#endif
31
32#include "log.h"
33
34static bool isSpace(char c) {
35  return (9 <= c && c <= 13) || c == 32;
36}
37
38#ifdef __SSE4_2__
39static int SkipUntilSSE42(const char* s, int len,
40                          const char* ranges, int ranges_size) {
41  __m128i ranges16 = _mm_loadu_si128((const __m128i*)ranges);
42  len &= ~15;
43  int i = 0;
44  while (i < len) {
45    __m128i b16 = _mm_loadu_si128((const __m128i*)(s + i));
46    int r = _mm_cmpestri(
47        ranges16, ranges_size, b16, len - i,
48        _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
49    if (r != 16) {
50      return i + r;
51    }
52    i += 16;
53  }
54  return len;
55}
56#endif
57
58template <typename Cond>
59static int SkipUntil(const char* s, int len,
60                     const char* ranges, int ranges_size,
61                     Cond cond) {
62  int i = 0;
63#ifdef __SSE4_2__
64  i += SkipUntilSSE42(s, len, ranges, ranges_size);
65#endif
66  for (; i < len; i++) {
67    if (cond(s[i]))
68      break;
69  }
70  return i;
71}
72
73WordScanner::Iterator& WordScanner::Iterator::operator++() {
74  int len = static_cast<int>(in->size());
75  for (s = i + 1; s < len; s++) {
76    if (!isSpace((*in)[s]))
77      break;
78  }
79  if (s >= len) {
80    in = NULL;
81    s = 0;
82    i = 0;
83    return *this;
84  }
85
86  static const char ranges[] = "\x09\x0d  ";
87  // It's intentional we are not using isSpace here. It seems with
88  // lambda the compiler generates better code.
89  i = s + SkipUntil(in->data() + s, len - s, ranges, 4,
90                    [](char c) { return (9 <= c && c <= 13) || c == 32; });
91  return *this;
92}
93
94StringPiece WordScanner::Iterator::operator*() const {
95  return in->substr(s, i - s);
96}
97
98WordScanner::WordScanner(StringPiece in)
99    : in_(in) {
100}
101
102WordScanner::Iterator WordScanner::begin() const {
103  Iterator iter;
104  iter.in = &in_;
105  iter.s = 0;
106  iter.i = -1;
107  ++iter;
108  return iter;
109}
110
111WordScanner::Iterator WordScanner::end() const {
112  Iterator iter;
113  iter.in = NULL;
114  iter.s = 0;
115  iter.i = 0;
116  return iter;
117}
118
119void WordScanner::Split(vector<StringPiece>* o) {
120  for (StringPiece t : *this)
121    o->push_back(t);
122}
123
124WordWriter::WordWriter(string* o)
125    : out_(o),
126      needs_space_(false) {
127}
128
129void WordWriter::MaybeAddWhitespace() {
130  if (needs_space_) {
131    out_->push_back(' ');
132  } else {
133    needs_space_ = true;
134  }
135}
136
137void WordWriter::Write(StringPiece s) {
138  MaybeAddWhitespace();
139  AppendString(s, out_);
140}
141
142ScopedTerminator::ScopedTerminator(StringPiece s)
143    : s_(s), c_(s[s.size()]) {
144  const_cast<char*>(s_.data())[s_.size()] = '\0';
145}
146
147ScopedTerminator::~ScopedTerminator() {
148  const_cast<char*>(s_.data())[s_.size()] = c_;
149}
150
151void AppendString(StringPiece str, string* out) {
152  out->append(str.begin(), str.end());
153}
154
155bool HasPrefix(StringPiece str, StringPiece prefix) {
156  ssize_t size_diff = str.size() - prefix.size();
157  return size_diff >= 0 && str.substr(0, prefix.size()) == prefix;
158}
159
160bool HasSuffix(StringPiece str, StringPiece suffix) {
161  ssize_t size_diff = str.size() - suffix.size();
162  return size_diff >= 0 && str.substr(size_diff) == suffix;
163}
164
165bool HasWord(StringPiece str, StringPiece w) {
166  size_t found = str.find(w);
167  if (found == string::npos)
168    return false;
169  if (found != 0 && !isSpace(str[found-1]))
170    return false;
171  size_t end = found + w.size();
172  if (end != str.size() && !isSpace(str[end]))
173    return false;
174  return true;
175}
176
177StringPiece TrimSuffix(StringPiece str, StringPiece suffix) {
178  ssize_t size_diff = str.size() - suffix.size();
179  if (size_diff < 0 || str.substr(size_diff) != suffix)
180    return str;
181  return str.substr(0, size_diff);
182}
183
184Pattern::Pattern(StringPiece pat)
185    : pat_(pat), percent_index_(pat.find('%')) {
186}
187
188bool Pattern::Match(StringPiece str) const {
189  if (percent_index_ == string::npos)
190    return str == pat_;
191  return MatchImpl(str);
192}
193
194bool Pattern::MatchImpl(StringPiece str) const {
195  return (HasPrefix(str, pat_.substr(0, percent_index_)) &&
196          HasSuffix(str, pat_.substr(percent_index_ + 1)));
197}
198
199StringPiece Pattern::Stem(StringPiece str) const {
200  if (!Match(str))
201    return "";
202  return str.substr(percent_index_,
203                    str.size() - (pat_.size() - percent_index_ - 1));
204}
205
206void Pattern::AppendSubst(StringPiece str, StringPiece subst,
207                          string* out) const {
208  if (percent_index_ == string::npos) {
209    if (str == pat_) {
210      AppendString(subst, out);
211      return;
212    } else {
213      AppendString(str, out);
214      return;
215    }
216  }
217
218  if (MatchImpl(str)) {
219    size_t subst_percent_index = subst.find('%');
220    if (subst_percent_index == string::npos) {
221      AppendString(subst, out);
222      return;
223    } else {
224      AppendString(subst.substr(0, subst_percent_index), out);
225      AppendString(str.substr(percent_index_,
226                              str.size() - pat_.size() + 1), out);
227      AppendString(subst.substr(subst_percent_index + 1), out);
228      return;
229    }
230  }
231  AppendString(str, out);
232}
233
234void Pattern::AppendSubstRef(StringPiece str, StringPiece subst,
235                             string* out) const {
236  if (percent_index_ != string::npos && subst.find('%') != string::npos) {
237    AppendSubst(str, subst, out);
238    return;
239  }
240  StringPiece s = TrimSuffix(str, pat_);
241  out->append(s.begin(), s.end());
242  out->append(subst.begin(), subst.end());
243}
244
245string NoLineBreak(const string& s) {
246  size_t index = s.find('\n');
247  if (index == string::npos)
248    return s;
249  string r = s;
250  while (index != string::npos) {
251    r = r.substr(0, index) + "\\n" + r.substr(index + 1);
252    index = r.find('\n', index + 2);
253  }
254  return r;
255}
256
257StringPiece TrimLeftSpace(StringPiece s) {
258  size_t i = 0;
259  for (; i < s.size(); i++) {
260    if (isSpace(s[i]))
261      continue;
262    char n = s.get(i+1);
263    if (s[i] == '\\' && (n == '\r' || n == '\n')) {
264      i++;
265      continue;
266    }
267    break;
268  }
269  return s.substr(i, s.size() - i);
270}
271
272StringPiece TrimRightSpace(StringPiece s) {
273  size_t i = 0;
274  for (; i < s.size(); i++) {
275    char c = s[s.size() - 1 - i];
276    if (isSpace(c)) {
277      if ((c == '\r' || c == '\n') && s.get(s.size() - 2 - i) == '\\')
278        i++;
279      continue;
280    }
281    break;
282  }
283  return s.substr(0, s.size() - i);
284}
285
286StringPiece TrimSpace(StringPiece s) {
287  return TrimRightSpace(TrimLeftSpace(s));
288}
289
290StringPiece Dirname(StringPiece s) {
291  size_t found = s.rfind('/');
292  if (found == string::npos)
293    return StringPiece(".");
294  if (found == 0)
295    return StringPiece("");
296  return s.substr(0, found);
297}
298
299StringPiece Basename(StringPiece s) {
300  size_t found = s.rfind('/');
301  if (found == string::npos || found == 0)
302    return s;
303  return s.substr(found + 1);
304}
305
306StringPiece GetExt(StringPiece s) {
307  size_t found = s.rfind('.');
308  if (found == string::npos)
309    return StringPiece("");
310  return s.substr(found);
311}
312
313StringPiece StripExt(StringPiece s) {
314  size_t slash_index = s.rfind('/');
315  size_t found = s.rfind('.');
316  if (found == string::npos ||
317      (slash_index != string::npos && found < slash_index))
318    return s;
319  return s.substr(0, found);
320}
321
322void NormalizePath(string* o) {
323  if (o->empty())
324    return;
325  size_t start_index = 0;
326  if ((*o)[0] == '/')
327    start_index++;
328  size_t j = start_index;
329  size_t prev_start = start_index;
330  for (size_t i = start_index; i <= o->size(); i++) {
331    char c = (*o)[i];
332    if (c != '/' && c != 0) {
333      (*o)[j] = c;
334      j++;
335      continue;
336    }
337
338    StringPiece prev_dir = StringPiece(o->data() + prev_start, j - prev_start);
339    if (prev_dir == ".") {
340      j--;
341    } else if (prev_dir == ".." && j != 2 /* .. */) {
342      if (j == 3) {
343        // /..
344        j = start_index;
345      } else {
346        size_t orig_j = j;
347        j -= 4;
348        j = o->rfind('/', j);
349        if (j == string::npos) {
350          j = start_index;
351        } else {
352          j++;
353        }
354        if (StringPiece(o->data() + j, 3) == "../") {
355          j = orig_j;
356          (*o)[j] = c;
357          j++;
358        }
359      }
360    } else if (!prev_dir.empty()) {
361      if (c) {
362        (*o)[j] = c;
363        j++;
364      }
365    }
366    prev_start = j;
367  }
368  if (j > 1 && (*o)[j-1] == '/')
369    j--;
370  o->resize(j);
371}
372
373void AbsPath(StringPiece s, string* o) {
374  if (s.get(0) == '/') {
375    o->clear();
376  } else {
377    char buf[PATH_MAX];
378    if (!getcwd(buf, PATH_MAX)) {
379      fprintf(stderr, "getcwd failed\n");
380      CHECK(false);
381    }
382
383    CHECK(buf[0] == '/');
384    *o = buf;
385    *o += '/';
386  }
387  AppendString(s, o);
388  NormalizePath(o);
389}
390
391template<typename Cond>
392size_t FindOutsideParenImpl(StringPiece s, Cond cond) {
393  bool prev_backslash = false;
394  stack<char> paren_stack;
395  for (size_t i = 0; i < s.size(); i++) {
396    char c = s[i];
397    if (cond(c) && paren_stack.empty() && !prev_backslash) {
398      return i;
399    }
400    switch (c) {
401      case '(':
402        paren_stack.push(')');
403        break;
404      case '{':
405        paren_stack.push('}');
406        break;
407
408      case ')':
409      case '}':
410        if (!paren_stack.empty() && c == paren_stack.top()) {
411          paren_stack.pop();
412        }
413        break;
414    }
415    prev_backslash = c == '\\' && !prev_backslash;
416  }
417  return string::npos;
418}
419
420size_t FindOutsideParen(StringPiece s, char c) {
421  return FindOutsideParenImpl(s, [&c](char d){return c == d;});
422}
423
424size_t FindTwoOutsideParen(StringPiece s, char c1, char c2) {
425  return FindOutsideParenImpl(s, [&c1, &c2](char d){
426      return d == c1 || d == c2;
427    });
428}
429
430size_t FindThreeOutsideParen(StringPiece s, char c1, char c2, char c3) {
431  return FindOutsideParenImpl(s, [&c1, &c2, &c3](char d){
432      return d == c1 || d == c2 || d == c3;
433    });
434}
435
436size_t FindEndOfLine(StringPiece s, size_t e, size_t* lf_cnt) {
437  static const char ranges[] = "\0\0\n\n\\\\";
438  while (e < s.size()) {
439    e += SkipUntil(s.data() + e, s.size() - e, ranges, 6,
440                   [](char c) { return c == 0 || c == '\n' || c == '\\'; });
441    if (e >= s.size()) {
442      CHECK(s.size() == e);
443      break;
444    }
445    char c = s[e];
446    if (c == '\0')
447      break;
448    if (c == '\\') {
449      if (s[e+1] == '\n') {
450        e += 2;
451        ++*lf_cnt;
452      } else if (s[e+1] == '\r' && s[e+2] == '\n') {
453        e += 3;
454        ++*lf_cnt;
455      } else if (s[e+1] == '\\') {
456        e += 2;
457      } else {
458        e++;
459      }
460    } else if (c == '\n') {
461      ++*lf_cnt;
462      return e;
463    }
464  }
465  return e;
466}
467
468StringPiece TrimLeadingCurdir(StringPiece s) {
469  while (s.substr(0, 2) == "./")
470    s = s.substr(2);
471  return s;
472}
473
474void FormatForCommandSubstitution(string* s) {
475  while ((*s)[s->size()-1] == '\n')
476    s->pop_back();
477  for (size_t i = 0; i < s->size(); i++) {
478    if ((*s)[i] == '\n')
479      (*s)[i] = ' ';
480  }
481}
482
483string SortWordsInString(StringPiece s) {
484  vector<string> toks;
485  for (StringPiece tok : WordScanner(s)) {
486    toks.push_back(tok.as_string());
487  }
488  sort(toks.begin(), toks.end());
489  return JoinStrings(toks, " ");
490}
491
492string ConcatDir(StringPiece b, StringPiece n) {
493  string r;
494  if (!b.empty()) {
495    b.AppendToString(&r);
496    r += '/';
497  }
498  n.AppendToString(&r);
499  NormalizePath(&r);
500  return r;
501}
502
503string EchoEscape(const string str) {
504  const char *in = str.c_str();
505  string buf;
506  for (; *in; in++) {
507    switch(*in) {
508      case '\\':
509        buf += "\\\\\\\\";
510        break;
511      case '\n':
512        buf += "\\n";
513        break;
514      case '"':
515        buf += "\\\"";
516        break;
517      default:
518        buf += *in;
519    }
520  }
521  return buf;
522}
523
524static bool NeedsShellEscape(char c) {
525  return c == 0 || c == '"' || c == '$' || c == '\\' || c == '`';
526}
527
528void EscapeShell(string* s) {
529  static const char ranges[] = "\0\0\"\"$$\\\\``";
530  size_t prev = 0;
531  size_t i = SkipUntil(s->c_str(), s->size(), ranges, 10, NeedsShellEscape);
532  if (i == s->size())
533    return;
534
535  string r;
536  for (; i < s->size();) {
537    StringPiece(*s).substr(prev, i - prev).AppendToString(&r);
538    char c = (*s)[i];
539    r += '\\';
540    if (c == '$') {
541      if ((*s)[i+1] == '$') {
542        r += '$';
543        i++;
544      }
545    }
546    r += c;
547    i++;
548    prev = i;
549    i += SkipUntil(s->c_str() + i, s->size() - i, ranges, 10, NeedsShellEscape);
550  }
551  StringPiece(*s).substr(prev).AppendToString(&r);
552  s->swap(r);
553}
554