strutil.cc revision d26caadec345d7f19d63f894a0b8320693543ea6
1// Copyright 2015 Google Inc. All rights reserved 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// +build ignore 16 17#include "strutil.h" 18 19#include <ctype.h> 20#include <limits.h> 21#include <unistd.h> 22 23#include <algorithm> 24#include <functional> 25#include <stack> 26#include <utility> 27 28#ifdef __SSE4_2__ 29#include <smmintrin.h> 30#endif 31 32#include "log.h" 33 34static bool isSpace(char c) { 35 return (9 <= c && c <= 13) || c == 32; 36} 37 38#ifdef __SSE4_2__ 39static int SkipUntilSSE42(const char* s, int len, 40 const char* ranges, int ranges_size) { 41 __m128i ranges16 = _mm_loadu_si128((const __m128i*)ranges); 42 len &= ~15; 43 int i = 0; 44 while (i < len) { 45 __m128i b16 = _mm_loadu_si128((const __m128i*)(s + i)); 46 int r = _mm_cmpestri( 47 ranges16, ranges_size, b16, len - i, 48 _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS); 49 if (r != 16) { 50 return i + r; 51 } 52 i += 16; 53 } 54 return len; 55} 56#endif 57 58template <typename Cond> 59static int SkipUntil(const char* s, int len, 60 const char* ranges, int ranges_size, 61 Cond cond) { 62 int i = 0; 63#ifdef __SSE4_2__ 64 i += SkipUntilSSE42(s, len, ranges, ranges_size); 65#endif 66 for (; i < len; i++) { 67 if (cond(s[i])) 68 break; 69 } 70 return i; 71} 72 73WordScanner::Iterator& WordScanner::Iterator::operator++() { 74 int len = static_cast<int>(in->size()); 75 for (s = i + 1; s < len; s++) { 76 if (!isSpace((*in)[s])) 77 break; 78 } 79 if (s >= len) { 80 in = NULL; 81 s = 0; 82 i = 0; 83 return *this; 84 } 85 86 static const char ranges[] = "\x09\x0d "; 87 // It's intentional we are not using isSpace here. It seems with 88 // lambda the compiler generates better code. 89 i = s + SkipUntil(in->data() + s, len - s, ranges, 4, 90 [](char c) { return (9 <= c && c <= 13) || c == 32; }); 91 return *this; 92} 93 94StringPiece WordScanner::Iterator::operator*() const { 95 return in->substr(s, i - s); 96} 97 98WordScanner::WordScanner(StringPiece in) 99 : in_(in) { 100} 101 102WordScanner::Iterator WordScanner::begin() const { 103 Iterator iter; 104 iter.in = &in_; 105 iter.s = 0; 106 iter.i = -1; 107 ++iter; 108 return iter; 109} 110 111WordScanner::Iterator WordScanner::end() const { 112 Iterator iter; 113 iter.in = NULL; 114 iter.s = 0; 115 iter.i = 0; 116 return iter; 117} 118 119void WordScanner::Split(vector<StringPiece>* o) { 120 for (StringPiece t : *this) 121 o->push_back(t); 122} 123 124WordWriter::WordWriter(string* o) 125 : out_(o), 126 needs_space_(false) { 127} 128 129void WordWriter::MaybeAddWhitespace() { 130 if (needs_space_) { 131 out_->push_back(' '); 132 } else { 133 needs_space_ = true; 134 } 135} 136 137void WordWriter::Write(StringPiece s) { 138 MaybeAddWhitespace(); 139 AppendString(s, out_); 140} 141 142ScopedTerminator::ScopedTerminator(StringPiece s) 143 : s_(s), c_(s[s.size()]) { 144 const_cast<char*>(s_.data())[s_.size()] = '\0'; 145} 146 147ScopedTerminator::~ScopedTerminator() { 148 const_cast<char*>(s_.data())[s_.size()] = c_; 149} 150 151void AppendString(StringPiece str, string* out) { 152 out->append(str.begin(), str.end()); 153} 154 155bool HasPrefix(StringPiece str, StringPiece prefix) { 156 ssize_t size_diff = str.size() - prefix.size(); 157 return size_diff >= 0 && str.substr(0, prefix.size()) == prefix; 158} 159 160bool HasSuffix(StringPiece str, StringPiece suffix) { 161 ssize_t size_diff = str.size() - suffix.size(); 162 return size_diff >= 0 && str.substr(size_diff) == suffix; 163} 164 165bool HasWord(StringPiece str, StringPiece w) { 166 size_t found = str.find(w); 167 if (found == string::npos) 168 return false; 169 if (found != 0 && !isSpace(str[found-1])) 170 return false; 171 size_t end = found + w.size(); 172 if (end != str.size() && !isSpace(str[end])) 173 return false; 174 return true; 175} 176 177StringPiece TrimSuffix(StringPiece str, StringPiece suffix) { 178 ssize_t size_diff = str.size() - suffix.size(); 179 if (size_diff < 0 || str.substr(size_diff) != suffix) 180 return str; 181 return str.substr(0, size_diff); 182} 183 184Pattern::Pattern(StringPiece pat) 185 : pat_(pat), percent_index_(pat.find('%')) { 186} 187 188bool Pattern::Match(StringPiece str) const { 189 if (percent_index_ == string::npos) 190 return str == pat_; 191 return MatchImpl(str); 192} 193 194bool Pattern::MatchImpl(StringPiece str) const { 195 return (HasPrefix(str, pat_.substr(0, percent_index_)) && 196 HasSuffix(str, pat_.substr(percent_index_ + 1))); 197} 198 199StringPiece Pattern::Stem(StringPiece str) const { 200 if (!Match(str)) 201 return ""; 202 return str.substr(percent_index_, 203 str.size() - (pat_.size() - percent_index_ - 1)); 204} 205 206void Pattern::AppendSubst(StringPiece str, StringPiece subst, 207 string* out) const { 208 if (percent_index_ == string::npos) { 209 if (str == pat_) { 210 AppendString(subst, out); 211 return; 212 } else { 213 AppendString(str, out); 214 return; 215 } 216 } 217 218 if (MatchImpl(str)) { 219 size_t subst_percent_index = subst.find('%'); 220 if (subst_percent_index == string::npos) { 221 AppendString(subst, out); 222 return; 223 } else { 224 AppendString(subst.substr(0, subst_percent_index), out); 225 AppendString(str.substr(percent_index_, 226 str.size() - pat_.size() + 1), out); 227 AppendString(subst.substr(subst_percent_index + 1), out); 228 return; 229 } 230 } 231 AppendString(str, out); 232} 233 234void Pattern::AppendSubstRef(StringPiece str, StringPiece subst, 235 string* out) const { 236 if (percent_index_ != string::npos && subst.find('%') != string::npos) { 237 AppendSubst(str, subst, out); 238 return; 239 } 240 StringPiece s = TrimSuffix(str, pat_); 241 out->append(s.begin(), s.end()); 242 out->append(subst.begin(), subst.end()); 243} 244 245string NoLineBreak(const string& s) { 246 size_t index = s.find('\n'); 247 if (index == string::npos) 248 return s; 249 string r = s; 250 while (index != string::npos) { 251 r = r.substr(0, index) + "\\n" + r.substr(index + 1); 252 index = r.find('\n', index + 2); 253 } 254 return r; 255} 256 257StringPiece TrimLeftSpace(StringPiece s) { 258 size_t i = 0; 259 for (; i < s.size(); i++) { 260 if (isSpace(s[i])) 261 continue; 262 char n = s.get(i+1); 263 if (s[i] == '\\' && (n == '\r' || n == '\n')) { 264 i++; 265 continue; 266 } 267 break; 268 } 269 return s.substr(i, s.size() - i); 270} 271 272StringPiece TrimRightSpace(StringPiece s) { 273 size_t i = 0; 274 for (; i < s.size(); i++) { 275 char c = s[s.size() - 1 - i]; 276 if (isSpace(c)) { 277 if ((c == '\r' || c == '\n') && s.get(s.size() - 2 - i) == '\\') 278 i++; 279 continue; 280 } 281 break; 282 } 283 return s.substr(0, s.size() - i); 284} 285 286StringPiece TrimSpace(StringPiece s) { 287 return TrimRightSpace(TrimLeftSpace(s)); 288} 289 290StringPiece Dirname(StringPiece s) { 291 size_t found = s.rfind('/'); 292 if (found == string::npos) 293 return StringPiece("."); 294 if (found == 0) 295 return StringPiece(""); 296 return s.substr(0, found); 297} 298 299StringPiece Basename(StringPiece s) { 300 size_t found = s.rfind('/'); 301 if (found == string::npos || found == 0) 302 return s; 303 return s.substr(found + 1); 304} 305 306StringPiece GetExt(StringPiece s) { 307 size_t found = s.rfind('.'); 308 if (found == string::npos) 309 return StringPiece(""); 310 return s.substr(found); 311} 312 313StringPiece StripExt(StringPiece s) { 314 size_t slash_index = s.rfind('/'); 315 size_t found = s.rfind('.'); 316 if (found == string::npos || 317 (slash_index != string::npos && found < slash_index)) 318 return s; 319 return s.substr(0, found); 320} 321 322void NormalizePath(string* o) { 323 if (o->empty()) 324 return; 325 size_t start_index = 0; 326 if ((*o)[0] == '/') 327 start_index++; 328 size_t j = start_index; 329 size_t prev_start = start_index; 330 for (size_t i = start_index; i <= o->size(); i++) { 331 char c = (*o)[i]; 332 if (c != '/' && c != 0) { 333 (*o)[j] = c; 334 j++; 335 continue; 336 } 337 338 StringPiece prev_dir = StringPiece(o->data() + prev_start, j - prev_start); 339 if (prev_dir == ".") { 340 j--; 341 } else if (prev_dir == ".." && j != 2 /* .. */) { 342 if (j == 3) { 343 // /.. 344 j = start_index; 345 } else { 346 size_t orig_j = j; 347 j -= 4; 348 j = o->rfind('/', j); 349 if (j == string::npos) { 350 j = start_index; 351 } else { 352 j++; 353 } 354 if (StringPiece(o->data() + j, 3) == "../") { 355 j = orig_j; 356 (*o)[j] = c; 357 j++; 358 } 359 } 360 } else if (!prev_dir.empty()) { 361 if (c) { 362 (*o)[j] = c; 363 j++; 364 } 365 } 366 prev_start = j; 367 } 368 if (j > 1 && (*o)[j-1] == '/') 369 j--; 370 o->resize(j); 371} 372 373void AbsPath(StringPiece s, string* o) { 374 if (s.get(0) == '/') { 375 o->clear(); 376 } else { 377 char buf[PATH_MAX]; 378 if (!getcwd(buf, PATH_MAX)) { 379 fprintf(stderr, "getcwd failed\n"); 380 CHECK(false); 381 } 382 383 CHECK(buf[0] == '/'); 384 *o = buf; 385 *o += '/'; 386 } 387 AppendString(s, o); 388 NormalizePath(o); 389} 390 391template<typename Cond> 392size_t FindOutsideParenImpl(StringPiece s, Cond cond) { 393 bool prev_backslash = false; 394 stack<char> paren_stack; 395 for (size_t i = 0; i < s.size(); i++) { 396 char c = s[i]; 397 if (cond(c) && paren_stack.empty() && !prev_backslash) { 398 return i; 399 } 400 switch (c) { 401 case '(': 402 paren_stack.push(')'); 403 break; 404 case '{': 405 paren_stack.push('}'); 406 break; 407 408 case ')': 409 case '}': 410 if (!paren_stack.empty() && c == paren_stack.top()) { 411 paren_stack.pop(); 412 } 413 break; 414 } 415 prev_backslash = c == '\\' && !prev_backslash; 416 } 417 return string::npos; 418} 419 420size_t FindOutsideParen(StringPiece s, char c) { 421 return FindOutsideParenImpl(s, [&c](char d){return c == d;}); 422} 423 424size_t FindTwoOutsideParen(StringPiece s, char c1, char c2) { 425 return FindOutsideParenImpl(s, [&c1, &c2](char d){ 426 return d == c1 || d == c2; 427 }); 428} 429 430size_t FindThreeOutsideParen(StringPiece s, char c1, char c2, char c3) { 431 return FindOutsideParenImpl(s, [&c1, &c2, &c3](char d){ 432 return d == c1 || d == c2 || d == c3; 433 }); 434} 435 436size_t FindEndOfLine(StringPiece s, size_t e, size_t* lf_cnt) { 437 static const char ranges[] = "\0\0\n\n\\\\"; 438 while (e < s.size()) { 439 e += SkipUntil(s.data() + e, s.size() - e, ranges, 6, 440 [](char c) { return c == 0 || c == '\n' || c == '\\'; }); 441 if (e >= s.size()) { 442 CHECK(s.size() == e); 443 break; 444 } 445 char c = s[e]; 446 if (c == '\0') 447 break; 448 if (c == '\\') { 449 if (s[e+1] == '\n') { 450 e += 2; 451 ++*lf_cnt; 452 } else if (s[e+1] == '\r' && s[e+2] == '\n') { 453 e += 3; 454 ++*lf_cnt; 455 } else if (s[e+1] == '\\') { 456 e += 2; 457 } else { 458 e++; 459 } 460 } else if (c == '\n') { 461 ++*lf_cnt; 462 return e; 463 } 464 } 465 return e; 466} 467 468StringPiece TrimLeadingCurdir(StringPiece s) { 469 while (s.substr(0, 2) == "./") 470 s = s.substr(2); 471 return s; 472} 473 474void FormatForCommandSubstitution(string* s) { 475 while ((*s)[s->size()-1] == '\n') 476 s->pop_back(); 477 for (size_t i = 0; i < s->size(); i++) { 478 if ((*s)[i] == '\n') 479 (*s)[i] = ' '; 480 } 481} 482 483string SortWordsInString(StringPiece s) { 484 vector<string> toks; 485 for (StringPiece tok : WordScanner(s)) { 486 toks.push_back(tok.as_string()); 487 } 488 sort(toks.begin(), toks.end()); 489 return JoinStrings(toks, " "); 490} 491 492string ConcatDir(StringPiece b, StringPiece n) { 493 string r; 494 if (!b.empty()) { 495 b.AppendToString(&r); 496 r += '/'; 497 } 498 n.AppendToString(&r); 499 NormalizePath(&r); 500 return r; 501} 502 503string EchoEscape(const string str) { 504 const char *in = str.c_str(); 505 string buf; 506 for (; *in; in++) { 507 switch(*in) { 508 case '\\': 509 buf += "\\\\\\\\"; 510 break; 511 case '\n': 512 buf += "\\n"; 513 break; 514 case '"': 515 buf += "\\\""; 516 break; 517 default: 518 buf += *in; 519 } 520 } 521 return buf; 522} 523 524static bool NeedsShellEscape(char c) { 525 return c == 0 || c == '"' || c == '$' || c == '\\' || c == '`'; 526} 527 528void EscapeShell(string* s) { 529 static const char ranges[] = "\0\0\"\"$$\\\\``"; 530 size_t prev = 0; 531 size_t i = SkipUntil(s->c_str(), s->size(), ranges, 10, NeedsShellEscape); 532 if (i == s->size()) 533 return; 534 535 string r; 536 for (; i < s->size();) { 537 StringPiece(*s).substr(prev, i - prev).AppendToString(&r); 538 char c = (*s)[i]; 539 r += '\\'; 540 if (c == '$') { 541 if ((*s)[i+1] == '$') { 542 r += '$'; 543 i++; 544 } 545 } 546 r += c; 547 i++; 548 prev = i; 549 i += SkipUntil(s->c_str() + i, s->size() - i, ranges, 10, NeedsShellEscape); 550 } 551 StringPiece(*s).substr(prev).AppendToString(&r); 552 s->swap(r); 553} 554