1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "net/ftp/ftp_util.h" 6 7#include <map> 8#include <vector> 9 10#include "base/i18n/case_conversion.h" 11#include "base/i18n/char_iterator.h" 12#include "base/logging.h" 13#include "base/memory/singleton.h" 14#include "base/strings/string_number_conversions.h" 15#include "base/strings/string_piece.h" 16#include "base/strings/string_split.h" 17#include "base/strings/string_tokenizer.h" 18#include "base/strings/string_util.h" 19#include "base/strings/utf_string_conversions.h" 20#include "base/time/time.h" 21#include "third_party/icu/source/common/unicode/uchar.h" 22#include "third_party/icu/source/i18n/unicode/datefmt.h" 23#include "third_party/icu/source/i18n/unicode/dtfmtsym.h" 24 25using base::StringPiece16; 26 27// For examples of Unix<->VMS path conversions, see the unit test file. On VMS 28// a path looks differently depending on whether it's a file or directory. 29 30namespace net { 31 32// static 33std::string FtpUtil::UnixFilePathToVMS(const std::string& unix_path) { 34 if (unix_path.empty()) 35 return std::string(); 36 37 base::StringTokenizer tokenizer(unix_path, "/"); 38 std::vector<std::string> tokens; 39 while (tokenizer.GetNext()) 40 tokens.push_back(tokenizer.token()); 41 42 if (unix_path[0] == '/') { 43 // It's an absolute path. 44 45 if (tokens.empty()) { 46 DCHECK_EQ(1U, unix_path.length()); 47 return "[]"; 48 } 49 50 if (tokens.size() == 1) 51 return unix_path.substr(1); // Drop the leading slash. 52 53 std::string result(tokens[0] + ":["); 54 if (tokens.size() == 2) { 55 // Don't ask why, it just works that way on VMS. 56 result.append("000000"); 57 } else { 58 result.append(tokens[1]); 59 for (size_t i = 2; i < tokens.size() - 1; i++) 60 result.append("." + tokens[i]); 61 } 62 result.append("]" + tokens[tokens.size() - 1]); 63 return result; 64 } 65 66 if (tokens.size() == 1) 67 return unix_path; 68 69 std::string result("["); 70 for (size_t i = 0; i < tokens.size() - 1; i++) 71 result.append("." + tokens[i]); 72 result.append("]" + tokens[tokens.size() - 1]); 73 return result; 74} 75 76// static 77std::string FtpUtil::UnixDirectoryPathToVMS(const std::string& unix_path) { 78 if (unix_path.empty()) 79 return std::string(); 80 81 std::string path(unix_path); 82 83 if (path[path.length() - 1] != '/') 84 path.append("/"); 85 86 // Reuse logic from UnixFilePathToVMS by appending a fake file name to the 87 // real path and removing it after conversion. 88 path.append("x"); 89 path = UnixFilePathToVMS(path); 90 return path.substr(0, path.length() - 1); 91} 92 93// static 94std::string FtpUtil::VMSPathToUnix(const std::string& vms_path) { 95 if (vms_path.empty()) 96 return "."; 97 98 if (vms_path[0] == '/') { 99 // This is not really a VMS path. Most likely the server is emulating UNIX. 100 // Return path as-is. 101 return vms_path; 102 } 103 104 if (vms_path == "[]") 105 return "/"; 106 107 std::string result(vms_path); 108 if (vms_path[0] == '[') { 109 // It's a relative path. 110 ReplaceFirstSubstringAfterOffset(&result, 0, "[.", std::string()); 111 } else { 112 // It's an absolute path. 113 result.insert(0, "/"); 114 ReplaceSubstringsAfterOffset(&result, 0, ":[000000]", "/"); 115 ReplaceSubstringsAfterOffset(&result, 0, ":[", "/"); 116 } 117 std::replace(result.begin(), result.end(), '.', '/'); 118 std::replace(result.begin(), result.end(), ']', '/'); 119 120 // Make sure the result doesn't end with a slash. 121 if (result.length() && result[result.length() - 1] == '/') 122 result = result.substr(0, result.length() - 1); 123 124 return result; 125} 126 127namespace { 128 129// Lazy-initialized map of abbreviated month names. 130class AbbreviatedMonthsMap { 131 public: 132 static AbbreviatedMonthsMap* GetInstance() { 133 return Singleton<AbbreviatedMonthsMap>::get(); 134 } 135 136 // Converts abbreviated month name |text| to its number (in range 1-12). 137 // On success returns true and puts the number in |number|. 138 bool GetMonthNumber(const base::string16& text, int* number) { 139 // Ignore the case of the month names. The simplest way to handle that 140 // is to make everything lowercase. 141 base::string16 text_lower(base::i18n::ToLower(text)); 142 143 if (map_.find(text_lower) == map_.end()) 144 return false; 145 146 *number = map_[text_lower]; 147 return true; 148 } 149 150 private: 151 friend struct DefaultSingletonTraits<AbbreviatedMonthsMap>; 152 153 // Constructor, initializes the map based on ICU data. It is much faster 154 // to do that just once. 155 AbbreviatedMonthsMap() { 156 int32_t locales_count; 157 const icu::Locale* locales = 158 icu::DateFormat::getAvailableLocales(locales_count); 159 160 for (int32_t locale = 0; locale < locales_count; locale++) { 161 UErrorCode status(U_ZERO_ERROR); 162 163 icu::DateFormatSymbols format_symbols(locales[locale], status); 164 165 // If we cannot get format symbols for some locale, it's not a fatal 166 // error. Just try another one. 167 if (U_FAILURE(status)) 168 continue; 169 170 int32_t months_count; 171 const icu::UnicodeString* months = 172 format_symbols.getShortMonths(months_count); 173 174 for (int32_t month = 0; month < months_count; month++) { 175 base::string16 month_name(months[month].getBuffer(), 176 static_cast<size_t>(months[month].length())); 177 178 // Ignore the case of the month names. The simplest way to handle that 179 // is to make everything lowercase. 180 month_name = base::i18n::ToLower(month_name); 181 182 map_[month_name] = month + 1; 183 184 // Sometimes ICU returns longer strings, but in FTP listings a shorter 185 // abbreviation is used (for example for the Russian locale). Make sure 186 // we always have a map entry for a three-letter abbreviation. 187 map_[month_name.substr(0, 3)] = month + 1; 188 } 189 } 190 191 // Fail loudly if the data returned by ICU is obviously incomplete. 192 // This is intended to catch cases like http://crbug.com/177428 193 // much earlier. Note that the issue above turned out to be non-trivial 194 // to reproduce - crash data is much better indicator of a problem 195 // than incomplete bug reports. 196 CHECK_EQ(1, map_[ASCIIToUTF16("jan")]); 197 CHECK_EQ(2, map_[ASCIIToUTF16("feb")]); 198 CHECK_EQ(3, map_[ASCIIToUTF16("mar")]); 199 CHECK_EQ(4, map_[ASCIIToUTF16("apr")]); 200 CHECK_EQ(5, map_[ASCIIToUTF16("may")]); 201 CHECK_EQ(6, map_[ASCIIToUTF16("jun")]); 202 CHECK_EQ(7, map_[ASCIIToUTF16("jul")]); 203 CHECK_EQ(8, map_[ASCIIToUTF16("aug")]); 204 CHECK_EQ(9, map_[ASCIIToUTF16("sep")]); 205 CHECK_EQ(10, map_[ASCIIToUTF16("oct")]); 206 CHECK_EQ(11, map_[ASCIIToUTF16("nov")]); 207 CHECK_EQ(12, map_[ASCIIToUTF16("dec")]); 208 } 209 210 // Maps lowercase month names to numbers in range 1-12. 211 std::map<base::string16, int> map_; 212 213 DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap); 214}; 215 216} // namespace 217 218// static 219bool FtpUtil::AbbreviatedMonthToNumber(const base::string16& text, 220 int* number) { 221 return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text, number); 222} 223 224// static 225bool FtpUtil::LsDateListingToTime(const base::string16& month, 226 const base::string16& day, 227 const base::string16& rest, 228 const base::Time& current_time, 229 base::Time* result) { 230 base::Time::Exploded time_exploded = { 0 }; 231 232 if (!AbbreviatedMonthToNumber(month, &time_exploded.month)) { 233 // Work around garbage sent by some servers in the same column 234 // as the month. Take just last 3 characters of the string. 235 if (month.length() < 3 || 236 !AbbreviatedMonthToNumber(month.substr(month.length() - 3), 237 &time_exploded.month)) { 238 return false; 239 } 240 } 241 242 if (!base::StringToInt(day, &time_exploded.day_of_month)) 243 return false; 244 if (time_exploded.day_of_month > 31) 245 return false; 246 247 if (!base::StringToInt(rest, &time_exploded.year)) { 248 // Maybe it's time. Does it look like time? Note that it can be any of 249 // "HH:MM", "H:MM", "HH:M" or maybe even "H:M". 250 if (rest.length() > 5) 251 return false; 252 253 size_t colon_pos = rest.find(':'); 254 if (colon_pos == base::string16::npos) 255 return false; 256 if (colon_pos > 2) 257 return false; 258 259 if (!base::StringToInt( 260 StringPiece16(rest.begin(), rest.begin() + colon_pos), 261 &time_exploded.hour)) { 262 return false; 263 } 264 if (!base::StringToInt( 265 StringPiece16(rest.begin() + colon_pos + 1, rest.end()), 266 &time_exploded.minute)) { 267 return false; 268 } 269 270 // Guess the year. 271 base::Time::Exploded current_exploded; 272 current_time.LocalExplode(¤t_exploded); 273 274 // If it's not possible for the parsed date to be in the current year, 275 // use the previous year. 276 if (time_exploded.month > current_exploded.month || 277 (time_exploded.month == current_exploded.month && 278 time_exploded.day_of_month > current_exploded.day_of_month)) { 279 time_exploded.year = current_exploded.year - 1; 280 } else { 281 time_exploded.year = current_exploded.year; 282 } 283 } 284 285 // We don't know the time zone of the listing, so just use local time. 286 *result = base::Time::FromLocalExploded(time_exploded); 287 return true; 288} 289 290// static 291bool FtpUtil::WindowsDateListingToTime(const base::string16& date, 292 const base::string16& time, 293 base::Time* result) { 294 base::Time::Exploded time_exploded = { 0 }; 295 296 // Date should be in format MM-DD-YY[YY]. 297 std::vector<base::string16> date_parts; 298 base::SplitString(date, '-', &date_parts); 299 if (date_parts.size() != 3) 300 return false; 301 if (!base::StringToInt(date_parts[0], &time_exploded.month)) 302 return false; 303 if (!base::StringToInt(date_parts[1], &time_exploded.day_of_month)) 304 return false; 305 if (!base::StringToInt(date_parts[2], &time_exploded.year)) 306 return false; 307 if (time_exploded.year < 0) 308 return false; 309 // If year has only two digits then assume that 00-79 is 2000-2079, 310 // and 80-99 is 1980-1999. 311 if (time_exploded.year < 80) 312 time_exploded.year += 2000; 313 else if (time_exploded.year < 100) 314 time_exploded.year += 1900; 315 316 // Time should be in format HH:MM[(AM|PM)] 317 if (time.length() < 5) 318 return false; 319 320 std::vector<base::string16> time_parts; 321 base::SplitString(time.substr(0, 5), ':', &time_parts); 322 if (time_parts.size() != 2) 323 return false; 324 if (!base::StringToInt(time_parts[0], &time_exploded.hour)) 325 return false; 326 if (!base::StringToInt(time_parts[1], &time_exploded.minute)) 327 return false; 328 if (!time_exploded.HasValidValues()) 329 return false; 330 331 if (time.length() > 5) { 332 if (time.length() != 7) 333 return false; 334 base::string16 am_or_pm(time.substr(5, 2)); 335 if (EqualsASCII(am_or_pm, "PM")) { 336 if (time_exploded.hour < 12) 337 time_exploded.hour += 12; 338 } else if (EqualsASCII(am_or_pm, "AM")) { 339 if (time_exploded.hour == 12) 340 time_exploded.hour = 0; 341 } else { 342 return false; 343 } 344 } 345 346 // We don't know the time zone of the server, so just use local time. 347 *result = base::Time::FromLocalExploded(time_exploded); 348 return true; 349} 350 351// static 352base::string16 FtpUtil::GetStringPartAfterColumns(const base::string16& text, 353 int columns) { 354 base::i18n::UTF16CharIterator iter(&text); 355 356 // TODO(jshin): Is u_isspace the right function to use here? 357 for (int i = 0; i < columns; i++) { 358 // Skip the leading whitespace. 359 while (!iter.end() && u_isspace(iter.get())) 360 iter.Advance(); 361 362 // Skip the actual text of i-th column. 363 while (!iter.end() && !u_isspace(iter.get())) 364 iter.Advance(); 365 } 366 367 base::string16 result(text.substr(iter.array_pos())); 368 TrimWhitespace(result, TRIM_ALL, &result); 369 return result; 370} 371 372} // namespace 373