ftp_util.cc revision 0f1bc08d4cfcc34181b0b5cbf065c40f687bf740
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/ftp/ftp_util.h"
6
7#include <map>
8#include <vector>
9
10#include "base/i18n/case_conversion.h"
11#include "base/i18n/char_iterator.h"
12#include "base/logging.h"
13#include "base/memory/singleton.h"
14#include "base/strings/string_number_conversions.h"
15#include "base/strings/string_piece.h"
16#include "base/strings/string_split.h"
17#include "base/strings/string_tokenizer.h"
18#include "base/strings/string_util.h"
19#include "base/strings/utf_string_conversions.h"
20#include "base/time/time.h"
21#include "third_party/icu/source/common/unicode/uchar.h"
22#include "third_party/icu/source/i18n/unicode/datefmt.h"
23#include "third_party/icu/source/i18n/unicode/dtfmtsym.h"
24
25using base::StringPiece16;
26
// For examples of Unix<->VMS path conversions, see the unit test file. On VMS
// a path looks different depending on whether it refers to a file or to a
// directory.
29
30namespace net {
31
32// static
33std::string FtpUtil::UnixFilePathToVMS(const std::string& unix_path) {
34  if (unix_path.empty())
35    return std::string();
36
37  base::StringTokenizer tokenizer(unix_path, "/");
38  std::vector<std::string> tokens;
39  while (tokenizer.GetNext())
40    tokens.push_back(tokenizer.token());
41
42  if (unix_path[0] == '/') {
43    // It's an absolute path.
44
45    if (tokens.empty()) {
46      DCHECK_EQ(1U, unix_path.length());
47      return "[]";
48    }
49
50    if (tokens.size() == 1)
51      return unix_path.substr(1);  // Drop the leading slash.
52
53    std::string result(tokens[0] + ":[");
54    if (tokens.size() == 2) {
55      // Don't ask why, it just works that way on VMS.
56      result.append("000000");
57    } else {
58      result.append(tokens[1]);
59      for (size_t i = 2; i < tokens.size() - 1; i++)
60        result.append("." + tokens[i]);
61    }
62    result.append("]" + tokens[tokens.size() - 1]);
63    return result;
64  }
65
66  if (tokens.size() == 1)
67    return unix_path;
68
69  std::string result("[");
70  for (size_t i = 0; i < tokens.size() - 1; i++)
71    result.append("." + tokens[i]);
72  result.append("]" + tokens[tokens.size() - 1]);
73  return result;
74}
75
76// static
77std::string FtpUtil::UnixDirectoryPathToVMS(const std::string& unix_path) {
78  if (unix_path.empty())
79    return std::string();
80
81  std::string path(unix_path);
82
83  if (path[path.length() - 1] != '/')
84    path.append("/");
85
86  // Reuse logic from UnixFilePathToVMS by appending a fake file name to the
87  // real path and removing it after conversion.
88  path.append("x");
89  path = UnixFilePathToVMS(path);
90  return path.substr(0, path.length() - 1);
91}
92
93// static
94std::string FtpUtil::VMSPathToUnix(const std::string& vms_path) {
95  if (vms_path.empty())
96    return ".";
97
98  if (vms_path[0] == '/') {
99    // This is not really a VMS path. Most likely the server is emulating UNIX.
100    // Return path as-is.
101    return vms_path;
102  }
103
104  if (vms_path == "[]")
105    return "/";
106
107  std::string result(vms_path);
108  if (vms_path[0] == '[') {
109    // It's a relative path.
110    ReplaceFirstSubstringAfterOffset(&result, 0, "[.", std::string());
111  } else {
112    // It's an absolute path.
113    result.insert(0, "/");
114    ReplaceSubstringsAfterOffset(&result, 0, ":[000000]", "/");
115    ReplaceSubstringsAfterOffset(&result, 0, ":[", "/");
116  }
117  std::replace(result.begin(), result.end(), '.', '/');
118  std::replace(result.begin(), result.end(), ']', '/');
119
120  // Make sure the result doesn't end with a slash.
121  if (result.length() && result[result.length() - 1] == '/')
122    result = result.substr(0, result.length() - 1);
123
124  return result;
125}
126
127namespace {
128
129// Lazy-initialized map of abbreviated month names.
130class AbbreviatedMonthsMap {
131 public:
132  static AbbreviatedMonthsMap* GetInstance() {
133    return Singleton<AbbreviatedMonthsMap>::get();
134  }
135
136  // Converts abbreviated month name |text| to its number (in range 1-12).
137  // On success returns true and puts the number in |number|.
138  bool GetMonthNumber(const base::string16& text, int* number) {
139    // Ignore the case of the month names. The simplest way to handle that
140    // is to make everything lowercase.
141    base::string16 text_lower(base::i18n::ToLower(text));
142
143    if (map_.find(text_lower) == map_.end())
144      return false;
145
146    *number = map_[text_lower];
147    return true;
148  }
149
150 private:
151  friend struct DefaultSingletonTraits<AbbreviatedMonthsMap>;
152
153  // Constructor, initializes the map based on ICU data. It is much faster
154  // to do that just once.
155  AbbreviatedMonthsMap() {
156    int32_t locales_count;
157    const icu::Locale* locales =
158        icu::DateFormat::getAvailableLocales(locales_count);
159
160    for (int32_t locale = 0; locale < locales_count; locale++) {
161      UErrorCode status(U_ZERO_ERROR);
162
163      icu::DateFormatSymbols format_symbols(locales[locale], status);
164
165      // If we cannot get format symbols for some locale, it's not a fatal
166      // error. Just try another one.
167      if (U_FAILURE(status))
168        continue;
169
170      int32_t months_count;
171      const icu::UnicodeString* months =
172          format_symbols.getShortMonths(months_count);
173
174      for (int32_t month = 0; month < months_count; month++) {
175        base::string16 month_name(months[month].getBuffer(),
176                            static_cast<size_t>(months[month].length()));
177
178        // Ignore the case of the month names. The simplest way to handle that
179        // is to make everything lowercase.
180        month_name = base::i18n::ToLower(month_name);
181
182        map_[month_name] = month + 1;
183
184        // Sometimes ICU returns longer strings, but in FTP listings a shorter
185        // abbreviation is used (for example for the Russian locale). Make sure
186        // we always have a map entry for a three-letter abbreviation.
187        map_[month_name.substr(0, 3)] = month + 1;
188      }
189    }
190
191    // Fail loudly if the data returned by ICU is obviously incomplete.
192    // This is intended to catch cases like http://crbug.com/177428
193    // much earlier. Note that the issue above turned out to be non-trivial
194    // to reproduce - crash data is much better indicator of a problem
195    // than incomplete bug reports.
196    CHECK_EQ(1, map_[ASCIIToUTF16("jan")]);
197    CHECK_EQ(2, map_[ASCIIToUTF16("feb")]);
198    CHECK_EQ(3, map_[ASCIIToUTF16("mar")]);
199    CHECK_EQ(4, map_[ASCIIToUTF16("apr")]);
200    CHECK_EQ(5, map_[ASCIIToUTF16("may")]);
201    CHECK_EQ(6, map_[ASCIIToUTF16("jun")]);
202    CHECK_EQ(7, map_[ASCIIToUTF16("jul")]);
203    CHECK_EQ(8, map_[ASCIIToUTF16("aug")]);
204    CHECK_EQ(9, map_[ASCIIToUTF16("sep")]);
205    CHECK_EQ(10, map_[ASCIIToUTF16("oct")]);
206    CHECK_EQ(11, map_[ASCIIToUTF16("nov")]);
207    CHECK_EQ(12, map_[ASCIIToUTF16("dec")]);
208  }
209
210  // Maps lowercase month names to numbers in range 1-12.
211  std::map<base::string16, int> map_;
212
213  DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap);
214};
215
216}  // namespace
217
218// static
219bool FtpUtil::AbbreviatedMonthToNumber(const base::string16& text,
220                                       int* number) {
221  return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text, number);
222}
223
224// static
225bool FtpUtil::LsDateListingToTime(const base::string16& month,
226                                  const base::string16& day,
227                                  const base::string16& rest,
228                                  const base::Time& current_time,
229                                  base::Time* result) {
230  base::Time::Exploded time_exploded = { 0 };
231
232  if (!AbbreviatedMonthToNumber(month, &time_exploded.month)) {
233    // Work around garbage sent by some servers in the same column
234    // as the month. Take just last 3 characters of the string.
235    if (month.length() < 3 ||
236        !AbbreviatedMonthToNumber(month.substr(month.length() - 3),
237                                  &time_exploded.month)) {
238      return false;
239    }
240  }
241
242  if (!base::StringToInt(day, &time_exploded.day_of_month))
243    return false;
244  if (time_exploded.day_of_month > 31)
245    return false;
246
247  if (!base::StringToInt(rest, &time_exploded.year)) {
248    // Maybe it's time. Does it look like time? Note that it can be any of
249    // "HH:MM", "H:MM", "HH:M" or maybe even "H:M".
250    if (rest.length() > 5)
251      return false;
252
253    size_t colon_pos = rest.find(':');
254    if (colon_pos == string16::npos)
255      return false;
256    if (colon_pos > 2)
257      return false;
258
259    if (!base::StringToInt(
260            StringPiece16(rest.begin(), rest.begin() + colon_pos),
261            &time_exploded.hour)) {
262      return false;
263    }
264    if (!base::StringToInt(
265            StringPiece16(rest.begin() + colon_pos + 1, rest.end()),
266            &time_exploded.minute)) {
267      return false;
268    }
269
270    // Guess the year.
271    base::Time::Exploded current_exploded;
272    current_time.LocalExplode(&current_exploded);
273
274    // If it's not possible for the parsed date to be in the current year,
275    // use the previous year.
276    if (time_exploded.month > current_exploded.month ||
277        (time_exploded.month == current_exploded.month &&
278         time_exploded.day_of_month > current_exploded.day_of_month)) {
279      time_exploded.year = current_exploded.year - 1;
280    } else {
281      time_exploded.year = current_exploded.year;
282    }
283  }
284
285  // We don't know the time zone of the listing, so just use local time.
286  *result = base::Time::FromLocalExploded(time_exploded);
287  return true;
288}
289
290// static
291bool FtpUtil::WindowsDateListingToTime(const base::string16& date,
292                                       const base::string16& time,
293                                       base::Time* result) {
294  base::Time::Exploded time_exploded = { 0 };
295
296  // Date should be in format MM-DD-YY[YY].
297  std::vector<base::string16> date_parts;
298  base::SplitString(date, '-', &date_parts);
299  if (date_parts.size() != 3)
300    return false;
301  if (!base::StringToInt(date_parts[0], &time_exploded.month))
302    return false;
303  if (!base::StringToInt(date_parts[1], &time_exploded.day_of_month))
304    return false;
305  if (!base::StringToInt(date_parts[2], &time_exploded.year))
306    return false;
307  if (time_exploded.year < 0)
308    return false;
309  // If year has only two digits then assume that 00-79 is 2000-2079,
310  // and 80-99 is 1980-1999.
311  if (time_exploded.year < 80)
312    time_exploded.year += 2000;
313  else if (time_exploded.year < 100)
314    time_exploded.year += 1900;
315
316  // Time should be in format HH:MM[(AM|PM)]
317  if (time.length() < 5)
318    return false;
319
320  std::vector<base::string16> time_parts;
321  base::SplitString(time.substr(0, 5), ':', &time_parts);
322  if (time_parts.size() != 2)
323    return false;
324  if (!base::StringToInt(time_parts[0], &time_exploded.hour))
325    return false;
326  if (!base::StringToInt(time_parts[1], &time_exploded.minute))
327    return false;
328  if (!time_exploded.HasValidValues())
329    return false;
330
331  if (time.length() > 5) {
332    if (time.length() != 7)
333      return false;
334    base::string16 am_or_pm(time.substr(5, 2));
335    if (EqualsASCII(am_or_pm, "PM")) {
336      if (time_exploded.hour < 12)
337        time_exploded.hour += 12;
338    } else if (EqualsASCII(am_or_pm, "AM")) {
339      if (time_exploded.hour == 12)
340        time_exploded.hour = 0;
341    } else {
342      return false;
343    }
344  }
345
346  // We don't know the time zone of the server, so just use local time.
347  *result = base::Time::FromLocalExploded(time_exploded);
348  return true;
349}
350
351// static
352base::string16 FtpUtil::GetStringPartAfterColumns(const base::string16& text,
353                                                  int columns) {
354  base::i18n::UTF16CharIterator iter(&text);
355
356  // TODO(jshin): Is u_isspace the right function to use here?
357  for (int i = 0; i < columns; i++) {
358    // Skip the leading whitespace.
359    while (!iter.end() && u_isspace(iter.get()))
360      iter.Advance();
361
362    // Skip the actual text of i-th column.
363    while (!iter.end() && !u_isspace(iter.get()))
364      iter.Advance();
365  }
366
367  base::string16 result(text.substr(iter.array_pos()));
368  TrimWhitespace(result, TRIM_ALL, &result);
369  return result;
370}
371
372}  // namespace
373