ftp_util.cc revision a1401311d1ab56c4ed0a474bd38c108f75cb0cd9
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/ftp/ftp_util.h"
6
7#include <map>
8#include <vector>
9
10#include "base/i18n/case_conversion.h"
11#include "base/i18n/char_iterator.h"
12#include "base/logging.h"
13#include "base/memory/singleton.h"
14#include "base/strings/string_number_conversions.h"
15#include "base/strings/string_piece.h"
16#include "base/strings/string_split.h"
17#include "base/strings/string_tokenizer.h"
18#include "base/strings/string_util.h"
19#include "base/strings/utf_string_conversions.h"
20#include "base/time/time.h"
21#include "third_party/icu/source/common/unicode/uchar.h"
22#include "third_party/icu/source/i18n/unicode/datefmt.h"
23#include "third_party/icu/source/i18n/unicode/dtfmtsym.h"
24
25using base::ASCIIToUTF16;
26using base::StringPiece16;
27
// For examples of Unix<->VMS path conversions, see the unit test file. On VMS
// a path looks different depending on whether it's a file or a directory.
30
31namespace net {
32
33// static
34std::string FtpUtil::UnixFilePathToVMS(const std::string& unix_path) {
35  if (unix_path.empty())
36    return std::string();
37
38  base::StringTokenizer tokenizer(unix_path, "/");
39  std::vector<std::string> tokens;
40  while (tokenizer.GetNext())
41    tokens.push_back(tokenizer.token());
42
43  if (unix_path[0] == '/') {
44    // It's an absolute path.
45
46    if (tokens.empty()) {
47      DCHECK_EQ(1U, unix_path.length());
48      return "[]";
49    }
50
51    if (tokens.size() == 1)
52      return unix_path.substr(1);  // Drop the leading slash.
53
54    std::string result(tokens[0] + ":[");
55    if (tokens.size() == 2) {
56      // Don't ask why, it just works that way on VMS.
57      result.append("000000");
58    } else {
59      result.append(tokens[1]);
60      for (size_t i = 2; i < tokens.size() - 1; i++)
61        result.append("." + tokens[i]);
62    }
63    result.append("]" + tokens[tokens.size() - 1]);
64    return result;
65  }
66
67  if (tokens.size() == 1)
68    return unix_path;
69
70  std::string result("[");
71  for (size_t i = 0; i < tokens.size() - 1; i++)
72    result.append("." + tokens[i]);
73  result.append("]" + tokens[tokens.size() - 1]);
74  return result;
75}
76
77// static
78std::string FtpUtil::UnixDirectoryPathToVMS(const std::string& unix_path) {
79  if (unix_path.empty())
80    return std::string();
81
82  std::string path(unix_path);
83
84  if (path[path.length() - 1] != '/')
85    path.append("/");
86
87  // Reuse logic from UnixFilePathToVMS by appending a fake file name to the
88  // real path and removing it after conversion.
89  path.append("x");
90  path = UnixFilePathToVMS(path);
91  return path.substr(0, path.length() - 1);
92}
93
94// static
95std::string FtpUtil::VMSPathToUnix(const std::string& vms_path) {
96  if (vms_path.empty())
97    return ".";
98
99  if (vms_path[0] == '/') {
100    // This is not really a VMS path. Most likely the server is emulating UNIX.
101    // Return path as-is.
102    return vms_path;
103  }
104
105  if (vms_path == "[]")
106    return "/";
107
108  std::string result(vms_path);
109  if (vms_path[0] == '[') {
110    // It's a relative path.
111    ReplaceFirstSubstringAfterOffset(&result, 0, "[.", std::string());
112  } else {
113    // It's an absolute path.
114    result.insert(0, "/");
115    ReplaceSubstringsAfterOffset(&result, 0, ":[000000]", "/");
116    ReplaceSubstringsAfterOffset(&result, 0, ":[", "/");
117  }
118  std::replace(result.begin(), result.end(), '.', '/');
119  std::replace(result.begin(), result.end(), ']', '/');
120
121  // Make sure the result doesn't end with a slash.
122  if (result.length() && result[result.length() - 1] == '/')
123    result = result.substr(0, result.length() - 1);
124
125  return result;
126}
127
128namespace {
129
130// Lazy-initialized map of abbreviated month names.
131class AbbreviatedMonthsMap {
132 public:
133  static AbbreviatedMonthsMap* GetInstance() {
134    return Singleton<AbbreviatedMonthsMap>::get();
135  }
136
137  // Converts abbreviated month name |text| to its number (in range 1-12).
138  // On success returns true and puts the number in |number|.
139  bool GetMonthNumber(const base::string16& text, int* number) {
140    // Ignore the case of the month names. The simplest way to handle that
141    // is to make everything lowercase.
142    base::string16 text_lower(base::i18n::ToLower(text));
143
144    if (map_.find(text_lower) == map_.end())
145      return false;
146
147    *number = map_[text_lower];
148    return true;
149  }
150
151 private:
152  friend struct DefaultSingletonTraits<AbbreviatedMonthsMap>;
153
154  // Constructor, initializes the map based on ICU data. It is much faster
155  // to do that just once.
156  AbbreviatedMonthsMap() {
157    int32_t locales_count;
158    const icu::Locale* locales =
159        icu::DateFormat::getAvailableLocales(locales_count);
160
161    for (int32_t locale = 0; locale < locales_count; locale++) {
162      UErrorCode status(U_ZERO_ERROR);
163
164      icu::DateFormatSymbols format_symbols(locales[locale], status);
165
166      // If we cannot get format symbols for some locale, it's not a fatal
167      // error. Just try another one.
168      if (U_FAILURE(status))
169        continue;
170
171      int32_t months_count;
172      const icu::UnicodeString* months =
173          format_symbols.getShortMonths(months_count);
174
175      for (int32_t month = 0; month < months_count; month++) {
176        base::string16 month_name(months[month].getBuffer(),
177                            static_cast<size_t>(months[month].length()));
178
179        // Ignore the case of the month names. The simplest way to handle that
180        // is to make everything lowercase.
181        month_name = base::i18n::ToLower(month_name);
182
183        map_[month_name] = month + 1;
184
185        // Sometimes ICU returns longer strings, but in FTP listings a shorter
186        // abbreviation is used (for example for the Russian locale). Make sure
187        // we always have a map entry for a three-letter abbreviation.
188        map_[month_name.substr(0, 3)] = month + 1;
189      }
190    }
191
192    // Fail loudly if the data returned by ICU is obviously incomplete.
193    // This is intended to catch cases like http://crbug.com/177428
194    // much earlier. Note that the issue above turned out to be non-trivial
195    // to reproduce - crash data is much better indicator of a problem
196    // than incomplete bug reports.
197    CHECK_EQ(1, map_[ASCIIToUTF16("jan")]);
198    CHECK_EQ(2, map_[ASCIIToUTF16("feb")]);
199    CHECK_EQ(3, map_[ASCIIToUTF16("mar")]);
200    CHECK_EQ(4, map_[ASCIIToUTF16("apr")]);
201    CHECK_EQ(5, map_[ASCIIToUTF16("may")]);
202    CHECK_EQ(6, map_[ASCIIToUTF16("jun")]);
203    CHECK_EQ(7, map_[ASCIIToUTF16("jul")]);
204    CHECK_EQ(8, map_[ASCIIToUTF16("aug")]);
205    CHECK_EQ(9, map_[ASCIIToUTF16("sep")]);
206    CHECK_EQ(10, map_[ASCIIToUTF16("oct")]);
207    CHECK_EQ(11, map_[ASCIIToUTF16("nov")]);
208    CHECK_EQ(12, map_[ASCIIToUTF16("dec")]);
209  }
210
211  // Maps lowercase month names to numbers in range 1-12.
212  std::map<base::string16, int> map_;
213
214  DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap);
215};
216
217}  // namespace
218
219// static
220bool FtpUtil::AbbreviatedMonthToNumber(const base::string16& text,
221                                       int* number) {
222  return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text, number);
223}
224
225// static
226bool FtpUtil::LsDateListingToTime(const base::string16& month,
227                                  const base::string16& day,
228                                  const base::string16& rest,
229                                  const base::Time& current_time,
230                                  base::Time* result) {
231  base::Time::Exploded time_exploded = { 0 };
232
233  if (!AbbreviatedMonthToNumber(month, &time_exploded.month)) {
234    // Work around garbage sent by some servers in the same column
235    // as the month. Take just last 3 characters of the string.
236    if (month.length() < 3 ||
237        !AbbreviatedMonthToNumber(month.substr(month.length() - 3),
238                                  &time_exploded.month)) {
239      return false;
240    }
241  }
242
243  if (!base::StringToInt(day, &time_exploded.day_of_month))
244    return false;
245  if (time_exploded.day_of_month > 31)
246    return false;
247
248  if (!base::StringToInt(rest, &time_exploded.year)) {
249    // Maybe it's time. Does it look like time? Note that it can be any of
250    // "HH:MM", "H:MM", "HH:M" or maybe even "H:M".
251    if (rest.length() > 5)
252      return false;
253
254    size_t colon_pos = rest.find(':');
255    if (colon_pos == base::string16::npos)
256      return false;
257    if (colon_pos > 2)
258      return false;
259
260    if (!base::StringToInt(
261            StringPiece16(rest.begin(), rest.begin() + colon_pos),
262            &time_exploded.hour)) {
263      return false;
264    }
265    if (!base::StringToInt(
266            StringPiece16(rest.begin() + colon_pos + 1, rest.end()),
267            &time_exploded.minute)) {
268      return false;
269    }
270
271    // Guess the year.
272    base::Time::Exploded current_exploded;
273    current_time.LocalExplode(&current_exploded);
274
275    // If it's not possible for the parsed date to be in the current year,
276    // use the previous year.
277    if (time_exploded.month > current_exploded.month ||
278        (time_exploded.month == current_exploded.month &&
279         time_exploded.day_of_month > current_exploded.day_of_month)) {
280      time_exploded.year = current_exploded.year - 1;
281    } else {
282      time_exploded.year = current_exploded.year;
283    }
284  }
285
286  // We don't know the time zone of the listing, so just use local time.
287  *result = base::Time::FromLocalExploded(time_exploded);
288  return true;
289}
290
291// static
292bool FtpUtil::WindowsDateListingToTime(const base::string16& date,
293                                       const base::string16& time,
294                                       base::Time* result) {
295  base::Time::Exploded time_exploded = { 0 };
296
297  // Date should be in format MM-DD-YY[YY].
298  std::vector<base::string16> date_parts;
299  base::SplitString(date, '-', &date_parts);
300  if (date_parts.size() != 3)
301    return false;
302  if (!base::StringToInt(date_parts[0], &time_exploded.month))
303    return false;
304  if (!base::StringToInt(date_parts[1], &time_exploded.day_of_month))
305    return false;
306  if (!base::StringToInt(date_parts[2], &time_exploded.year))
307    return false;
308  if (time_exploded.year < 0)
309    return false;
310  // If year has only two digits then assume that 00-79 is 2000-2079,
311  // and 80-99 is 1980-1999.
312  if (time_exploded.year < 80)
313    time_exploded.year += 2000;
314  else if (time_exploded.year < 100)
315    time_exploded.year += 1900;
316
317  // Time should be in format HH:MM[(AM|PM)]
318  if (time.length() < 5)
319    return false;
320
321  std::vector<base::string16> time_parts;
322  base::SplitString(time.substr(0, 5), ':', &time_parts);
323  if (time_parts.size() != 2)
324    return false;
325  if (!base::StringToInt(time_parts[0], &time_exploded.hour))
326    return false;
327  if (!base::StringToInt(time_parts[1], &time_exploded.minute))
328    return false;
329  if (!time_exploded.HasValidValues())
330    return false;
331
332  if (time.length() > 5) {
333    if (time.length() != 7)
334      return false;
335    base::string16 am_or_pm(time.substr(5, 2));
336    if (EqualsASCII(am_or_pm, "PM")) {
337      if (time_exploded.hour < 12)
338        time_exploded.hour += 12;
339    } else if (EqualsASCII(am_or_pm, "AM")) {
340      if (time_exploded.hour == 12)
341        time_exploded.hour = 0;
342    } else {
343      return false;
344    }
345  }
346
347  // We don't know the time zone of the server, so just use local time.
348  *result = base::Time::FromLocalExploded(time_exploded);
349  return true;
350}
351
352// static
353base::string16 FtpUtil::GetStringPartAfterColumns(const base::string16& text,
354                                                  int columns) {
355  base::i18n::UTF16CharIterator iter(&text);
356
357  // TODO(jshin): Is u_isspace the right function to use here?
358  for (int i = 0; i < columns; i++) {
359    // Skip the leading whitespace.
360    while (!iter.end() && u_isspace(iter.get()))
361      iter.Advance();
362
363    // Skip the actual text of i-th column.
364    while (!iter.end() && !u_isspace(iter.get()))
365      iter.Advance();
366  }
367
368  base::string16 result(text.substr(iter.array_pos()));
369  base::TrimWhitespace(result, base::TRIM_ALL, &result);
370  return result;
371}
372
373}  // namespace
374