ftp_directory_listing_parser.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/bind.h"
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/callback.h"
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/i18n/icu_encoding_detection.h"
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/i18n/icu_string_conversions.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/stl_util.h"
125e3f23d412006dc4db4e659864679f29341e113fTorne (Richard Coles)#include "base/strings/string_util.h"
132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/strings/string_split.h"
14868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/utf_string_conversions.h"
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/base/net_errors.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_ls.h"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_netware.h"
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_os2.h"
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_vms.h"
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_windows.h"
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_server_type_histograms.h"
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace net {
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Fills in |raw_name| for all |entries| using |encoding|. Returns network
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// error code.
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int FillInRawName(const std::string& encoding,
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  std::vector<FtpDirectoryListingEntry>* entries) {
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t i = 0; i < entries->size(); i++) {
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!base::UTF16ToCodepage(entries->at(i).name, encoding.c_str(),
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               base::OnStringConversionError::FAIL,
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               &entries->at(i).raw_name)) {
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return ERR_ENCODING_CONVERSION_FAILED;
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return OK;
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Parses |text| as an FTP directory listing. Fills in |entries|
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and |server_type| and returns network error code.
44c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)int ParseListing(const base::string16& text,
45c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                 const base::string16& newline_separator,
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 const std::string& encoding,
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 const base::Time& current_time,
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 std::vector<FtpDirectoryListingEntry>* entries,
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 FtpServerType* server_type) {
50c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  std::vector<base::string16> lines;
512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  base::SplitStringUsingSubstr(text, newline_separator, &lines);
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  struct {
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::Callback<bool(void)> callback;
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    FtpServerType server_type;
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } parsers[] = {
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ParseFtpDirectoryListingLs, lines, current_time, entries),
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SERVER_LS
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    },
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ParseFtpDirectoryListingWindows, lines, entries),
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SERVER_WINDOWS
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    },
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ParseFtpDirectoryListingVms, lines, entries),
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SERVER_VMS
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    },
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ParseFtpDirectoryListingNetware,
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 lines, current_time, entries),
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SERVER_NETWARE
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    },
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ParseFtpDirectoryListingOS2, lines, entries),
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SERVER_OS2
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(parsers); i++) {
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    entries->clear();
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (parsers[i].callback.Run()) {
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *server_type = parsers[i].server_type;
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return FillInRawName(encoding, entries);
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  entries->clear();
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT;
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Detects encoding of |text| and parses it as an FTP directory listing.
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Fills in |entries| and |server_type| and returns network error code.
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int DecodeAndParse(const std::string& text,
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   const base::Time& current_time,
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   std::vector<FtpDirectoryListingEntry>* entries,
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   FtpServerType* server_type) {
982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  const char* kNewlineSeparators[] = { "\n", "\r\n" };
992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<std::string> encodings;
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!base::DetectAllEncodings(text, &encodings))
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return ERR_ENCODING_DETECTION_FAILED;
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Use first encoding that can be used to decode the text.
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t i = 0; i < encodings.size(); i++) {
106c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    base::string16 converted_text;
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (base::CodepageToUTF16(text,
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              encodings[i].c_str(),
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              base::OnStringConversionError::FAIL,
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              &converted_text)) {
1112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      for (size_t j = 0; j < arraysize(kNewlineSeparators); j++) {
1122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        int rv = ParseListing(converted_text,
1135d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                              base::ASCIIToUTF16(kNewlineSeparators[j]),
1142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                              encodings[i],
1152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                              current_time,
1162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                              entries,
1172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                              server_type);
1182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        if (rv == OK)
1192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          return rv;
1202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      }
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  entries->clear();
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *server_type = SERVER_UNKNOWN;
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT;
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)FtpDirectoryListingEntry::FtpDirectoryListingEntry()
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : type(UNKNOWN),
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      size(-1) {
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int ParseFtpDirectoryListing(const std::string& text,
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             const base::Time& current_time,
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             std::vector<FtpDirectoryListingEntry>* entries) {
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FtpServerType server_type = SERVER_UNKNOWN;
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int rv = DecodeAndParse(text, current_time, entries, &server_type);
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UpdateFtpServerTypeHistograms(server_type);
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return rv;
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace net
146