ftp_directory_listing_parser.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/bind.h"
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/callback.h"
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/i18n/icu_encoding_detection.h"
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/i18n/icu_string_conversions.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/stl_util.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/string_split.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/string_util.h"
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/base/net_errors.h"
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_ls.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_netware.h"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_os2.h"
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_vms.h"
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_windows.h"
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_server_type_histograms.h"
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace net {
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Fills in |raw_name| for all |entries| using |encoding|. Returns network
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// error code.
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int FillInRawName(const std::string& encoding,
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  std::vector<FtpDirectoryListingEntry>* entries) {
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t i = 0; i < entries->size(); i++) {
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!base::UTF16ToCodepage(entries->at(i).name, encoding.c_str(),
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               base::OnStringConversionError::FAIL,
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               &entries->at(i).raw_name)) {
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return ERR_ENCODING_CONVERSION_FAILED;
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return OK;
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Parses |text| as an FTP directory listing. Fills in |entries|
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and |server_type| and returns network error code.
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int ParseListing(const string16& text,
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 const std::string& encoding,
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 const base::Time& current_time,
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 std::vector<FtpDirectoryListingEntry>* entries,
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 FtpServerType* server_type) {
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<string16> lines;
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::SplitString(text, '\n', &lines);
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  struct {
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::Callback<bool(void)> callback;
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    FtpServerType server_type;
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } parsers[] = {
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ParseFtpDirectoryListingLs, lines, current_time, entries),
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SERVER_LS
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    },
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ParseFtpDirectoryListingWindows, lines, entries),
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SERVER_WINDOWS
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    },
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ParseFtpDirectoryListingVms, lines, entries),
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SERVER_VMS
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    },
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ParseFtpDirectoryListingNetware,
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 lines, current_time, entries),
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SERVER_NETWARE
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    },
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ParseFtpDirectoryListingOS2, lines, entries),
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SERVER_OS2
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(parsers); i++) {
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    entries->clear();
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (parsers[i].callback.Run()) {
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *server_type = parsers[i].server_type;
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return FillInRawName(encoding, entries);
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  entries->clear();
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT;
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Detects encoding of |text| and parses it as an FTP directory listing.
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Fills in |entries| and |server_type| and returns network error code.
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int DecodeAndParse(const std::string& text,
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   const base::Time& current_time,
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   std::vector<FtpDirectoryListingEntry>* entries,
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   FtpServerType* server_type) {
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<std::string> encodings;
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!base::DetectAllEncodings(text, &encodings))
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return ERR_ENCODING_DETECTION_FAILED;
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Use first encoding that can be used to decode the text.
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t i = 0; i < encodings.size(); i++) {
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    string16 converted_text;
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (base::CodepageToUTF16(text,
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              encodings[i].c_str(),
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              base::OnStringConversionError::FAIL,
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              &converted_text)) {
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      int rv = ParseListing(converted_text,
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            encodings[i],
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            current_time,
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            entries,
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            server_type);
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (rv == OK)
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return rv;
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  entries->clear();
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *server_type = SERVER_UNKNOWN;
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT;
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)FtpDirectoryListingEntry::FtpDirectoryListingEntry()
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : type(UNKNOWN),
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      size(-1) {
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int ParseFtpDirectoryListing(const std::string& text,
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             const base::Time& current_time,
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             std::vector<FtpDirectoryListingEntry>* entries) {
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FtpServerType server_type = SERVER_UNKNOWN;
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int rv = DecodeAndParse(text, current_time, entries, &server_type);
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UpdateFtpServerTypeHistograms(server_type);
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return rv;
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace net
139