// ftp_directory_listing_parser.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/bind.h" 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/callback.h" 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/i18n/icu_encoding_detection.h" 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/i18n/icu_string_conversions.h" 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/stl_util.h" 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/string_split.h" 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/string_util.h" 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/base/net_errors.h" 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_ls.h" 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_netware.h" 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_os2.h" 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_vms.h" 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_windows.h" 205821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles)#include "net/ftp/ftp_server_type_histograms.h" 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace net { 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace { 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Fills in |raw_name| for all |entries| using |encoding|. Returns network 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// error code. 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int FillInRawName(const std::string& encoding, 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<FtpDirectoryListingEntry>* entries) { 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (size_t i = 0; i < entries->size(); i++) { 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!base::UTF16ToCodepage(entries->at(i).name, encoding.c_str(), 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::OnStringConversionError::FAIL, 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) &entries->at(i).raw_name)) { 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ERR_ENCODING_CONVERSION_FAILED; 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return OK; 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Parses |text| as an FTP directory listing. 
Fills in |entries| 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and |server_type| and returns network error code. 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int ParseListing(const string16& text, 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& encoding, 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const base::Time& current_time, 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<FtpDirectoryListingEntry>* entries, 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FtpServerType* server_type) { 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<string16> lines; 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::SplitString(text, '\n', &lines); 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) struct { 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Callback<bool(void)> callback; 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FtpServerType server_type; 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } parsers[] = { 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Bind(&ParseFtpDirectoryListingLs, lines, current_time, entries), 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SERVER_LS 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }, 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Bind(&ParseFtpDirectoryListingWindows, lines, entries), 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SERVER_WINDOWS 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }, 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Bind(&ParseFtpDirectoryListingVms, lines, entries), 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SERVER_VMS 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }, 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Bind(&ParseFtpDirectoryListingNetware, 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) lines, current_time, entries), 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SERVER_NETWARE 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }, 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Bind(&ParseFtpDirectoryListingOS2, lines, entries), 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SERVER_OS2 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (size_t i = 0; i < ARRAYSIZE_UNSAFE(parsers); i++) { 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) entries->clear(); 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (parsers[i].callback.Run()) { 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *server_type = parsers[i].server_type; 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return FillInRawName(encoding, entries); 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) entries->clear(); 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 
ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT; 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Detects encoding of |text| and parses it as an FTP directory listing. 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Fills in |entries| and |server_type| and returns network error code. 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int DecodeAndParse(const std::string& text, 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const base::Time& current_time, 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<FtpDirectoryListingEntry>* entries, 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FtpServerType* server_type) { 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<std::string> encodings; 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!base::DetectAllEncodings(text, &encodings)) 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ERR_ENCODING_DETECTION_FAILED; 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Use first encoding that can be used to decode the text. 
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (size_t i = 0; i < encodings.size(); i++) { 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16 converted_text; 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (base::CodepageToUTF16(text, 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) encodings[i].c_str(), 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::OnStringConversionError::FAIL, 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) &converted_text)) { 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int rv = ParseListing(converted_text, 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) encodings[i], 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) current_time, 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) entries, 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) server_type); 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (rv == OK) 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return rv; 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) entries->clear(); 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *server_type = SERVER_UNKNOWN; 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT; 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles)FtpDirectoryListingEntry::FtpDirectoryListingEntry() 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : type(UNKNOWN), 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size(-1) { 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int ParseFtpDirectoryListing(const std::string& text, 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const base::Time& current_time, 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<FtpDirectoryListingEntry>* entries) { 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FtpServerType server_type = SERVER_UNKNOWN; 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int rv = DecodeAndParse(text, current_time, entries, &server_type); 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UpdateFtpServerTypeHistograms(server_type); 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return rv; 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace net 139