// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/ftp/ftp_directory_listing_parser.h"

#include "base/bind.h"
#include "base/callback.h"
#include "base/i18n/icu_encoding_detection.h"
#include "base/i18n/icu_string_conversions.h"
#include "base/stl_util.h"
#include "base/strings/string_util.h"
#include "base/strings/string_split.h"
#include "base/strings/utf_string_conversions.h"
#include "net/base/net_errors.h"
#include "net/ftp/ftp_directory_listing_parser_ls.h"
#include "net/ftp/ftp_directory_listing_parser_netware.h"
#include "net/ftp/ftp_directory_listing_parser_os2.h"
#include "net/ftp/ftp_directory_listing_parser_vms.h"
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_windows.h" 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_server_type_histograms.h" 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace net { 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace { 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Fills in |raw_name| for all |entries| using |encoding|. Returns network 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// error code. 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int FillInRawName(const std::string& encoding, 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<FtpDirectoryListingEntry>* entries) { 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (size_t i = 0; i < entries->size(); i++) { 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!base::UTF16ToCodepage(entries->at(i).name, encoding.c_str(), 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::OnStringConversionError::FAIL, 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) &entries->at(i).raw_name)) { 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ERR_ENCODING_CONVERSION_FAILED; 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return OK; 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Parses |text| as an FTP directory listing. Fills in |entries| 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and |server_type| and returns network error code. 44c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)int ParseListing(const base::string16& text, 45c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const base::string16& newline_separator, 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& encoding, 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const base::Time& current_time, 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<FtpDirectoryListingEntry>* entries, 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FtpServerType* server_type) { 50c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) std::vector<base::string16> lines; 512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) base::SplitStringUsingSubstr(text, newline_separator, &lines); 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) struct { 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Callback<bool(void)> callback; 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FtpServerType server_type; 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } parsers[] = { 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Bind(&ParseFtpDirectoryListingLs, lines, current_time, entries), 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SERVER_LS 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }, 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
base::Bind(&ParseFtpDirectoryListingWindows, lines, entries), 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SERVER_WINDOWS 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }, 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Bind(&ParseFtpDirectoryListingVms, lines, entries), 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SERVER_VMS 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }, 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Bind(&ParseFtpDirectoryListingNetware, 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) lines, current_time, entries), 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SERVER_NETWARE 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }, 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Bind(&ParseFtpDirectoryListingOS2, lines, entries), 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SERVER_OS2 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (size_t i = 0; i < ARRAYSIZE_UNSAFE(parsers); i++) { 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) entries->clear(); 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (parsers[i].callback.Run()) { 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *server_type = parsers[i].server_type; 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return FillInRawName(encoding, entries); 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) entries->clear(); 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT; 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Detects encoding of |text| and parses it as an FTP directory listing. 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Fills in |entries| and |server_type| and returns network error code. 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int DecodeAndParse(const std::string& text, 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const base::Time& current_time, 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<FtpDirectoryListingEntry>* entries, 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FtpServerType* server_type) { 982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const char* kNewlineSeparators[] = { "\n", "\r\n" }; 992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<std::string> encodings; 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!base::DetectAllEncodings(text, &encodings)) 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ERR_ENCODING_DETECTION_FAILED; 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Use first encoding that can be used to decode the text. 
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (size_t i = 0; i < encodings.size(); i++) { 106c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) base::string16 converted_text; 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (base::CodepageToUTF16(text, 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) encodings[i].c_str(), 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::OnStringConversionError::FAIL, 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) &converted_text)) { 1112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) for (size_t j = 0; j < arraysize(kNewlineSeparators); j++) { 1122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) int rv = ParseListing(converted_text, 1135d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) base::ASCIIToUTF16(kNewlineSeparators[j]), 1142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) encodings[i], 1152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) current_time, 1162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) entries, 1172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) server_type); 1182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (rv == OK) 1192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return rv; 1202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) entries->clear(); 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *server_type = SERVER_UNKNOWN; 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT; 1275821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles)} 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)FtpDirectoryListingEntry::FtpDirectoryListingEntry() 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : type(UNKNOWN), 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size(-1) { 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int ParseFtpDirectoryListing(const std::string& text, 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const base::Time& current_time, 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<FtpDirectoryListingEntry>* entries) { 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FtpServerType server_type = SERVER_UNKNOWN; 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int rv = DecodeAndParse(text, current_time, entries, &server_type); 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UpdateFtpServerTypeHistograms(server_type); 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return rv; 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace net 146