// ftp_directory_listing_parser_ls.cc revision 2a99a7e74a7f215066514fe81d2bfa6639d9eddd
1402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll// Use of this source code is governed by a BSD-style license that can be 3402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll// found in the LICENSE file. 4402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 5402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll#include "net/ftp/ftp_directory_listing_parser_ls.h" 6402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 7402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll#include <vector> 8402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 9402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll#include "base/string_number_conversions.h" 10402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll#include "base/string_util.h" 11402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll#include "base/strings/string_split.h" 12402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll#include "base/time.h" 13402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll#include "base/utf_string_conversions.h" 14402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll#include "net/ftp/ftp_directory_listing_parser.h" 15402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll#include "net/ftp/ftp_util.h" 16402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 17402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Mollnamespace { 18402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 19402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Mollbool TwoColumnDateListingToTime(const string16& date, 20402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll const string16& time, 21402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll base::Time* result) { 22402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll base::Time::Exploded time_exploded = { 0 }; 23402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 24402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // Date should be in format YYYY-MM-DD. 
25402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll std::vector<string16> date_parts; 26402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll base::SplitString(date, '-', &date_parts); 27402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll if (date_parts.size() != 3) 28402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll return false; 29402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll if (!base::StringToInt(date_parts[0], &time_exploded.year)) 30402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll return false; 31402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll if (!base::StringToInt(date_parts[1], &time_exploded.month)) 32402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll return false; 33402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll if (!base::StringToInt(date_parts[2], &time_exploded.day_of_month)) 34402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll return false; 35402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 36402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // Time should be in format HH:MM 37402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll if (time.length() != 5) 38402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll return false; 39402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 40402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll std::vector<string16> time_parts; 41402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll base::SplitString(time, ':', &time_parts); 42402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll if (time_parts.size() != 2) 43402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll return false; 44402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll if (!base::StringToInt(time_parts[0], &time_exploded.hour)) 45402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll return false; 46402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll if (!base::StringToInt(time_parts[1], &time_exploded.minute)) 47402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll return false; 
48402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll if (!time_exploded.HasValidValues()) 49402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll return false; 50402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 51402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // We don't know the time zone of the server, so just use local time. 52402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll *result = base::Time::FromLocalExploded(time_exploded); 53402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll return true; 54402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll} 55402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 56402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll// Returns the column index of the end of the date listing and detected 57402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll// last modification time. 58402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Mollbool DetectColumnOffsetSizeAndModificationTime( 59402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll const std::vector<string16>& columns, 60402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll const base::Time& current_time, 61402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll size_t* offset, 62402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll string16* size, 63402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll base::Time* modification_time) { 64402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // The column offset can be arbitrarily large if some fields 65402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // like owner or group name contain spaces. Try offsets from left to right 66402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // and use the first one that matches a date listing. 67402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // 68402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // Here is how a listing line should look like. 
A star ("*") indicates 69402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // a required field: 70402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // 71402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // * 1. permission listing 72402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // 2. number of links (optional) 73402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // * 3. owner name (may contain spaces) 74402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // 4. group name (optional, may contain spaces) 75402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // * 5. size in bytes 76402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // * 6. month 77402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // * 7. day of month 78402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // * 8. year or time <-- column_offset will be the index of this column 79402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // 9. file name (optional, may contain spaces) 80402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll for (size_t i = 5U; i < columns.size(); i++) { 81402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll if (net::FtpUtil::LsDateListingToTime(columns[i - 2], 82402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll columns[i - 1], 83402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll columns[i], 84402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll current_time, 85402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll modification_time)) { 86402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll *size = columns[i - 3]; 87402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll *offset = i; 88402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll return true; 89402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll } 90402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll } 91402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 92402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // Some FTP listings have swapped the "month" and "day of month" columns 
93402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // (for example Russian listings). We try to recognize them only after making 94402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // sure no column offset works above (this is a more strict way). 95402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll for (size_t i = 5U; i < columns.size(); i++) { 96402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll if (net::FtpUtil::LsDateListingToTime(columns[i - 1], 97402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll columns[i - 2], 98402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll columns[i], 99402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll current_time, 100402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll modification_time)) { 101402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll *size = columns[i - 3]; 102402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll *offset = i; 103402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll return true; 104402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll } 105402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll } 106402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 107402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // Some FTP listings use a different date format. 
108402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll for (size_t i = 5U; i < columns.size(); i++) { 109402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll if (TwoColumnDateListingToTime(columns[i - 1], 110402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll columns[i], 111402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll modification_time)) { 112402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll *size = columns[i - 2]; 113402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll *offset = i; 114402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll return true; 115402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll } 116402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll } 117402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 118402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll return false; 119402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll} 120402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 121402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll} // namespace 122402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 123402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Mollnamespace net { 124402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 125402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Mollbool ParseFtpDirectoryListingLs( 126402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll const std::vector<string16>& lines, 127402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll const base::Time& current_time, 128402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll std::vector<FtpDirectoryListingEntry>* entries) { 129402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // True after we have received a "total n" listing header, where n is an 130402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // integer. Only one such header is allowed per listing. 
131402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll bool received_total_line = false; 132402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 133402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll for (size_t i = 0; i < lines.size(); i++) { 134402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll if (lines[i].empty()) 135402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll continue; 136402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 137402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll std::vector<string16> columns; 138402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll base::SplitString(CollapseWhitespace(lines[i], false), ' ', &columns); 139402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 140402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // Some FTP servers put a "total n" line at the beginning of the listing 141402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // (n is an integer). Allow such a line, but only once, and only if it's 142402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // the first non-empty line. Do not match the word exactly, because it may 143402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // be in different languages (at least English and German have been seen 144402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll // in the field). 
145402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll if (columns.size() == 2 && !received_total_line) { 146402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll received_total_line = true; 147402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll 148402794e73aed8611d62eb4b01cd155e2d76fcb87Raphael Moll int64 total_number; 149 if (!base::StringToInt64(columns[1], &total_number)) 150 return false; 151 if (total_number < 0) 152 return false; 153 154 continue; 155 } 156 157 FtpDirectoryListingEntry entry; 158 159 size_t column_offset; 160 string16 size; 161 if (!DetectColumnOffsetSizeAndModificationTime(columns, 162 current_time, 163 &column_offset, 164 &size, 165 &entry.last_modified)) { 166 // Some servers send a message in one of the first few lines. 167 // All those messages have in common is the string ".:", 168 // where "." means the current directory, and ":" separates it 169 // from the rest of the message, which may be empty. 170 if (lines[i].find(ASCIIToUTF16(".:")) != string16::npos) 171 continue; 172 173 return false; 174 } 175 176 // Do not check "validity" of the permission listing. It's quirky, 177 // and some servers send garbage here while other parts of the line are OK. 178 179 if (!columns[0].empty() && columns[0][0] == 'l') { 180 entry.type = FtpDirectoryListingEntry::SYMLINK; 181 } else if (!columns[0].empty() && columns[0][0] == 'd') { 182 entry.type = FtpDirectoryListingEntry::DIRECTORY; 183 } else { 184 entry.type = FtpDirectoryListingEntry::FILE; 185 } 186 187 if (!base::StringToInt64(size, &entry.size)) { 188 // Some FTP servers do not separate owning group name from file size, 189 // like "group1234". We still want to display the file name for that 190 // entry, but can't really get the size (What if the group is named 191 // "group1", and the size is in fact 234? We can't distinguish between 192 // that and "group" with size 1234). Use a dummy value for the size. 193 // TODO(phajdan.jr): Use a value that means "unknown" instead of 0 bytes. 
194 entry.size = 0; 195 } 196 if (entry.size < 0) { 197 // Some FTP servers have bugs that cause them to display the file size 198 // as negative. They're most likely big files like DVD ISO images. 199 // We still want to display them, so just say the real file size 200 // is unknown. 201 entry.size = -1; 202 } 203 if (entry.type != FtpDirectoryListingEntry::FILE) 204 entry.size = -1; 205 206 if (column_offset == columns.size() - 1) { 207 // If the end of the date listing is the last column, there is no file 208 // name. Some FTP servers send listing entries with empty names. 209 // It's not obvious how to display such an entry, so we ignore them. 210 // We don't want to make the parsing fail at this point though. 211 // Other entries can still be useful. 212 continue; 213 } 214 215 entry.name = FtpUtil::GetStringPartAfterColumns(lines[i], 216 column_offset + 1); 217 218 if (entry.type == FtpDirectoryListingEntry::SYMLINK) { 219 string16::size_type pos = entry.name.rfind(ASCIIToUTF16(" -> ")); 220 221 // We don't require the " -> " to be present. Some FTP servers don't send 222 // the symlink target, possibly for security reasons. 223 if (pos != string16::npos) 224 entry.name = entry.name.substr(0, pos); 225 } 226 227 entries->push_back(entry); 228 } 229 230 return true; 231} 232 233} // namespace net 234