ftp_directory_listing_parser_ls.cc revision a1401311d1ab56c4ed0a474bd38c108f75cb0cd9
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "net/ftp/ftp_directory_listing_parser_ls.h" 6 7#include <vector> 8 9#include "base/strings/string_number_conversions.h" 10#include "base/strings/string_split.h" 11#include "base/strings/string_util.h" 12#include "base/strings/utf_string_conversions.h" 13#include "base/time/time.h" 14#include "net/ftp/ftp_directory_listing_parser.h" 15#include "net/ftp/ftp_util.h" 16 17namespace { 18 19bool TwoColumnDateListingToTime(const base::string16& date, 20 const base::string16& time, 21 base::Time* result) { 22 base::Time::Exploded time_exploded = { 0 }; 23 24 // Date should be in format YYYY-MM-DD. 25 std::vector<base::string16> date_parts; 26 base::SplitString(date, '-', &date_parts); 27 if (date_parts.size() != 3) 28 return false; 29 if (!base::StringToInt(date_parts[0], &time_exploded.year)) 30 return false; 31 if (!base::StringToInt(date_parts[1], &time_exploded.month)) 32 return false; 33 if (!base::StringToInt(date_parts[2], &time_exploded.day_of_month)) 34 return false; 35 36 // Time should be in format HH:MM 37 if (time.length() != 5) 38 return false; 39 40 std::vector<base::string16> time_parts; 41 base::SplitString(time, ':', &time_parts); 42 if (time_parts.size() != 2) 43 return false; 44 if (!base::StringToInt(time_parts[0], &time_exploded.hour)) 45 return false; 46 if (!base::StringToInt(time_parts[1], &time_exploded.minute)) 47 return false; 48 if (!time_exploded.HasValidValues()) 49 return false; 50 51 // We don't know the time zone of the server, so just use local time. 52 *result = base::Time::FromLocalExploded(time_exploded); 53 return true; 54} 55 56// Returns the column index of the end of the date listing and detected 57// last modification time. 58bool DetectColumnOffsetSizeAndModificationTime( 59 const std::vector<base::string16>& columns, 60 const base::Time& current_time, 61 size_t* offset, 62 base::string16* size, 63 base::Time* modification_time) { 64 // The column offset can be arbitrarily large if some fields 65 // like owner or group name contain spaces. Try offsets from left to right 66 // and use the first one that matches a date listing. 67 // 68 // Here is how a listing line should look like. A star ("*") indicates 69 // a required field: 70 // 71 // * 1. permission listing 72 // 2. number of links (optional) 73 // * 3. owner name (may contain spaces) 74 // 4. group name (optional, may contain spaces) 75 // * 5. size in bytes 76 // * 6. month 77 // * 7. day of month 78 // * 8. year or time <-- column_offset will be the index of this column 79 // 9. file name (optional, may contain spaces) 80 for (size_t i = 5U; i < columns.size(); i++) { 81 if (net::FtpUtil::LsDateListingToTime(columns[i - 2], 82 columns[i - 1], 83 columns[i], 84 current_time, 85 modification_time)) { 86 *size = columns[i - 3]; 87 *offset = i; 88 return true; 89 } 90 } 91 92 // Some FTP listings have swapped the "month" and "day of month" columns 93 // (for example Russian listings). We try to recognize them only after making 94 // sure no column offset works above (this is a more strict way). 95 for (size_t i = 5U; i < columns.size(); i++) { 96 if (net::FtpUtil::LsDateListingToTime(columns[i - 1], 97 columns[i - 2], 98 columns[i], 99 current_time, 100 modification_time)) { 101 *size = columns[i - 3]; 102 *offset = i; 103 return true; 104 } 105 } 106 107 // Some FTP listings use a different date format. 108 for (size_t i = 5U; i < columns.size(); i++) { 109 if (TwoColumnDateListingToTime(columns[i - 1], 110 columns[i], 111 modification_time)) { 112 *size = columns[i - 2]; 113 *offset = i; 114 return true; 115 } 116 } 117 118 return false; 119} 120 121} // namespace 122 123namespace net { 124 125bool ParseFtpDirectoryListingLs( 126 const std::vector<base::string16>& lines, 127 const base::Time& current_time, 128 std::vector<FtpDirectoryListingEntry>* entries) { 129 // True after we have received a "total n" listing header, where n is an 130 // integer. Only one such header is allowed per listing. 131 bool received_total_line = false; 132 133 for (size_t i = 0; i < lines.size(); i++) { 134 if (lines[i].empty()) 135 continue; 136 137 std::vector<base::string16> columns; 138 base::SplitString(base::CollapseWhitespace(lines[i], false), ' ', &columns); 139 140 // Some FTP servers put a "total n" line at the beginning of the listing 141 // (n is an integer). Allow such a line, but only once, and only if it's 142 // the first non-empty line. Do not match the word exactly, because it may 143 // be in different languages (at least English and German have been seen 144 // in the field). 145 if (columns.size() == 2 && !received_total_line) { 146 received_total_line = true; 147 148 int64 total_number; 149 if (!base::StringToInt64(columns[1], &total_number)) 150 return false; 151 if (total_number < 0) 152 return false; 153 154 continue; 155 } 156 157 FtpDirectoryListingEntry entry; 158 159 size_t column_offset; 160 base::string16 size; 161 if (!DetectColumnOffsetSizeAndModificationTime(columns, 162 current_time, 163 &column_offset, 164 &size, 165 &entry.last_modified)) { 166 // Some servers send a message in one of the first few lines. 167 // All those messages have in common is the string ".:", 168 // where "." means the current directory, and ":" separates it 169 // from the rest of the message, which may be empty. 170 if (lines[i].find(base::ASCIIToUTF16(".:")) != base::string16::npos) 171 continue; 172 173 return false; 174 } 175 176 // Do not check "validity" of the permission listing. It's quirky, 177 // and some servers send garbage here while other parts of the line are OK. 178 179 if (!columns[0].empty() && columns[0][0] == 'l') { 180 entry.type = FtpDirectoryListingEntry::SYMLINK; 181 } else if (!columns[0].empty() && columns[0][0] == 'd') { 182 entry.type = FtpDirectoryListingEntry::DIRECTORY; 183 } else { 184 entry.type = FtpDirectoryListingEntry::FILE; 185 } 186 187 if (!base::StringToInt64(size, &entry.size)) { 188 // Some FTP servers do not separate owning group name from file size, 189 // like "group1234". We still want to display the file name for that 190 // entry, but can't really get the size (What if the group is named 191 // "group1", and the size is in fact 234? We can't distinguish between 192 // that and "group" with size 1234). Use a dummy value for the size. 193 // TODO(phajdan.jr): Use a value that means "unknown" instead of 0 bytes. 194 entry.size = 0; 195 } 196 if (entry.size < 0) { 197 // Some FTP servers have bugs that cause them to display the file size 198 // as negative. They're most likely big files like DVD ISO images. 199 // We still want to display them, so just say the real file size 200 // is unknown. 201 entry.size = -1; 202 } 203 if (entry.type != FtpDirectoryListingEntry::FILE) 204 entry.size = -1; 205 206 if (column_offset == columns.size() - 1) { 207 // If the end of the date listing is the last column, there is no file 208 // name. Some FTP servers send listing entries with empty names. 209 // It's not obvious how to display such an entry, so we ignore them. 210 // We don't want to make the parsing fail at this point though. 211 // Other entries can still be useful. 212 continue; 213 } 214 215 entry.name = FtpUtil::GetStringPartAfterColumns(lines[i], 216 column_offset + 1); 217 218 if (entry.type == FtpDirectoryListingEntry::SYMLINK) { 219 base::string16::size_type pos = 220 entry.name.rfind(base::ASCIIToUTF16(" -> ")); 221 222 // We don't require the " -> " to be present. Some FTP servers don't send 223 // the symlink target, possibly for security reasons. 224 if (pos != base::string16::npos) 225 entry.name = entry.name.substr(0, pos); 226 } 227 228 entries->push_back(entry); 229 } 230 231 return true; 232} 233 234} // namespace net 235