// ftp_directory_listing_parser_ls.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch// Copyright (c) 2012 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser_ls.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector> 85d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/strings/string_number_conversions.h" 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/strings/string_split.h" 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/strings/string_util.h" 12a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)#include "base/strings/utf_string_conversions.h" 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/time/time.h" 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_directory_listing_parser.h" 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/ftp/ftp_util.h" 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace { 18868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool TwoColumnDateListingToTime(const base::string16& date, 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const base::string16& time, 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Time* result) { 
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Time::Exploded time_exploded = { 0 }; 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Date should be in format YYYY-MM-DD. 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<base::string16> date_parts; 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::SplitString(date, '-', &date_parts); 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (date_parts.size() != 3) 28868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) return false; 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!base::StringToInt(date_parts[0], &time_exploded.year)) 30f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return false; 311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (!base::StringToInt(date_parts[1], &time_exploded.month)) 321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return false; 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!base::StringToInt(date_parts[2], &time_exploded.day_of_month)) 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 35868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) 361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // Time should be in format HH:MM 371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (time.length() != 5) 381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return false; 391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 40f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) std::vector<base::string16> time_parts; 41a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) base::SplitString(time, ':', &time_parts); 42f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (time_parts.size() != 2) 43f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return 
false; 44f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (!base::StringToInt(time_parts[0], &time_exploded.hour)) 45f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return false; 46f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (!base::StringToInt(time_parts[1], &time_exploded.minute)) 47f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return false; 48a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) if (!time_exploded.HasValidValues()) 49f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return false; 50f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // We don't know the time zone of the server, so just use local time. 521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci *result = base::Time::FromLocalExploded(time_exploded); 531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return true; 541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// Returns the column index of the end of the date listing and detected 57eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch// last modification time. 
58868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)bool DetectColumnOffsetSizeAndModificationTime( 59868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const std::vector<base::string16>& columns, 602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const base::Time& current_time, 612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) size_t* offset, 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::string16* size, 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Time* modification_time) { 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The column offset can be arbitrarily large if some fields 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // like owner or group name contain spaces. Try offsets from left to right 662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // and use the first one that matches a date listing. 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Here is how a listing line should look like. A star ("*") indicates 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // a required field: 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * 1. permission listing 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 2. number of links (optional) 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * 3. owner name (may contain spaces) 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 4. group name (optional, may contain spaces) 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * 5. size in bytes 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * 6. month 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * 7. 
day of month 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * 8. year or time <-- column_offset will be the index of this column 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 9. file name (optional, may contain spaces) 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (size_t i = 5U; i < columns.size(); i++) { 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (net::FtpUtil::LsDateListingToTime(columns[i - 2], 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) columns[i - 1], 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) columns[i], 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) current_time, 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) modification_time)) { 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *size = columns[i - 3]; 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *offset = i; 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Some FTP listings have swapped the "month" and "day of month" columns 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (for example Russian listings). We try to recognize them only after making 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // sure no column offset works above (this is a more strict way). 
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (size_t i = 5U; i < columns.size(); i++) { 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (net::FtpUtil::LsDateListingToTime(columns[i - 1], 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) columns[i - 2], 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) columns[i], 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) current_time, 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) modification_time)) { 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *size = columns[i - 3]; 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *offset = i; 103a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) return true; 104a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) } 105a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) } 106a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) 107a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) // Some FTP listings use a different date format. 
108a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) for (size_t i = 5U; i < columns.size(); i++) { 109a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) if (TwoColumnDateListingToTime(columns[i - 1], 110a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) columns[i], 111a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) modification_time)) { 112a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) *size = columns[i - 2]; 11358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) *offset = i; 11458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return true; 115f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 116a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) } 11758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 123f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)namespace net { 124f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 1252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool ParseFtpDirectoryListingLs( 126a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) const std::vector<base::string16>& lines, 127a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) const base::Time& current_time, 128a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) std::vector<FtpDirectoryListingEntry>* entries) { 1291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // True after we have received a "total n" listing header, where n is an 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // integer. Only one such header is allowed per listing. 
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool received_total_line = false; 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (size_t i = 0; i < lines.size(); i++) { 134a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) if (lines[i].empty()) 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 137f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) std::vector<base::string16> columns; 138f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) base::SplitString(CollapseWhitespace(lines[i], false), ' ', &columns); 1395d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 140a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) // Some FTP servers put a "total n" line at the beginning of the listing 141a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) // (n is an integer). Allow such a line, but only once, and only if it's 142a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) // the first non-empty line. Do not match the word exactly, because it may 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // be in different languages (at least English and German have been seen 1442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // in the field). 
1452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (columns.size() == 2 && !received_total_line) { 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) received_total_line = true; 1475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1485d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) int64 total_number; 1495d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (!base::StringToInt64(columns[1], &total_number)) 1505d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return false; 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (total_number < 0) 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 157 FtpDirectoryListingEntry entry; 158 159 size_t column_offset; 160 base::string16 size; 161 if (!DetectColumnOffsetSizeAndModificationTime(columns, 162 current_time, 163 &column_offset, 164 &size, 165 &entry.last_modified)) { 166 // Some servers send a message in one of the first few lines. 167 // All those messages have in common is the string ".:", 168 // where "." means the current directory, and ":" separates it 169 // from the rest of the message, which may be empty. 170 if (lines[i].find(base::ASCIIToUTF16(".:")) != base::string16::npos) 171 continue; 172 173 return false; 174 } 175 176 // Do not check "validity" of the permission listing. It's quirky, 177 // and some servers send garbage here while other parts of the line are OK. 
178 179 if (!columns[0].empty() && columns[0][0] == 'l') { 180 entry.type = FtpDirectoryListingEntry::SYMLINK; 181 } else if (!columns[0].empty() && columns[0][0] == 'd') { 182 entry.type = FtpDirectoryListingEntry::DIRECTORY; 183 } else { 184 entry.type = FtpDirectoryListingEntry::FILE; 185 } 186 187 if (!base::StringToInt64(size, &entry.size)) { 188 // Some FTP servers do not separate owning group name from file size, 189 // like "group1234". We still want to display the file name for that 190 // entry, but can't really get the size (What if the group is named 191 // "group1", and the size is in fact 234? We can't distinguish between 192 // that and "group" with size 1234). Use a dummy value for the size. 193 // TODO(phajdan.jr): Use a value that means "unknown" instead of 0 bytes. 194 entry.size = 0; 195 } 196 if (entry.size < 0) { 197 // Some FTP servers have bugs that cause them to display the file size 198 // as negative. They're most likely big files like DVD ISO images. 199 // We still want to display them, so just say the real file size 200 // is unknown. 201 entry.size = -1; 202 } 203 if (entry.type != FtpDirectoryListingEntry::FILE) 204 entry.size = -1; 205 206 if (column_offset == columns.size() - 1) { 207 // If the end of the date listing is the last column, there is no file 208 // name. Some FTP servers send listing entries with empty names. 209 // It's not obvious how to display such an entry, so we ignore them. 210 // We don't want to make the parsing fail at this point though. 211 // Other entries can still be useful. 212 continue; 213 } 214 215 entry.name = FtpUtil::GetStringPartAfterColumns(lines[i], 216 column_offset + 1); 217 218 if (entry.type == FtpDirectoryListingEntry::SYMLINK) { 219 base::string16::size_type pos = 220 entry.name.rfind(base::ASCIIToUTF16(" -> ")); 221 222 // We don't require the " -> " to be present. Some FTP servers don't send 223 // the symlink target, possibly for security reasons. 
224 if (pos != base::string16::npos) 225 entry.name = entry.name.substr(0, pos); 226 } 227 228 entries->push_back(entry); 229 } 230 231 return true; 232} 233 234} // namespace net 235