ftp_directory_listing_parser_ls.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "net/ftp/ftp_directory_listing_parser_ls.h" 6 7#include <vector> 8 9#include "base/string_number_conversions.h" 10#include "base/string_split.h" 11#include "base/string_util.h" 12#include "base/time.h" 13#include "base/utf_string_conversions.h" 14#include "net/ftp/ftp_directory_listing_parser.h" 15#include "net/ftp/ftp_util.h" 16 17namespace { 18 19bool LooksLikeUnixPermission(const string16& text) { 20 if (text.length() != 3) 21 return false; 22 23 // Meaning of the flags: 24 // r - file is readable 25 // w - file is writable 26 // x - file is executable 27 // s or S - setuid/setgid bit set 28 // t or T - "sticky" bit set 29 return ((text[0] == 'r' || text[0] == '-') && 30 (text[1] == 'w' || text[1] == '-') && 31 (text[2] == 'x' || text[2] == 's' || text[2] == 'S' || 32 text[2] == 't' || text[2] == 'T' || text[2] == '-')); 33} 34 35bool LooksLikeUnixPermissionsListing(const string16& text) { 36 if (text.length() < 7) 37 return false; 38 39 // Do not check the first character (entry type). There are many weird 40 // servers that use special file types (for example Plan9 and append-only 41 // files). Fortunately, the rest of the permission listing is more consistent. 42 43 // Do not check the rest of the string. Some servers fail to properly 44 // separate this column from the next column (number of links), resulting 45 // in additional characters at the end. Also, sometimes there is a "+" 46 // sign at the end indicating the file has ACLs set. 47 48 // In fact, we don't even expect three "rwx" triplets of permission 49 // listing, as some FTP servers like Hylafax only send two. 50 return (LooksLikeUnixPermission(text.substr(1, 3)) && 51 LooksLikeUnixPermission(text.substr(4, 3))); 52} 53 54// Returns the column index of the end of the date listing and detected 55// last modification time. 56bool DetectColumnOffsetAndModificationTime(const std::vector<string16>& columns, 57 const base::Time& current_time, 58 size_t* offset, 59 base::Time* modification_time) { 60 // The column offset can be arbitrarily large if some fields 61 // like owner or group name contain spaces. Try offsets from left to right 62 // and use the first one that matches a date listing. 63 // 64 // Here is how a listing line should look like. A star ("*") indicates 65 // a required field: 66 // 67 // * 1. permission listing 68 // 2. number of links (optional) 69 // * 3. owner name (may contain spaces) 70 // 4. group name (optional, may contain spaces) 71 // * 5. size in bytes 72 // * 6. month 73 // * 7. day of month 74 // * 8. year or time <-- column_offset will be the index of this column 75 // 9. file name (optional, may contain spaces) 76 for (size_t i = 5U; i < columns.size(); i++) { 77 if (net::FtpUtil::LsDateListingToTime(columns[i - 2], 78 columns[i - 1], 79 columns[i], 80 current_time, 81 modification_time)) { 82 *offset = i; 83 return true; 84 } 85 } 86 87 // Some FTP listings have swapped the "month" and "day of month" columns 88 // (for example Russian listings). We try to recognize them only after making 89 // sure no column offset works above (this is a more strict way). 90 for (size_t i = 5U; i < columns.size(); i++) { 91 if (net::FtpUtil::LsDateListingToTime(columns[i - 1], 92 columns[i - 2], 93 columns[i], 94 current_time, 95 modification_time)) { 96 *offset = i; 97 return true; 98 } 99 } 100 101 return false; 102} 103 104} // namespace 105 106namespace net { 107 108bool ParseFtpDirectoryListingLs( 109 const std::vector<string16>& lines, 110 const base::Time& current_time, 111 std::vector<FtpDirectoryListingEntry>* entries) { 112 // True after we have received a "total n" listing header, where n is an 113 // integer. Only one such header is allowed per listing. 114 bool received_total_line = false; 115 116 for (size_t i = 0; i < lines.size(); i++) { 117 if (lines[i].empty()) 118 continue; 119 120 std::vector<string16> columns; 121 base::SplitString(CollapseWhitespace(lines[i], false), ' ', &columns); 122 123 // Some FTP servers put a "total n" line at the beginning of the listing 124 // (n is an integer). Allow such a line, but only once, and only if it's 125 // the first non-empty line. Do not match the word exactly, because it may 126 // be in different languages (at least English and German have been seen 127 // in the field). 128 if (columns.size() == 2 && !received_total_line) { 129 received_total_line = true; 130 131 int total_number; 132 if (!base::StringToInt(columns[1], &total_number)) 133 return false; 134 if (total_number < 0) 135 return false; 136 137 continue; 138 } 139 140 FtpDirectoryListingEntry entry; 141 142 size_t column_offset; 143 if (!DetectColumnOffsetAndModificationTime(columns, 144 current_time, 145 &column_offset, 146 &entry.last_modified)) { 147 // Some servers send a message in one of the first few lines. 148 // All those messages have in common is the string ".:", 149 // where "." means the current directory, and ":" separates it 150 // from the rest of the message, which may be empty. 151 if (lines[i].find(ASCIIToUTF16(".:")) != string16::npos) 152 continue; 153 154 return false; 155 } 156 157 if (!LooksLikeUnixPermissionsListing(columns[0])) 158 return false; 159 if (columns[0][0] == 'l') { 160 entry.type = FtpDirectoryListingEntry::SYMLINK; 161 } else if (columns[0][0] == 'd') { 162 entry.type = FtpDirectoryListingEntry::DIRECTORY; 163 } else { 164 entry.type = FtpDirectoryListingEntry::FILE; 165 } 166 167 if (!base::StringToInt64(columns[column_offset - 3], &entry.size)) { 168 // Some FTP servers do not separate owning group name from file size, 169 // like "group1234". We still want to display the file name for that 170 // entry, but can't really get the size (What if the group is named 171 // "group1", and the size is in fact 234? We can't distinguish between 172 // that and "group" with size 1234). Use a dummy value for the size. 173 // TODO(phajdan.jr): Use a value that means "unknown" instead of 0 bytes. 174 entry.size = 0; 175 } 176 if (entry.size < 0) { 177 // Some FTP servers have bugs that cause them to display the file size 178 // as negative. They're most likely big files like DVD ISO images. 179 // We still want to display them, so just say the real file size 180 // is unknown. 181 entry.size = -1; 182 } 183 if (entry.type != FtpDirectoryListingEntry::FILE) 184 entry.size = -1; 185 186 if (column_offset == columns.size() - 1) { 187 // If the end of the date listing is the last column, there is no file 188 // name. Some FTP servers send listing entries with empty names. 189 // It's not obvious how to display such an entry, so we ignore them. 190 // We don't want to make the parsing fail at this point though. 191 // Other entries can still be useful. 192 continue; 193 } 194 195 entry.name = FtpUtil::GetStringPartAfterColumns(lines[i], 196 column_offset + 1); 197 198 if (entry.type == FtpDirectoryListingEntry::SYMLINK) { 199 string16::size_type pos = entry.name.rfind(ASCIIToUTF16(" -> ")); 200 201 // We don't require the " -> " to be present. Some FTP servers don't send 202 // the symlink target, possibly for security reasons. 203 if (pos != string16::npos) 204 entry.name = entry.name.substr(0, pos); 205 } 206 207 entries->push_back(entry); 208 } 209 210 return true; 211} 212 213} // namespace net 214