1# -*- coding: utf-8 -*- 2# Copyright 2013 Google Inc. All Rights Reserved. 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15"""Implementation of Unix-like du command for cloud storage providers.""" 16 17from __future__ import absolute_import 18 19import sys 20 21from gslib.boto_translation import S3_DELETE_MARKER_GUID 22from gslib.bucket_listing_ref import BucketListingObject 23from gslib.command import Command 24from gslib.command_argument import CommandArgument 25from gslib.cs_api_map import ApiSelector 26from gslib.exception import CommandException 27from gslib.ls_helper import LsHelper 28from gslib.storage_url import ContainsWildcard 29from gslib.storage_url import StorageUrlFromString 30from gslib.util import MakeHumanReadable 31from gslib.util import NO_MAX 32from gslib.util import UTF8 33 34_SYNOPSIS = """ 35 gsutil du url... 36""" 37 38_DETAILED_HELP_TEXT = (""" 39<B>SYNOPSIS</B> 40""" + _SYNOPSIS + """ 41 42 43<B>DESCRIPTION</B> 44 The du command displays the amount of space (in bytes) being used by the 45 objects in the file or object hierarchy under a given URL. The syntax emulates 46 the Linux du command (which stands for disk usage). For example, the command: 47 48 gsutil du -s gs://your-bucket/dir 49 50 will report the total space used by all objects under gs://your-bucket/dir and 51 any sub-directories. 52 53 54<B>OPTIONS</B> 55 -0 Ends each output line with a 0 byte rather than a newline. This 56 can be useful to make the output more easily machine-readable. 57 58 -a Includes non-current object versions / generations in the listing 59 (only useful with a versioning-enabled bucket). Also prints 60 generation and metageneration for each listed object. 61 62 -c Produce a grand total. 63 64 -e A pattern to exclude from reporting. Example: -e "*.o" would 65 exclude any object that ends in ".o". Can be specified multiple 66 times. 67 68 -h Prints object sizes in human-readable format (e.g., 1 KiB, 69 234 MiB, 2GiB, etc.) 70 71 -s Display only a summary total for each argument. 72 73 -X Similar to -e, but excludes patterns from the given file. The 74 patterns to exclude should be one per line. 75 76 77<B>EXAMPLES</B> 78 To list the size of all objects in a bucket: 79 80 gsutil du gs://bucketname 81 82 To list the size of all objects underneath a prefix: 83 84 gsutil du gs://bucketname/prefix/* 85 86 To print the total number of bytes in a bucket, in human-readable form: 87 88 gsutil du -ch gs://bucketname 89 90 To see a summary of the total bytes in the two given buckets: 91 92 gsutil du -s gs://bucket1 gs://bucket2 93 94 To list the size of all objects in a versioned bucket, including objects that 95 are not the latest: 96 97 gsutil du -a gs://bucketname 98 99 To list all objects in a bucket, except objects that end in ".bak", 100 with each object printed ending in a null byte: 101 102 gsutil du -e "*.bak" -0 gs://bucketname 103 104 To get a total of all buckets in a project with a grand total for an entire 105 project: 106 107 gsutil -o GSUtil:default_project_id=project-name du -shc 108""") 109 110 111class DuCommand(Command): 112 """Implementation of gsutil du command.""" 113 114 # Command specification. See base class for documentation. 115 command_spec = Command.CreateCommandSpec( 116 'du', 117 command_name_aliases=[], 118 usage_synopsis=_SYNOPSIS, 119 min_args=0, 120 max_args=NO_MAX, 121 supported_sub_args='0ace:hsX:', 122 file_url_ok=False, 123 provider_url_ok=True, 124 urls_start_arg=0, 125 gs_api_support=[ApiSelector.XML, ApiSelector.JSON], 126 gs_default_api=ApiSelector.JSON, 127 argparse_arguments=[ 128 CommandArgument.MakeZeroOrMoreCloudURLsArgument() 129 ] 130 ) 131 # Help specification. See help_provider.py for documentation. 132 help_spec = Command.HelpSpec( 133 help_name='du', 134 help_name_aliases=[], 135 help_type='command_help', 136 help_one_line_summary='Display object size usage', 137 help_text=_DETAILED_HELP_TEXT, 138 subcommand_help_text={}, 139 ) 140 141 def _PrintSummaryLine(self, num_bytes, name): 142 size_string = (MakeHumanReadable(num_bytes) 143 if self.human_readable else str(num_bytes)) 144 sys.stdout.write('%(size)-10s %(name)s%(ending)s' % { 145 'size': size_string, 'name': name, 'ending': self.line_ending}) 146 147 def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref): 148 """Print listing info for given bucket_listing_ref. 149 150 Args: 151 bucket_listing_ref: BucketListing being listed. 152 153 Returns: 154 Tuple (number of objects, object size) 155 156 Raises: 157 Exception: if calling bug encountered. 158 """ 159 obj = bucket_listing_ref.root_object 160 url_str = bucket_listing_ref.url_string 161 if (obj.metadata and S3_DELETE_MARKER_GUID in 162 obj.metadata.additionalProperties): 163 size_string = '0' 164 num_bytes = 0 165 num_objs = 0 166 url_str += '<DeleteMarker>' 167 else: 168 size_string = (MakeHumanReadable(obj.size) 169 if self.human_readable else str(obj.size)) 170 num_bytes = obj.size 171 num_objs = 1 172 173 if not self.summary_only: 174 sys.stdout.write('%(size)-10s %(url)s%(ending)s' % { 175 'size': size_string, 176 'url': url_str.encode(UTF8), 177 'ending': self.line_ending}) 178 179 return (num_objs, num_bytes) 180 181 def RunCommand(self): 182 """Command entry point for the du command.""" 183 self.line_ending = '\n' 184 self.all_versions = False 185 self.produce_total = False 186 self.human_readable = False 187 self.summary_only = False 188 self.exclude_patterns = [] 189 if self.sub_opts: 190 for o, a in self.sub_opts: 191 if o == '-0': 192 self.line_ending = '\0' 193 elif o == '-a': 194 self.all_versions = True 195 elif o == '-c': 196 self.produce_total = True 197 elif o == '-e': 198 self.exclude_patterns.append(a) 199 elif o == '-h': 200 self.human_readable = True 201 elif o == '-s': 202 self.summary_only = True 203 elif o == '-X': 204 if a == '-': 205 f = sys.stdin 206 else: 207 f = open(a, 'r') 208 try: 209 for line in f: 210 line = line.strip() 211 if line: 212 self.exclude_patterns.append(line) 213 finally: 214 f.close() 215 216 if not self.args: 217 # Default to listing all gs buckets. 218 self.args = ['gs://'] 219 220 total_bytes = 0 221 got_nomatch_errors = False 222 223 def _PrintObjectLong(blr): 224 return self._PrintInfoAboutBucketListingRef(blr) 225 226 def _PrintNothing(unused_blr=None): 227 pass 228 229 def _PrintDirectory(num_bytes, name): 230 if not self.summary_only: 231 self._PrintSummaryLine(num_bytes, name) 232 233 for url_arg in self.args: 234 top_level_storage_url = StorageUrlFromString(url_arg) 235 if top_level_storage_url.IsFileUrl(): 236 raise CommandException('Only cloud URLs are supported for %s' 237 % self.command_name) 238 bucket_listing_fields = ['size'] 239 240 ls_helper = LsHelper( 241 self.WildcardIterator, self.logger, 242 print_object_func=_PrintObjectLong, print_dir_func=_PrintNothing, 243 print_dir_header_func=_PrintNothing, 244 print_dir_summary_func=_PrintDirectory, 245 print_newline_func=_PrintNothing, all_versions=self.all_versions, 246 should_recurse=True, exclude_patterns=self.exclude_patterns, 247 fields=bucket_listing_fields) 248 249 # ls_helper expands to objects and prefixes, so perform a top-level 250 # expansion first. 251 if top_level_storage_url.IsProvider(): 252 # Provider URL: use bucket wildcard to iterate over all buckets. 253 top_level_iter = self.WildcardIterator( 254 '%s://*' % top_level_storage_url.scheme).IterBuckets( 255 bucket_fields=['id']) 256 elif top_level_storage_url.IsBucket(): 257 top_level_iter = self.WildcardIterator( 258 '%s://%s' % (top_level_storage_url.scheme, 259 top_level_storage_url.bucket_name)).IterBuckets( 260 bucket_fields=['id']) 261 else: 262 top_level_iter = [BucketListingObject(top_level_storage_url)] 263 264 for blr in top_level_iter: 265 storage_url = blr.storage_url 266 if storage_url.IsBucket() and self.summary_only: 267 storage_url = StorageUrlFromString( 268 storage_url.CreatePrefixUrl(wildcard_suffix='**')) 269 _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url) 270 if (storage_url.IsObject() and exp_objs == 0 and 271 ContainsWildcard(url_arg) and not self.exclude_patterns): 272 got_nomatch_errors = True 273 total_bytes += exp_bytes 274 275 if self.summary_only: 276 self._PrintSummaryLine(exp_bytes, blr.url_string.rstrip('/')) 277 278 if self.produce_total: 279 self._PrintSummaryLine(total_bytes, 'total') 280 281 if got_nomatch_errors: 282 raise CommandException('One or more URLs matched no objects.') 283 284 return 0 285