18d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# -*- coding: utf-8 -*- 28d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# Copyright 2014 Google Inc. All Rights Reserved. 38d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# 48d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# Licensed under the Apache License, Version 2.0 (the "License"); 58d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# you may not use this file except in compliance with the License. 68d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# You may obtain a copy of the License at 78d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# 88d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# http://www.apache.org/licenses/LICENSE-2.0 98d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# 108d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# Unless required by applicable law or agreed to in writing, software 118d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# distributed under the License is distributed on an "AS IS" BASIS, 128d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 138d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# See the License for the specific language governing permissions and 148d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# limitations under the License. 
158d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi"""Implementation of Unix-like rsync command.""" 168d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 178d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom __future__ import absolute_import 188d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 198d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiimport errno 208d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiimport heapq 218d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiimport io 228d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom itertools import islice 238d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiimport os 248d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiimport re 258d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiimport tempfile 268d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiimport textwrap 278d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiimport traceback 288d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiimport urllib 298d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 308d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom boto import config 318d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiimport crcmod 328d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 338d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib import copy_helper 34cef7893435aa41160dd1255c43cb8498279738ccChris Craikfrom gslib.bucket_listing_ref import BucketListingObject 358d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.cloud_api import NotFoundException 368d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.command import Command 378d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.command import DummyArgChecker 388d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.command_argument import CommandArgument 398d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.copy_helper import CreateCopyHelperOpts 408d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom 
gslib.copy_helper import SkipUnsupportedObjectError 418d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.cs_api_map import ApiSelector 428d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.exception import CommandException 438d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.hashing_helper import CalculateB64EncodedCrc32cFromContents 448d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.hashing_helper import CalculateB64EncodedMd5FromContents 458d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.hashing_helper import SLOW_CRCMOD_WARNING 468d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.plurality_checkable_iterator import PluralityCheckableIterator 478d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.sig_handling import GetCaughtSignals 488d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.sig_handling import RegisterSignalHandler 498d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.storage_url import StorageUrlFromString 508d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.util import GetCloudApiInstance 518d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.util import IsCloudSubdirPlaceholder 528d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.util import TEN_MIB 538d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.util import UsingCrcmodExtension 548d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.util import UTF8 558d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom gslib.wildcard_iterator import CreateWildcardIterator 568d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 578d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 588d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi_SYNOPSIS = """ 598d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil rsync [-c] [-C] [-d] [-e] [-n] [-p] [-r] [-U] [-x] src_url dst_url 608d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi""" 
618d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 628d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi_DETAILED_HELP_TEXT = (""" 638d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi<B>SYNOPSIS</B> 648d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi""" + _SYNOPSIS + """ 658d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 668d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 678d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi<B>DESCRIPTION</B> 688d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi The gsutil rsync command makes the contents under dst_url the same as the 698d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi contents under src_url, by copying any missing files/objects, and (if the 708d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi -d option is specified) deleting any extra files/objects. For example, to 718d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi make gs://mybucket/data match the contents of the local directory "data" 728d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi you could do: 738d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 748d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil rsync -d data gs://mybucket/data 758d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 768d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi To recurse into directories use the -r option: 778d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 788d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil rsync -d -r data gs://mybucket/data 798d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 808d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi To copy only new/changed files without deleting extra files from 818d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gs://mybucket/data leave off the -d option: 828d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 838d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil rsync -r data gs://mybucket/data 848d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 
858d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi If you have a large number of objects to synchronize you might want to use the 868d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil -m option, to perform parallel (multi-threaded/multi-processing) 878d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi synchronization: 888d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 898d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil -m rsync -d -r data gs://mybucket/data 908d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 918d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi The -m option typically will provide a large performance boost if either the 928d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi source or destination (or both) is a cloud URL. If both source and 938d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi destination are file URLs the -m option will typically thrash the disk and 948d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi slow synchronization down. 
958d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 968d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi To make the local directory "data" the same as the contents of 978d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gs://mybucket/data: 988d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 998d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil rsync -d -r gs://mybucket/data data 1008d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1018d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi To make the contents of gs://mybucket2 the same as gs://mybucket1: 1028d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1038d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil rsync -d -r gs://mybucket1 gs://mybucket2 1048d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1058d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi You can also mirror data across local directories: 1068d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1078d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil rsync -d -r dir1 dir2 1088d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1098d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi To mirror your content across clouds: 1108d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1118d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil rsync -d -r gs://my-gs-bucket s3://my-s3-bucket 1128d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1138d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Note: If you are synchronizing a large amount of data between clouds you might 1148d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi consider setting up a 1158d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi `Google Compute Engine <https://cloud.google.com/products/compute-engine>`_ 1168d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi account and running gsutil there. 
Since cross-provider gsutil data transfers 1178d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi flow through the machine where gsutil is running, doing this can make your 1188d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi transfer run significantly faster than running gsutil on your local 1198d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi workstation. 1208d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1218d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1228d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi<B>BE CAREFUL WHEN USING -d OPTION!</B> 1238d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi The rsync -d option is very useful and commonly used, because it provides a 1248d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi means of making the contents of a destination bucket or directory match those 1258d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi of a source bucket or directory. However, please exercise caution when you 1268d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi use this option: It's possible to delete large amounts of data accidentally 1278d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if, for example, you erroneously reverse source and destination. For example, 1288d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if you meant to synchronize a local directory from a bucket in the cloud but 1298d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi instead run the command: 1308d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1318d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil -m rsync -r -d ./your-dir gs://your-bucket 1328d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1338d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi and your-dir is currently empty, you will quickly delete all of the objects in 1348d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gs://your-bucket. 
1358d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1368d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi You can also cause large amounts of data to be lost quickly by specifying a 1378d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi subdirectory of the destination as the source of an rsync. For example, the 1388d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi command: 1398d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1408d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil -m rsync -r -d gs://your-bucket/data gs://your-bucket 1418d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1428d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi would cause most or all of the objects in gs://your-bucket to be deleted 1438d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi (some objects may survive if there are any with names that sort lower than 1448d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi "data" under gs://your-bucket/data). 1458d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1468d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi In addition to paying careful attention to the source and destination you 1478d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi specify with the rsync command, there are two more safety measures you can 1488d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi take when using gsutil rsync -d: 1498d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1508d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1. Try running the command with the rsync -n option first, to see what it 1518d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi would do without actually performing the operations. 
For example, if 1528d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi you run the command: 1538d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1548d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil -m rsync -r -d -n gs://your-bucket/data gs://your-bucket 1558d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1568d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi it will be immediately evident that running that command without the -n 1578d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi option would cause many objects to be deleted. 1588d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1598d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2. Enable object versioning in your bucket, which will allow you to restore 1608d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi objects if you accidentally delete them. For more details see 1618d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi "gsutil help versions". 1628d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1638d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1648d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi<B>IMPACT OF BUCKET LISTING EVENTUAL CONSISTENCY</B> 1658d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi The rsync command operates by listing the source and destination URLs, and 1668d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi then performing copy and remove operations according to the differences 1678d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi between these listings. 
Because bucket listing is eventually (not strongly) 1688d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi consistent, if you upload new objects or delete objects from a bucket and then 1698d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi immediately run gsutil rsync with that bucket as the source or destination, 1708d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi it's possible the rsync command will not see the recent updates and thus 1718d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi synchronize incorrectly. You can rerun the rsync operation again later to 1728d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi correct the incorrect synchronization. 1738d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1748d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1758d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi<B>CHECKSUM VALIDATION AND FAILURE HANDLING</B> 1768d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi At the end of every upload or download, the gsutil rsync command validates 1778d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi that the checksum of the source file/object matches the checksum of the 1788d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi destination file/object. If the checksums do not match, gsutil will delete 1798d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi the invalid copy and print a warning message. This very rarely happens, but 1808d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if it does, please contact gs-team@google.com. 1818d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1828d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi The rsync command will retry when failures occur, but if enough failures 1838d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi happen during a particular copy or delete operation the command will skip that 1848d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi object and move on. 
At the end of the synchronization run if any failures were 1858d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi not successfully retried, the rsync command will report the count of failures, 1868d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi and exit with non-zero status. At this point you can run the rsync command 1878d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi again, and it will attempt any remaining needed copy and/or delete operations. 1888d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1898d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Note that there are cases where retrying will never succeed, such as if you 1908d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi don't have write permission to the destination bucket or if the destination 1918d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi path for some objects is longer than the maximum allowed length. 1928d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1938d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi For more details about gsutil's retry handling, please see 1948d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi "gsutil help retries". 1958d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1968d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1978d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi<B>CHANGE DETECTION ALGORITHM</B> 1988d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi To determine if a file or object has changed gsutil rsync first checks whether 1998d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi the source and destination sizes match. If they match, it next checks if their 2008d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi checksums match, using checksums if available (see below). 
Unlike the Unix 2018d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi rsync command, gsutil rsync does not use timestamps to determine if the 2028d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi file/object changed, because the GCS API does not permit the caller to set an 2038d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi object's timestamp (hence, timestamps of identical files/objects cannot be 2048d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi made to match). 2058d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2068d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Checksums will not be available in two cases: 2078d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2088d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1. When synchronizing to or from a file system. By default, gsutil does not 2098d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi checksum files, because of the slowdown caused when working with large 2108d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi files. You can cause gsutil to checksum files by using the gsutil rsync -c 2118d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi option, at the cost of increased local disk I/O and run time when working 2128d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi with large files. You should consider using the -c option if your files can 2138d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi change without changing sizes (e.g., if you have files that contain fixed 2148d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi width data, such as timestamps). 2158d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2168d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2. When comparing composite GCS objects with objects at a cloud provider that 2178d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi does not support CRC32C (which is the only checksum available for composite 2188d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi objects). 
See 'gsutil help compose' for details about composite objects. 2198d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2208d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2218d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi<B>COPYING IN THE CLOUD AND METADATA PRESERVATION</B> 2228d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi If both the source and destination URL are cloud URLs from the same provider, 2238d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil copies data "in the cloud" (i.e., without downloading to and uploading 2248d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi from the machine where you run gsutil). In addition to the performance and 2258d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi cost advantages of doing this, copying in the cloud preserves metadata (like 2268d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Content-Type and Cache-Control). In contrast, when you download data from the 2278d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi cloud it ends up in a file, which has no associated metadata. Thus, unless you 2288d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi have some way to hold on to or re-create that metadata, synchronizing a bucket 2298d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi to a directory in the local file system will not retain the metadata. 2308d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2318d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Note that by default, the gsutil rsync command does not copy the ACLs of 2328d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi objects being synchronized and instead will use the default bucket ACL (see 2338d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi "gsutil help defacl"). You can override this behavior with the -p option (see 2348d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi OPTIONS below). 
2358d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2368d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2378d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi<B>SLOW CHECKSUMS</B> 2388d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi If you find that CRC32C checksum computation runs slowly, this is likely 2398d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi because you don't have a compiled CRC32c on your system. Try running: 2408d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2418d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil ver -l 2428d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2438d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi If the output contains: 2448d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2458d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi compiled crcmod: False 2468d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2478d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi you are running a Python library for computing CRC32C, which is much slower 2488d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi than using the compiled code. For information on getting a compiled CRC32C 2498d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi implementation, see 'gsutil help crc32c'. 2508d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2518d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2528d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi<B>LIMITATIONS</B> 2538d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 1. The gsutil rsync command doesn't make the destination object's timestamps 2548d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi match those of the source object (it can't; timestamp setting is not 2558d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi allowed by the GCS API). 2568d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 257cef7893435aa41160dd1255c43cb8498279738ccChris Craik 2. 
The gsutil rsync command considers only the current object generations in 258cef7893435aa41160dd1255c43cb8498279738ccChris Craik the source and destination buckets when deciding what to copy / delete. If 259cef7893435aa41160dd1255c43cb8498279738ccChris Craik versioning is enabled in the destination bucket then gsutil rsync's 260cef7893435aa41160dd1255c43cb8498279738ccChris Craik overwriting or deleting objects will end up creating versions, but the 261cef7893435aa41160dd1255c43cb8498279738ccChris Craik command doesn't try to make the archived generations match in the source 262cef7893435aa41160dd1255c43cb8498279738ccChris Craik and destination buckets. 263cef7893435aa41160dd1255c43cb8498279738ccChris Craik 2648d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2658d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2668d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi<B>OPTIONS</B> 2678d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi -c Causes the rsync command to compute checksums for files if the 2688d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi size of source and destination match, and then compare 2698d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi checksums. This option increases local disk I/O and run time 2708d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if either src_url or dst_url are on the local file system. 2718d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2728d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi -C If an error occurs, continue to attempt to copy the remaining 2738d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi files. If errors occurred, gsutil's exit status will be non-zero 2748d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi even if this flag is set. This option is implicitly set when 2758d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi running "gsutil -m rsync...". Note: -C only applies to the 2768d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi actual copying operation. 
If an error occurs while iterating 2778d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi over the files in the local directory (e.g., invalid Unicode 2788d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi file name) gsutil will print an error message and abort. 2798d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2808d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi -d Delete extra files under dst_url not found under src_url. By 2818d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi default extra files are not deleted. Note: this option can 2828d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi delete data quickly if you specify the wrong source/destination 2838d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi combination. See the help section above, 2848d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi "BE CAREFUL WHEN USING -d OPTION!". 2858d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2868d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi -e Exclude symlinks. When specified, symbolic links will be 2878d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi ignored. 2888d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2898d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi -n Causes rsync to run in "dry run" mode, i.e., just outputting 2908d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi what would be copied or deleted without actually doing any 2918d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi copying/deleting. 2928d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 2938d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi -p Causes ACLs to be preserved when synchronizing in the cloud. 2948d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Note that this option has performance and cost implications when 2958d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi using the XML API, as it requires separate HTTP calls for 2968d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi interacting with ACLs. 
The performance issue can be mitigated to 2978d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi some degree by using gsutil -m rsync to cause parallel 2988d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi synchronization. Also, this option only works if you have OWNER 2998d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi access to all of the objects that are copied. 3008d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 3018d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi You can avoid the additional performance and cost of using 3028d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi rsync -p if you want all objects in the destination bucket to 3038d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi end up with the same ACL by setting a default object ACL on that 3048d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi bucket instead of using rsync -p. See 'gsutil help defacl'. 3058d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 3068d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi -R, -r Causes directories, buckets, and bucket subdirectories to be 3078d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi synchronized recursively. If you neglect to use this option 3088d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil will make only the top-level directory in the source 3098d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi and destination URLs match, skipping any sub-directories. 3108d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 3118d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi -U Skip objects with unsupported object types instead of failing. 312cef7893435aa41160dd1255c43cb8498279738ccChris Craik Unsupported object types are Amazon S3 Objects in the GLACIER 313cef7893435aa41160dd1255c43cb8498279738ccChris Craik storage class. 
3148d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 3158d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi -x pattern Causes files/objects matching pattern to be excluded, i.e., any 3168d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi matching files/objects will not be copied or deleted. Note that 3178d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi the pattern is a Python regular expression, not a wildcard (so, 3188d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi matching any string ending in 'abc' would be specified using 3198d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi '.*abc' rather than '*abc'). Note also that the exclude path is 3208d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi always relative (similar to Unix rsync or tar exclude options). 3218d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi For example, if you run the command: 3228d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 3238d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil rsync -x 'data./.*\\.txt' dir gs://my-bucket 3248d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 3258d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi it will skip the file dir/data1/a.txt. 
3268d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 3278d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi You can use regex alternation to specify multiple exclusions, 3288d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi for example: 3298d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 3308d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil rsync -x '.*\\.txt|.*\\.jpg' dir gs://my-bucket 3318d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi""") 3328d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 3338d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 3348d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiclass _DiffAction(object): 3358d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi COPY = 'copy' 3368d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi REMOVE = 'remove' 3378d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 3388d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 3398d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi_NA = '-' 3408d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi_OUTPUT_BUFFER_SIZE = 64 * 1024 3418d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi_PROGRESS_REPORT_LISTING_COUNT = 10000 3428d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 3438d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 3448d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# Tracks files we need to clean up at end or if interrupted. 
_tmp_files = []


# pylint: disable=unused-argument
def _HandleSignals(signal_num, cur_stack_frame):
  """Called when rsync command is killed with SIGINT, SIGQUIT or SIGTERM.

  Args:
    signal_num: Signal number that was received (unused).
    cur_stack_frame: Stack frame that was interrupted (unused).
  """
  CleanUpTempFiles()


def CleanUpTempFiles():
  """Cleans up temp files.

  This function allows the main (RunCommand) function to clean up at end of
  operation, or if gsutil rsync is interrupted (e.g., via ^C). This is necessary
  because tempfile.NamedTemporaryFile doesn't allow the created file to be
  re-opened in read mode on Windows, so we have to use tempfile.mkstemp, which
  doesn't automatically delete temp files.

  Removal is best-effort: a file that can't be removed (for example because it
  was already deleted) is skipped, and the remaining files are still removed.
  """
  for fname in _tmp_files:
    # Guard each unlink separately; guarding the whole loop would leave every
    # file after the first failure behind.
    try:
      os.unlink(fname)
    except:  # pylint: disable=bare-except
      pass


class _DiffToApply(object):
  """Class that encapsulates info needed to apply diff for one object."""

  def __init__(self, src_url_str, dst_url_str, diff_action):
    """Constructor.

    Args:
      src_url_str: The source URL string, or None if diff_action is REMOVE.
      dst_url_str: The destination URL string.
      diff_action: _DiffAction to be applied.
    """
    self.src_url_str = src_url_str
    self.dst_url_str = dst_url_str
    self.diff_action = diff_action


def _DiffToApplyArgChecker(command_instance, diff_to_apply):
  """Arg checker that skips symlinks if -e flag specified.

  Args:
    command_instance: Command instance; its exclude_symlinks attribute and
        logger are used.
    diff_to_apply: The _DiffToApply to check.

  Returns:
    False if diff_to_apply is a copy from a local symbolic link and symlinks
    are being excluded, True otherwise.
  """
  if (diff_to_apply.diff_action == _DiffAction.REMOVE
      or not command_instance.exclude_symlinks):
    # No src URL is populated for REMOVE actions.
    return True
  exp_src_url = StorageUrlFromString(diff_to_apply.src_url_str)
  if exp_src_url.IsFileUrl() and os.path.islink(exp_src_url.object_name):
    command_instance.logger.info('Skipping symbolic link %s...', exp_src_url)
    return False
  return True


def _ComputeNeededFileChecksums(logger, src_url_str, src_size, src_crc32c,
                                src_md5, dst_url_str, dst_size, dst_crc32c,
                                dst_md5):
  """Computes any file checksums needed by _ObjectsMatch.

  A checksum is only computed for a local file. Each progress message names
  the file that is actually being read and the hash actually being computed,
  and is only logged when that file is larger than TEN_MIB.

  Args:
    logger: logging.logger for outputting log messages.
    src_url_str: Source URL string.
    src_size: Source size
    src_crc32c: Source CRC32c.
    src_md5: Source MD5.
    dst_url_str: Destination URL string.
    dst_size: Destination size
    dst_crc32c: Destination CRC32c.
    dst_md5: Destination MD5.

  Returns:
    (src_crc32c, src_md5, dst_crc32c, dst_md5)
  """
  src_url = StorageUrlFromString(src_url_str)
  dst_url = StorageUrlFromString(dst_url_str)
  if src_url.IsFileUrl():
    if dst_crc32c != _NA or dst_url.IsFileUrl():
      if src_size > TEN_MIB:
        logger.info('Computing CRC32C for %s...', src_url_str)
      with open(src_url.object_name, 'rb') as fp:
        src_crc32c = CalculateB64EncodedCrc32cFromContents(fp)
    elif dst_md5 != _NA or dst_url.IsFileUrl():
      if src_size > TEN_MIB:
        logger.info('Computing MD5 for %s...', src_url_str)
      with open(src_url.object_name, 'rb') as fp:
        src_md5 = CalculateB64EncodedMd5FromContents(fp)
  if dst_url.IsFileUrl():
    # src_crc32c may have just been filled in above, in which case the
    # destination is hashed with the same algorithm.
    if src_crc32c != _NA:
      if dst_size > TEN_MIB:
        logger.info('Computing CRC32C for %s...', dst_url_str)
      with open(dst_url.object_name, 'rb') as fp:
        dst_crc32c = CalculateB64EncodedCrc32cFromContents(fp)
    elif src_md5 != _NA:
      if dst_size > TEN_MIB:
        logger.info('Computing MD5 for %s...', dst_url_str)
      with open(dst_url.object_name, 'rb') as fp:
        dst_md5 = CalculateB64EncodedMd5FromContents(fp)
  return (src_crc32c, src_md5, dst_crc32c, dst_md5)


def _ListUrlRootFunc(cls, args_tuple, thread_state=None):
  """Worker function for listing the files/objects to be sync'd under a URL.

  Outputs sorted list to out_filename, formatted per _BuildTmpOutputLine. We
  sort the listed URLs because we don't want to depend on consistent sort
  order across file systems and cloud providers.

  Args:
    cls: Command instance.
    args_tuple: (base_url_str, out_filename, desc), where base_url_str is
                top-level URL string to list; out_filename is name of file to
                which sorted output should be written; desc is 'source' or
                'destination'.
    thread_state: gsutil Cloud API instance to use.
  """
  gsutil_api = GetCloudApiInstance(cls, thread_state=thread_state)
  (base_url_str, out_filename, desc) = args_tuple
  # We sort while iterating over base_url_str, allowing parallelism of batched
  # sorting with collecting the listing.
  # The with-statement closes the output file even if something that is not an
  # Exception subclass (e.g., KeyboardInterrupt) escapes the handler below.
  with io.open(out_filename, mode='w', encoding=UTF8) as out_file:
    try:
      _BatchSort(_FieldedListingIterator(cls, gsutil_api, base_url_str, desc),
                 out_file)
    except Exception as e:  # pylint: disable=broad-except
      # Abandon rsync if an exception percolates up to this layer - retryable
      # exceptions are handled in the lower layers, so we got a non-retryable
      # exception (like 404 bucket not found) and proceeding would either be
      # futile or could result in data loss - for example:
      #     gsutil rsync -d gs://non-existent-bucket ./localdir
      # would delete files from localdir.
      cls.logger.error(
          'Caught non-retryable exception while listing %s: %s' %
          (base_url_str, e))
      cls.non_retryable_listing_failures = 1


def _LocalDirIterator(base_url):
  """A generator that yields a BLR for each file in a local directory.

  We use this function instead of WildcardIterator for listing a local
  directory without recursion, because the glob.iglob implementation called
  by WildcardIterator skips "dot" files (which we don't want to do when
  synchronizing to or from a local directory).

  Args:
    base_url: URL for the directory over which to iterate.

  Yields:
    BucketListingObject for each file in the directory.
  """
  for filename in os.listdir(base_url.object_name):
    filename = os.path.join(base_url.object_name, filename)
    # Only regular files are yielded; subdirectories are not descended into.
    if os.path.isfile(filename):
      yield BucketListingObject(StorageUrlFromString(filename), None)


def _FieldedListingIterator(cls, gsutil_api, base_url_str, desc):
  """Iterator over base_url_str formatting output per _BuildTmpOutputLine.

  Args:
    cls: Command instance.
    gsutil_api: gsutil Cloud API instance to use for bucket listing.
    base_url_str: The top-level URL string over which to iterate.
    desc: 'source' or 'destination'.

  Yields:
    Output line formatted per _BuildTmpOutputLine.
  """
  base_url = StorageUrlFromString(base_url_str)
  if base_url.scheme == 'file' and not cls.recursion_requested:
    iterator = _LocalDirIterator(base_url)
  else:
    if cls.recursion_requested:
      wildcard = '%s/**' % base_url_str.rstrip('/\\')
    else:
      wildcard = '%s/*' % base_url_str.rstrip('/\\')
    iterator = CreateWildcardIterator(
        wildcard, gsutil_api, debug=cls.debug,
        project_id=cls.project_id).IterObjects(
            # Request just the needed fields, to reduce bandwidth usage.
            bucket_listing_fields=['crc32c', 'md5Hash', 'name', 'size'])

  i = 0
  for blr in iterator:
    # Various GUI tools (like the GCS web console) create placeholder objects
    # ending with '/' when the user creates an empty directory. Normally these
    # tools should delete those placeholders once objects have been written
    # "under" the directory, but sometimes the placeholders are left around.
    # We need to filter them out here, otherwise if the user tries to rsync
    # from GCS to a local directory it will result in a directory/file
    # conflict (e.g., trying to download an object called "mydata/" where the
    # local directory "mydata" exists).
    url = blr.storage_url
    if IsCloudSubdirPlaceholder(url, blr=blr):
      # We used to output the message 'Skipping cloud sub-directory placeholder
      # object...' but we no longer do so because it caused customer confusion.
      continue
    if (cls.exclude_symlinks and url.IsFileUrl()
        and os.path.islink(url.object_name)):
      continue
    if cls.exclude_pattern:
      # The exclude pattern is matched against the path relative to
      # base_url_str, without a leading delimiter.
      str_to_check = url.url_string[len(base_url_str):]
      if str_to_check.startswith(url.delim):
        str_to_check = str_to_check[1:]
      if cls.exclude_pattern.match(str_to_check):
        continue
    i += 1
    if i % _PROGRESS_REPORT_LISTING_COUNT == 0:
      cls.logger.info('At %s listing %d...', desc, i)
    yield _BuildTmpOutputLine(blr)


def _BuildTmpOutputLine(blr):
  """Builds line to output to temp file for given BucketListingRef.

  Args:
    blr: The BucketListingRef.

  Returns:
    The output line, formatted as _EncodeUrl(URL)<sp>size<sp>crc32c<sp>md5
    where crc32c will only be present for GCS URLs, and md5 will only be
    present for cloud URLs that aren't composite objects. A missing field is
    populated with '-'.

  Raises:
    CommandException: if the URL is neither a file URL nor a cloud URL.
  """
  crc32c = _NA
  md5 = _NA
  url = blr.storage_url
  if url.IsFileUrl():
    size = os.path.getsize(url.object_name)
  elif url.IsCloudUrl():
    size = blr.root_object.size
    crc32c = blr.root_object.crc32c or _NA
    md5 = blr.root_object.md5Hash or _NA
  else:
    raise CommandException('Got unexpected URL type (%s)' % url.scheme)
  return '%s %d %s %s\n' % (_EncodeUrl(url.url_string), size, crc32c, md5)


def _EncodeUrl(url_string):
  """Encodes url_string with quote plus encoding and UTF8 character encoding.

  We use this for all URL encodings.

  Args:
    url_string: String URL to encode.

  Returns:
    encoded URL.
  """
  return urllib.quote_plus(url_string.encode(UTF8))


def _DecodeUrl(enc_url_string):
  """Inverts encoding from _EncodeUrl.

  Args:
    enc_url_string: String URL to decode.

  Returns:
    decoded URL.
  """
  return urllib.unquote_plus(enc_url_string).decode(UTF8)


# pylint: disable=bare-except
def _BatchSort(in_iter, out_file):
  """Sorts input lines from in_iter and outputs to out_file.

  Sorts in batches as input arrives, so input file does not need to be loaded
  into memory all at once. Derived from Python Recipe 466302: Sorting big
  files the Python 2.4 way by Nicolas Lehuen.

  Sorted format is per _BuildTmpOutputLine. We're sorting on the entire line
  when we could just sort on the first record (URL); but the sort order is
  identical either way.

  Args:
    in_iter: Input iterator.
    out_file: Output file.

  Raises:
    CommandException: if an IOError with errno EMFILE (too many open files) is
        raised while sorting. Any other IOError is re-raised unchanged.
  """
  # Note: If chunk_files gets very large we can run out of open FDs. See .boto
  # file comments about rsync_buffer_lines. If increasing rsync_buffer_lines
  # doesn't suffice (e.g., for someone synchronizing with a really large
  # bucket), an option would be to make gsutil merge in passes, never
  # opening all chunk files simultaneously.
  buffer_size = config.getint('GSUtil', 'rsync_buffer_lines', 32000)
  chunk_files = []
  try:
    while True:
      current_chunk = sorted(islice(in_iter, buffer_size))
      if not current_chunk:
        break
      output_chunk = io.open('%s-%06i' % (out_file.name, len(chunk_files)),
                             mode='w+', encoding=UTF8)
      chunk_files.append(output_chunk)
      # The chunk is a single joined string, so write() it in one call;
      # writelines() would iterate over it one character at a time.
      output_chunk.write(unicode(''.join(current_chunk)))
      output_chunk.flush()
      # Rewind so the merge below reads the chunk from its first line.
      output_chunk.seek(0)
    out_file.writelines(heapq.merge(*chunk_files))
  except IOError as e:
    if e.errno == errno.EMFILE:
      raise CommandException('\n'.join(textwrap.wrap(
          'Synchronization failed because too many open file handles were '
          'needed while building synchronization state. Please see the '
          'comments about rsync_buffer_lines in your .boto config file for a '
          'possible way to address this problem.')))
    raise
  finally:
    # Best-effort removal of the intermediate chunk files.
    for chunk_file in chunk_files:
      try:
        chunk_file.close()
        os.remove(chunk_file.name)
      except:
        pass


class _DiffIterator(object):
  """Iterator yielding sequence of _DiffToApply objects."""

  def __init__(self, command_obj, base_src_url, base_dst_url):
    self.command_obj = command_obj
    self.compute_file_checksums = command_obj.compute_file_checksums
    self.delete_extras = command_obj.delete_extras
    self.recursion_requested = command_obj.recursion_requested
    self.logger = self.command_obj.logger
    self.base_src_url = base_src_url
    self.base_dst_url = base_dst_url
6738d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.logger.info('Building synchronization state...') 6748d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 6758d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi (src_fh, self.sorted_list_src_file_name) = tempfile.mkstemp( 6768d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi prefix='gsutil-rsync-src-') 6778d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi _tmp_files.append(self.sorted_list_src_file_name) 6788d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi (dst_fh, self.sorted_list_dst_file_name) = tempfile.mkstemp( 6798d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi prefix='gsutil-rsync-dst-') 6808d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi _tmp_files.append(self.sorted_list_dst_file_name) 6818d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Close the file handles; the file will be opened in write mode by 6828d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # _ListUrlRootFunc. 6838d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi os.close(src_fh) 6848d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi os.close(dst_fh) 6858d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 6868d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Build sorted lists of src and dst URLs in parallel. To do this, pass args 6878d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # to _ListUrlRootFunc as tuple (base_url_str, out_filename, desc) 6888d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # where base_url_str is the starting URL string for listing. 
6898d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi args_iter = iter([ 6908d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi (self.base_src_url.url_string, self.sorted_list_src_file_name, 6918d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 'source'), 6928d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi (self.base_dst_url.url_string, self.sorted_list_dst_file_name, 6938d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 'destination') 6948d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi ]) 6958d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 6968d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Contains error message from non-retryable listing failure. 6978d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi command_obj.non_retryable_listing_failures = 0 6988d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi shared_attrs = ['non_retryable_listing_failures'] 6998d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi command_obj.Apply(_ListUrlRootFunc, args_iter, _RootListingExceptionHandler, 7008d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi shared_attrs, arg_checker=DummyArgChecker, 7018d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi parallel_operations_override=True, 7028d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi fail_on_error=True) 7038d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7048d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if command_obj.non_retryable_listing_failures: 7058d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi raise CommandException('Caught non-retryable exception - aborting rsync') 7068d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7078d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.sorted_list_src_file = open(self.sorted_list_src_file_name, 'r') 7088d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.sorted_list_dst_file = open(self.sorted_list_dst_file_name, 'r') 7098d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7108d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 
# Wrap iterators in PluralityCheckableIterator so we can check emptiness. 7118d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.sorted_src_urls_it = PluralityCheckableIterator( 7128d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi iter(self.sorted_list_src_file)) 7138d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.sorted_dst_urls_it = PluralityCheckableIterator( 7148d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi iter(self.sorted_list_dst_file)) 7158d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7168d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi def _ParseTmpFileLine(self, line): 7178d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """Parses output from _BuildTmpOutputLine. 7188d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7198d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Parses into tuple: 7208d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi (URL, size, crc32c, md5) 7218d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi where crc32c and/or md5 can be _NA. 7228d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7238d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Args: 7248d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi line: The line to parse. 
7258d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7268d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Returns: 7278d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Parsed tuple: (url, size, crc32c, md5) 7288d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """ 7298d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi (encoded_url, size, crc32c, md5) = line.split() 7308d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi return (_DecodeUrl(encoded_url), int(size), crc32c, md5.strip()) 7318d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7328d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi def _WarnIfMissingCloudHash(self, url_str, crc32c, md5): 7338d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """Warns if given url_str is a cloud URL and is missing both crc32c and md5. 7348d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7358d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Args: 7368d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi url_str: Destination URL string. 7378d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi crc32c: Destination CRC32c. 7388d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi md5: Destination MD5. 7398d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7408d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Returns: 7418d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi True if issued warning. 7428d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """ 7438d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # One known way this can currently happen is when rsync'ing objects larger 7448d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # than 5 GB from S3 (for which the etag is not an MD5). 
7458d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if (StorageUrlFromString(url_str).IsCloudUrl() 7468d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi and crc32c == _NA and md5 == _NA): 7478d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.logger.warn( 7488d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 'Found no hashes to validate %s. Integrity cannot be assured without ' 7498d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 'hashes.', url_str) 7508d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi return True 7518d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi return False 7528d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7538d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi def _ObjectsMatch(self, src_url_str, src_size, src_crc32c, src_md5, 7548d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_url_str, dst_size, dst_crc32c, dst_md5): 7558d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """Returns True if src and dst objects are the same. 7568d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7578d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Uses size plus whatever checksums are available. 7588d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7598d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Args: 7608d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_url_str: Source URL string. 7618d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_size: Source size 7628d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_crc32c: Source CRC32c. 7638d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_md5: Source MD5. 7648d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_url_str: Destination URL string. 7658d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_size: Destination size 7668d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_crc32c: Destination CRC32c. 7678d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_md5: Destination MD5. 
7688d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7698d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Returns: 7708d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi True/False. 7718d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """ 7728d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Note: This function is called from __iter__, which is called from the 7738d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Command.Apply driver. Thus, all checksum computation will be run in a 7748d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # single thread, which is good (having multiple threads concurrently 7758d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # computing checksums would thrash the disk). 7768d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if src_size != dst_size: 7778d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi return False 7788d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if self.compute_file_checksums: 7798d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi (src_crc32c, src_md5, dst_crc32c, dst_md5) = _ComputeNeededFileChecksums( 7808d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.logger, src_url_str, src_size, src_crc32c, src_md5, dst_url_str, 7818d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_size, dst_crc32c, dst_md5) 7828d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if src_md5 != _NA and dst_md5 != _NA: 7838d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.logger.debug('Comparing md5 for %s and %s', src_url_str, dst_url_str) 7848d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi return src_md5 == dst_md5 7858d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if src_crc32c != _NA and dst_crc32c != _NA: 7868d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.logger.debug( 7878d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 'Comparing crc32c for %s and %s', src_url_str, dst_url_str) 7888d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi return src_crc32c == dst_crc32c 
7898d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if not self._WarnIfMissingCloudHash(src_url_str, src_crc32c, src_md5): 7908d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self._WarnIfMissingCloudHash(dst_url_str, dst_crc32c, dst_md5) 7918d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Without checksums to compare we depend only on basic size comparison. 7928d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi return True 7938d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7948d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi def __iter__(self): 7958d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """Iterates over src/dst URLs and produces a _DiffToApply sequence. 7968d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 7978d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Yields: 7988d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi The _DiffToApply. 7998d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """ 8008d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Strip trailing slashes, if any, so we compute tail length against 8018d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # consistent position regardless of whether trailing slashes were included 8028d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # or not in URL. 8038d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi base_src_url_len = len(self.base_src_url.url_string.rstrip('/\\')) 8048d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi base_dst_url_len = len(self.base_dst_url.url_string.rstrip('/\\')) 8058d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_url_str = dst_url_str = None 8068d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Invariant: After each yield, the URLs in src_url_str, dst_url_str, 8078d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # self.sorted_src_urls_it, and self.sorted_dst_urls_it are not yet 8088d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # processed. 
Each time we encounter None in src_url_str or dst_url_str we 8098d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # populate from the respective iterator, and we reset one or the other value 8108d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # to None after yielding an action that disposes of that URL. 8118d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi while not self.sorted_src_urls_it.IsEmpty() or src_url_str is not None: 8128d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if src_url_str is None: 8138d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi (src_url_str, src_size, src_crc32c, src_md5) = self._ParseTmpFileLine( 8148d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.sorted_src_urls_it.next()) 8158d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Skip past base URL and normalize slashes so we can compare across 8168d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # clouds/file systems (including Windows). 8178d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_url_str_to_check = _EncodeUrl( 8188d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_url_str[base_src_url_len:].replace('\\', '/')) 8198d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_url_str_would_copy_to = copy_helper.ConstructDstUrl( 8208d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.base_src_url, StorageUrlFromString(src_url_str), True, True, 8218d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.base_dst_url, False, self.recursion_requested).url_string 8228d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if self.sorted_dst_urls_it.IsEmpty(): 8238d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # We've reached end of dst URLs, so copy src to dst. 
8248d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi yield _DiffToApply( 8258d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_url_str, dst_url_str_would_copy_to, _DiffAction.COPY) 8268d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_url_str = None 8278d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi continue 8288d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if not dst_url_str: 8298d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi (dst_url_str, dst_size, dst_crc32c, dst_md5) = ( 8308d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self._ParseTmpFileLine(self.sorted_dst_urls_it.next())) 8318d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Skip past base URL and normalize slashes so we can compare acros 8328d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # clouds/file systems (including Windows). 8338d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_url_str_to_check = _EncodeUrl( 8348d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_url_str[base_dst_url_len:].replace('\\', '/')) 8358d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 8368d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if src_url_str_to_check < dst_url_str_to_check: 8378d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # There's no dst object corresponding to src object, so copy src to dst. 8388d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi yield _DiffToApply( 8398d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_url_str, dst_url_str_would_copy_to, _DiffAction.COPY) 8408d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_url_str = None 8418d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi elif src_url_str_to_check > dst_url_str_to_check: 8428d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # dst object without a corresponding src object, so remove dst if -d 8438d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # option was specified. 
8448d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if self.delete_extras: 8458d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE) 8468d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_url_str = None 8478d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi else: 8488d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # There is a dst object corresponding to src object, so check if objects 8498d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # match. 8508d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if self._ObjectsMatch( 8518d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_url_str, src_size, src_crc32c, src_md5, 8528d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_url_str, dst_size, dst_crc32c, dst_md5): 8538d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Continue iterating without yielding a _DiffToApply. 8548d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi pass 8558d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi else: 8568d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi yield _DiffToApply(src_url_str, dst_url_str, _DiffAction.COPY) 8578d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_url_str = None 8588d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_url_str = None 8598d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 8608d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # If -d option specified any files/objects left in dst iteration should be 8618d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # removed. 
8628d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if not self.delete_extras: 8638d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi return 8648d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if dst_url_str: 8658d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE) 8668d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_url_str = None 8678d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi for line in self.sorted_dst_urls_it: 8688d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi (dst_url_str, _, _, _) = self._ParseTmpFileLine(line) 8698d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE) 8708d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 8718d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 8728d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoidef _RsyncFunc(cls, diff_to_apply, thread_state=None): 8738d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """Worker function for performing the actual copy and remove operations.""" 8748d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil_api = GetCloudApiInstance(cls, thread_state=thread_state) 8758d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_url_str = diff_to_apply.dst_url_str 8768d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_url = StorageUrlFromString(dst_url_str) 8778d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if diff_to_apply.diff_action == _DiffAction.REMOVE: 8788d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if cls.dryrun: 8798d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi cls.logger.info('Would remove %s', dst_url) 8808d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi else: 8818d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi cls.logger.info('Removing %s', dst_url) 8828d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if dst_url.IsFileUrl(): 8838d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi os.unlink(dst_url.object_name) 
8848d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi else: 8858d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi try: 8868d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gsutil_api.DeleteObject( 8878d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_url.bucket_name, dst_url.object_name, 8888d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi generation=dst_url.generation, provider=dst_url.scheme) 8898d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi except NotFoundException: 8908d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # If the object happened to be deleted by an external process, this 8918d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # is fine because it moves us closer to the desired state. 8928d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi pass 8938d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi elif diff_to_apply.diff_action == _DiffAction.COPY: 8948d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_url_str = diff_to_apply.src_url_str 8958d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_url = StorageUrlFromString(src_url_str) 8968d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if cls.dryrun: 8978d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi cls.logger.info('Would copy %s to %s', src_url, dst_url) 8988d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi else: 8998d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi try: 9008d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi copy_helper.PerformCopy(cls.logger, src_url, dst_url, gsutil_api, cls, 9018d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi _RsyncExceptionHandler, 9028d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi headers=cls.headers) 9038d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi except SkipUnsupportedObjectError, e: 9048d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi cls.logger.info('Skipping item %s with unsupported object type %s', 9058d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_url, e.unsupported_type) 
9068d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9078d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi else: 9088d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi raise CommandException('Got unexpected DiffAction (%d)' 9098d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi % diff_to_apply.diff_action) 9108d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9118d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9128d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoidef _RootListingExceptionHandler(cls, e): 9138d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """Simple exception handler for exceptions during listing URLs to sync.""" 9148d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi cls.logger.error(str(e)) 9158d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9168d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9178d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoidef _RsyncExceptionHandler(cls, e): 9188d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """Simple exception handler to allow post-completion status.""" 9198d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi cls.logger.error(str(e)) 9208d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi cls.op_failure_count += 1 9218d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi cls.logger.debug('\n\nEncountered exception while syncing:\n%s\n', 9228d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi traceback.format_exc()) 9238d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9248d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9258d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiclass RsyncCommand(Command): 9268d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """Implementation of gsutil rsync command.""" 9278d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9288d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Command specification. See base class for documentation. 
9298d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi command_spec = Command.CreateCommandSpec( 9308d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 'rsync', 9318d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi command_name_aliases=[], 9328d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi usage_synopsis=_SYNOPSIS, 9338d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi min_args=2, 9348d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi max_args=2, 9358d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi supported_sub_args='cCdenprRUx:', 9368d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi file_url_ok=True, 9378d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi provider_url_ok=False, 9388d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi urls_start_arg=0, 9398d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gs_api_support=[ApiSelector.XML, ApiSelector.JSON], 9408d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi gs_default_api=ApiSelector.JSON, 9418d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi argparse_arguments=[ 9428d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi CommandArgument.MakeNCloudOrFileURLsArgument(2) 9438d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi ] 9448d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi ) 9458d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Help specification. See help_provider.py for documentation. 
9468d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi help_spec = Command.HelpSpec( 9478d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi help_name='rsync', 9488d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi help_name_aliases=['sync', 'synchronize'], 9498d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi help_type='command_help', 9508d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi help_one_line_summary='Synchronize content of two buckets/directories', 9518d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi help_text=_DETAILED_HELP_TEXT, 9528d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi subcommand_help_text={}, 9538d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi ) 9548d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi total_bytes_transferred = 0 9558d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9568d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi def _InsistContainer(self, url_str, treat_nonexistent_object_as_subdir): 9578d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """Sanity checks that URL names an existing container. 9588d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9598d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Args: 9608d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi url_str: URL string to check. 9618d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi treat_nonexistent_object_as_subdir: indicates if should treat a 9628d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi non-existent object as a subdir. 9638d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9648d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Returns: 9658d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi URL for checked string. 9668d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9678d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi Raises: 9688d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi CommandException if url_str doesn't name an existing container. 
9698d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """ 9708d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi (url, have_existing_container) = ( 9718d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi copy_helper.ExpandUrlToSingleBlr(url_str, self.gsutil_api, self.debug, 9728d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.project_id, 9738d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi treat_nonexistent_object_as_subdir)) 9748d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if not have_existing_container: 9758d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi raise CommandException( 9768d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 'arg (%s) does not name a directory, bucket, or bucket subdir.' 9778d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi % url_str) 9788d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi return url 9798d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9808d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi def RunCommand(self): 9818d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi """Command entry point for the rsync command.""" 9828d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self._ParseOpts() 9838d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if self.compute_file_checksums and not UsingCrcmodExtension(crcmod): 9848d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.logger.warn(SLOW_CRCMOD_WARNING) 9858d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9868d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi src_url = self._InsistContainer(self.args[0], False) 9878d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi dst_url = self._InsistContainer(self.args[1], True) 9888d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9898d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Tracks if any copy or rm operations failed. 
9908d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.op_failure_count = 0 9918d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9928d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # List of attributes to share/manage across multiple processes in 9938d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # parallel (-m) mode. 9948d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi shared_attrs = ['op_failure_count'] 9958d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9968d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi for signal_num in GetCaughtSignals(): 9978d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi RegisterSignalHandler(signal_num, _HandleSignals) 9988d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 9998d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Perform sync requests in parallel (-m) mode, if requested, using 10008d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # configured number of parallel processes and threads. Otherwise, 10018d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # perform requests with sequential function calls in current process. 
10028d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi diff_iterator = _DiffIterator(self, src_url, dst_url) 10038d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.logger.info('Starting synchronization') 10048d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi try: 10058d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.Apply(_RsyncFunc, diff_iterator, _RsyncExceptionHandler, 10068d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi shared_attrs, arg_checker=_DiffToApplyArgChecker, 10078d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi fail_on_error=True) 10088d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi finally: 10098d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi CleanUpTempFiles() 10108d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 10118d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if self.op_failure_count: 10128d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi plural_str = 's' if self.op_failure_count else '' 10138d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi raise CommandException( 10148d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi '%d file%s/object%s could not be copied/removed.' % 10158d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi (self.op_failure_count, plural_str, plural_str)) 10168d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 10178d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi def _ParseOpts(self): 10188d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # exclude_symlinks is handled by Command parent class, so save in Command 10198d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # state rather than CopyHelperOpts. 10208d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.exclude_symlinks = False 10218d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # continue_on_error is handled by Command parent class, so save in Command 10228d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # state rather than CopyHelperOpts. 
10238d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.continue_on_error = False 10248d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.delete_extras = False 10258d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi preserve_acl = False 10268d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.compute_file_checksums = False 10278d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.dryrun = False 10288d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.exclude_pattern = None 10298d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.skip_unsupported_objects = False 10308d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # self.recursion_requested is initialized in command.py (so it can be 10318d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # checked in parent class for all commands). 10328d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi 10338d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if self.sub_opts: 10348d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi for o, a in self.sub_opts: 10358d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if o == '-c': 10368d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.compute_file_checksums = True 10378d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # Note: In gsutil cp command this is specified using -c but here we use 10388d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # -C so we can use -c for checksum arg (to be consistent with Unix rsync 10398d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi # command options). 
10408d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi elif o == '-C': 10418d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.continue_on_error = True 10428d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi elif o == '-d': 10438d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.delete_extras = True 10448d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi elif o == '-e': 10458d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.exclude_symlinks = True 10468d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi elif o == '-n': 10478d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.dryrun = True 10488d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi elif o == '-p': 10498d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi preserve_acl = True 10508d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi elif o == '-r' or o == '-R': 10518d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.recursion_requested = True 10528d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi elif o == '-U': 10538d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.skip_unsupported_objects = True 10548d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi elif o == '-x': 10558d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi if not a: 10568d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi raise CommandException('Invalid blank exclude filter') 10578d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi try: 10588d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi self.exclude_pattern = re.compile(a) 10598d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi except re.error: 10608d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi raise CommandException('Invalid exclude filter (%s)' % a) 10618d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi return CreateCopyHelperOpts( 10628d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi preserve_acl=preserve_acl, 10638d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi skip_unsupported_objects=self.skip_unsupported_objects) 1064