1 # Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 # Use of this source code is governed by a BSD-style license
4 # that can be found in the LICENSE file in the root of the source
5 # tree. An additional intellectual property rights grant can be found
6 # in the file PATENTS. All contributing project authors may
7 # be found in the AUTHORS file in the root of the source tree.
# This simple script pulls test files from the webm homepage.
# It is intelligent enough to only pull a file if
#   1) the file / test_data folder does not exist, or
#   2) the existing file's sha does not match the expected sha
# Directory that downloaded files are written to. Empty until the
# -o / --output_dir command line option replaces it.
local_resource_path = ''
# A simple function which returns the sha hash of a file in hex
def get_file_sha(filename):
    """Return the hex SHA-1 digest of the file at `filename`.

    The file is read in HASH_CHUNK-sized pieces rather than all at once.

    Returns None, after printing a message, when the file cannot be opened
    or read; comparing that result against an expected sha then fails.
    """
    sha_hash = hashlib.sha1()
    try:
        with open(filename, 'rb') as stream:
            # iter() with a sentinel keeps calling read() until it returns
            # an empty bytes object, i.e. until end of file.
            for buf in iter(lambda: stream.read(HASH_CHUNK), b''):
                sha_hash.update(buf)
    except IOError:
        # Single-argument print() prints the same text on Python 2 and 3.
        print("Error reading " + filename)
        return None
    return sha_hash.hexdigest()
# Downloads a file from a url, and then checks the sha against the passed
# in sha
def download_and_check_sha(url, filename, sha):
    """Download `url` + "/" + `filename` and compare its sha with `sha`.

    The download is written to `filename` inside local_resource_path,
    replacing any existing file of that name.

    Returns True when get_file_sha() of the downloaded file equals `sha`,
    False otherwise. Exceptions raised while transferring propagate to the
    caller, but only after the curl handle and the output file are closed.
    """
    path = os.path.join(local_resource_path, filename)
    with open(path, 'wb') as fp:
        curl = pycurl.Curl()
        try:
            curl.setopt(pycurl.URL, url + "/" + filename)
            curl.setopt(pycurl.WRITEDATA, fp)
            curl.perform()
        finally:
            # Release the handle even when the transfer fails.
            curl.close()
    # The output file is closed at this point, so the hash is computed over
    # everything that was written.
    return get_file_sha(path) == sha
# Main script: parse the command line, read the list of expected files and
# download every file that is missing locally or whose sha does not match.
try:
    opts, args = getopt.getopt(
        sys.argv[1:], "u:i:o:", ["url=", "input_csv=", "output_dir="])
except getopt.GetoptError:
    print('get_files.py -u <url> -i <input_csv> -o <output_dir>')
    sys.exit(2)

# Start from empty values so that a missing option is detected below.
url = ''
file_list_path = ''

for opt, arg in opts:
    if opt in ("-u", "--url"):
        url = arg
    elif opt in ("-i", "--input_csv"):
        file_list_path = os.path.join(arg)
    elif opt in ("-o", "--output_dir"):
        local_resource_path = os.path.join(arg)

# All three options are required. Checking the parsed values instead of
# len(sys.argv) also accepts the "--url=..." and "-uVALUE" spellings that
# getopt understands.
if not (url and file_list_path and local_resource_path):
    print("Expects two paths and a url!")
    sys.exit(2)

if not os.path.isdir(local_resource_path):
    os.makedirs(local_resource_path)

file_shas = []
file_names = []

# Text mode keeps the lines as str, which re.sub and csv need on Python 3.
with open(file_list_path, "r") as file_list_csv:
    # Our 'csv' file uses multiple spaces as a delimiter, python's
    # csv class only uses single character delimiters, so we convert them
    # below
    single_spaced = (re.sub(' +', ' ', line) for line in file_list_csv)
    for row in csv.reader(single_spaced, delimiter=' '):
        # Ignore rows that do not have exactly the expected columns.
        if len(row) != EXPECTED_COL:
            continue
        file_shas.append(row[SHA_COL])
        file_names.append(row[NAME_COL])

# Download files, only if they don't already exist and have correct shas
for filename, sha in zip(file_names, file_shas):
    path = os.path.join(local_resource_path, filename)
    if os.path.isfile(path) and get_file_sha(path) == sha:
        print(path + ' exists, skipping')
        continue
    for retry in range(0, ftp_retries):
        print("Downloading " + path)
        if download_and_check_sha(url, filename, sha):
            # Sha matches, no further attempts needed for this file.
            break
        print("Sha does not match, retrying...")