granicus.if.org Git - libvpx/blob - test/android/get_files.py

   1 # Copyright (c) 2013 The WebM project authors. All Rights Reserved.
   2 #
   3 # Use of this source code is governed by a BSD-style license
   4 # that can be found in the LICENSE file in the root of the source
   5 # tree. An additional intellectual property rights grant can be found
   6 # in the file PATENTS.  All contributing project authors may
   7 # be found in the AUTHORS file in the root of the source tree.
   8 #
   9 # This simple script pulls test files from the webm homepage
  10 # It is intelligent enough to only pull files if
  11 #   1) File / test_data folder does not exist
  12 #   2) SHA mismatch
  13
  14 import pycurl
  15 import csv
  16 import hashlib
  17 import re
  18 import os.path
  19 import time
  20 import itertools
  21 import sys
  22 import getopt
  23
  24 #globals
  25 url = ''
  26 file_list_path = ''
  27 local_resource_path = ''
  28
  29 # Helper functions:
  30 # A simple function which returns the sha hash of a file in hex
  31 def get_file_sha(filename):
  32   try:
  33     sha_hash = hashlib.sha1()
  34     with open(filename, 'rb') as file:
  35       buf = file.read(HASH_CHUNK)
  36       while len(buf) > 0:
  37         sha_hash.update(buf)
  38         buf = file.read(HASH_CHUNK)
  39       return sha_hash.hexdigest()
  40   except IOError:
  41     print "Error reading " + filename
  42
  43 # Downloads a file from a url, and then checks the sha against the passed
  44 # in sha
  45 def download_and_check_sha(url, filename, sha):
  46   path = os.path.join(local_resource_path, filename)
  47   fp = open(path, "wb")
  48   curl = pycurl.Curl()
  49   curl.setopt(pycurl.URL, url + "/" + filename)
  50   curl.setopt(pycurl.WRITEDATA, fp)
  51   curl.perform()
  52   curl.close()
  53   fp.close()
  54   return get_file_sha(path) == sha
  55
  56 #constants
  57 ftp_retries = 3
  58
  59 SHA_COL = 0
  60 NAME_COL = 1
  61 EXPECTED_COL = 2
  62 HASH_CHUNK = 65536
  63
  64 # Main script
  65 try:
  66   opts, args = \
  67       getopt.getopt(sys.argv[1:], \
  68                     "u:i:o:", ["url=", "input_csv=", "output_dir="])
  69 except:
  70   print 'get_files.py -u <url> -i <input_csv> -o <output_dir>'
  71   sys.exit(2)
  72
  73 for opt, arg in opts:
  74   if opt == '-u':
  75     url = arg
  76   elif opt in ("-i", "--input_csv"):
  77     file_list_path = os.path.join(arg)
  78   elif opt in ("-o", "--output_dir"):
  79     local_resource_path = os.path.join(arg)
  80
  81 if len(sys.argv) != 7:
  82   print "Expects two paths and a url!"
  83   exit(1)
  84
  85 if not os.path.isdir(local_resource_path):
  86   os.makedirs(local_resource_path)
  87
  88 file_list_csv = open(file_list_path, "rb")
  89
  90 # Our 'csv' file uses multiple spaces as a delimiter, python's
  91 # csv class only uses single character delimiters, so we convert them below
  92 file_list_reader = csv.reader((re.sub(' +', ' ', line) \
  93     for line in file_list_csv), delimiter = ' ')
  94
  95 file_shas = []
  96 file_names = []
  97
  98 for row in file_list_reader:
  99   if len(row) != EXPECTED_COL:
 100       continue
 101   file_shas.append(row[SHA_COL])
 102   file_names.append(row[NAME_COL])
 103
 104 file_list_csv.close()
 105
 106 # Download files, only if they don't already exist and have correct shas
 107 for filename, sha in itertools.izip(file_names, file_shas):
 108   path = os.path.join(local_resource_path, filename)
 109   if os.path.isfile(path) \
 110       and get_file_sha(path) == sha:
 111     print path + ' exists, skipping'
 112     continue
 113   for retry in range(0, ftp_retries):
 114     print "Downloading " + path
 115     if not download_and_check_sha(url, filename, sha):
 116       print "Sha does not match, retrying..."
 117     else:
 118       break