extract.py
Go to the documentation of this file.
1 # Copyright (c) 2015, Google Inc.
2 #
3 # Permission to use, copy, modify, and/or distribute this software for any
4 # purpose with or without fee is hereby granted, provided that the above
5 # copyright notice and this permission notice appear in all copies.
6 #
7 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10 # SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12 # OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14 
15 """Extracts archives."""
16 
17 
18 import hashlib
19 import optparse
20 import os
21 import os.path
22 import tarfile
23 import shutil
24 import sys
25 import zipfile
26 
27 
def CheckedJoin(output, path):
  """
  CheckedJoin returns os.path.join(output, path). It does sanity checks to
  ensure the resulting path is under output, but shouldn't be used on untrusted
  input.

  path is normalized first. A ValueError carrying the normalized path is raised
  if that path is absolute, is the current directory ('.' or the empty string),
  or begins with a '..' component. Names that merely begin with a dot, such as
  '.gitignore' or '..data', are accepted.
  """
  path = os.path.normpath(path)
  # Normalization collapses every '..' it can, so any that remain sit at the
  # very front of the path. Comparing the whole first component, rather than
  # testing startswith('.'), keeps hidden files and directories extractable.
  first_component = path.split(os.sep, 1)[0]
  if os.path.isabs(path) or first_component in (os.curdir, os.pardir):
    raise ValueError(path)
  return os.path.join(output, path)
38 
39 
class FileEntry(object):
  """
  FileEntry describes one regular file found in an archive: the path it is
  stored under, the mode to apply after extraction (None when there is none to
  apply) and a file-like object from which its contents are read.
  """

  def __init__(self, path, mode, fileobj):
    self.path, self.mode, self.fileobj = path, mode, fileobj
45 
46 
class SymlinkEntry(object):
  """
  SymlinkEntry describes one symbolic link found in an archive: the path the
  link is stored under, the mode to apply after extraction (None when there is
  none to apply) and the target the link points at.
  """

  def __init__(self, path, mode, target):
    self.target = target
    self.mode = mode
    self.path = path
52 
53 
def IterateZip(path):
  """
  IterateZip opens the zip file at path and returns a generator of entry objects
  for each file in it.

  Names ending in '/' are skipped. Every yielded FileEntry has a mode of None.
  An entry's fileobj is only usable until the next entry is requested or the
  generator is closed; it is closed at that point so member handles do not
  pile up while a large archive is processed.
  """
  with zipfile.ZipFile(path, 'r') as zip_file:
    for info in zip_file.infolist():
      if info.filename.endswith('/'):
        continue
      # Close each member as soon as the consumer moves on instead of leaving
      # it to the garbage collector.
      with zip_file.open(info) as member:
        yield FileEntry(info.filename, None, member)
64 
65 
def IterateTar(path, compression):
  """
  IterateTar opens the tar file at path, reading it with the given compression
  (main passes 'gz' or 'bz2'), and returns a generator of entry objects.

  Directories are skipped. A symlink becomes a SymlinkEntry with a mode of
  None, and a regular file becomes a FileEntry carrying the mode recorded in
  the archive. Any other kind of member raises ValueError.
  """
  open_mode = 'r:' + compression
  with tarfile.open(path, open_mode) as archive:
    for member in archive:
      if member.isdir():
        continue
      if member.issym():
        yield SymlinkEntry(member.name, None, member.linkname)
        continue
      if not member.isfile():
        raise ValueError('Unknown entry type "%s"' % (member.name, ))
      yield FileEntry(member.name, member.mode, archive.extractfile(member))
81 
82 
def _Sha256OfFile(path):
  """
  _Sha256OfFile returns the hex-encoded SHA-256 digest of the file at path,
  reading it in 1 MiB chunks.
  """
  sha256 = hashlib.sha256()
  with open(path, 'rb') as f:
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      sha256.update(chunk)
  return sha256.hexdigest()


def main(args):
  """
  main extracts the archive named by the first positional argument into the
  directory named by the second, replacing whatever that directory held.

  Returns 1 after printing the usage text when the number of positional
  arguments is not two, and 0 otherwise: when the archive does not exist, when
  the stamp file inside the output directory already records the archive's
  SHA-256 digest, or after a completed extraction. The stamp file is written
  only after every entry has been extracted.

  Unless --no-prefix is given, the first component of every entry path is
  stripped and must be identical for all entries.

  Raises ValueError for an unsupported archive extension, for entry paths that
  contain a backslash or start with '/', for an entry that has no directory
  prefix to strip, and for entries whose prefixes disagree. Raises TypeError
  for an entry that is neither a FileEntry nor a SymlinkEntry.
  """
  parser = optparse.OptionParser(usage='Usage: %prog ARCHIVE OUTPUT')
  parser.add_option('--no-prefix', dest='no_prefix', action='store_true',
                    help='Do not remove a prefix from paths in the archive.')
  options, args = parser.parse_args(args)

  if len(args) != 2:
    parser.print_help()
    return 1

  archive, output = args

  if not os.path.exists(archive):
    # Skip archives that weren't downloaded.
    return 0

  digest = _Sha256OfFile(archive)

  # The stamp records which archive the output directory was extracted from,
  # so an unchanged archive is not extracted again.
  stamp_path = os.path.join(output, ".boringssl_archive_digest")
  if os.path.exists(stamp_path):
    with open(stamp_path) as f:
      if f.read().strip() == digest:
        print("Already up-to-date.")
        return 0

  if archive.endswith('.zip'):
    entries = IterateZip(archive)
  elif archive.endswith('.tar.gz'):
    entries = IterateTar(archive, 'gz')
  elif archive.endswith('.tar.bz2'):
    entries = IterateTar(archive, 'bz2')
  else:
    raise ValueError(archive)

  try:
    if os.path.exists(output):
      print("Removing %s" % (output, ))
      shutil.rmtree(output)

    print("Extracting %s to %s" % (archive, output))
    prefix = None
    num_extracted = 0
    for entry in entries:
      # Even on Windows, zip files must always use forward slashes.
      if '\\' in entry.path or entry.path.startswith('/'):
        raise ValueError(entry.path)

      if not options.no_prefix:
        # Report an entry that sits outside any top-level directory by name,
        # rather than failing on tuple unpacking.
        new_prefix, separator, rest = entry.path.partition('/')
        if not separator:
          raise ValueError(
              'Entry "%s" has no directory prefix to strip' % (entry.path, ))

        # Ensure the archive is consistent.
        if prefix is None:
          prefix = new_prefix
        if prefix != new_prefix:
          raise ValueError((prefix, new_prefix))
      else:
        rest = entry.path

      # Extract the file into the output directory.
      fixed_path = CheckedJoin(output, rest)
      parent_dir = os.path.dirname(fixed_path)
      if not os.path.isdir(parent_dir):
        os.makedirs(parent_dir)
      if isinstance(entry, FileEntry):
        with open(fixed_path, 'wb') as out:
          shutil.copyfileobj(entry.fileobj, out)
      elif isinstance(entry, SymlinkEntry):
        os.symlink(entry.target, fixed_path)
      else:
        raise TypeError('unknown entry type')

      # Fix up permissions if need be.
      # TODO(davidben): To be extra tidy, this should only track the execute bit
      # as in git.
      if entry.mode is not None:
        os.chmod(fixed_path, entry.mode)

      # Print every 100 files, so bots do not time out on large archives.
      num_extracted += 1
      if num_extracted % 100 == 0:
        print("Extracted %d files..." % (num_extracted,))
  finally:
    entries.close()

  # An archive without any file entries never creates the output directory
  # above, so make sure it exists before recording the digest inside it.
  if not os.path.isdir(output):
    os.makedirs(output)

  with open(stamp_path, 'w') as f:
    f.write(digest)

  print("Done. Extracted %d files." % (num_extracted,))
  return 0
178 
179 
# Run main with the command-line arguments (program name excluded) and use
# its return value as the process exit status.
if __name__ == '__main__':
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)
extract.CheckedJoin
def CheckedJoin(output, path)
Definition: extract.py:28
extract.SymlinkEntry.path
path
Definition: extract.py:49
extract.FileEntry.fileobj
fileobj
Definition: extract.py:44
extract.main
def main(args)
Definition: extract.py:83
extract.SymlinkEntry
Definition: extract.py:47
main
Definition: main.py:1
extract.SymlinkEntry.target
target
Definition: extract.py:51
extract.SymlinkEntry.__init__
def __init__(self, path, mode, target)
Definition: extract.py:48
extract.FileEntry.mode
mode
Definition: extract.py:43
extract.IterateTar
def IterateTar(path, compression)
Definition: extract.py:66
extract.IterateZip
def IterateZip(path)
Definition: extract.py:54
extract.FileEntry.path
path
Definition: extract.py:42
open
#define open
Definition: test-fs.c:46
extract.FileEntry.__init__
def __init__(self, path, mode, fileobj)
Definition: extract.py:41
len
int len
Definition: abseil-cpp/absl/base/internal/low_level_alloc_test.cc:46
extract.SymlinkEntry.mode
mode
Definition: extract.py:50
extract.FileEntry
Definition: extract.py:40


grpc
Author(s):
autogenerated on Thu Mar 13 2025 02:59:18