git-archive-all.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 # This file comes originally from: https://github.com/Kentzo/git-archive-all
3 #
4 # coding=utf-8
5 #
6 # The MIT License (MIT)
7 #
8 # Copyright (c) 2010 Ilya Kulakov
9 #
10 # Permission is hereby granted, free of charge, to any person obtaining a copy
11 # of this software and associated documentation files (the "Software"), to deal
12 # in the Software without restriction, including without limitation the rights
13 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 # copies of the Software, and to permit persons to whom the Software is
15 # furnished to do so, subject to the following conditions:
16 #
17 # The above copyright notice and this permission notice shall be included in
18 # all copies or substantial portions of the Software.
19 #
20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 # THE SOFTWARE.
27 
28 from __future__ import print_function
29 from __future__ import unicode_literals
30 
31 import logging
32 from os import extsep, path, readlink, curdir
33 from subprocess import CalledProcessError, Popen, PIPE
34 import sys
35 import tarfile
36 from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED
37 import re
38 
39 __version__ = "1.17"
40 
41 
class GitArchiver(object):
    """
    GitArchiver

    Scan a git repository and export all tracked files, and submodules.
    Checks for .gitattributes files in each directory and uses 'export-ignore'
    pattern entries to leave files out of the archive.

    >>> archiver = GitArchiver(main_repo_abspath='/my/repo/path')
    >>> archiver.create('output.zip')
    """
    LOG = logging.getLogger('GitArchiver')

    # Maps every supported tar-based output format to the tarfile.open() mode.
    _TAR_MODES = {
        'tar': 'w',
        'gz': 'w:gz',
        'tgz': 'w:gz',
        'bz2': 'w:bz2',
        'xz': 'w:xz',
        'txz': 'w:xz',
    }

    def __init__(self, prefix='', exclude=True, force_sub=False, extra=None, main_repo_abspath=None):
        """
        @param prefix: Prefix used to prepend all paths in the resulting archive.
            Extra file paths are only prefixed if they are relative.
            E.g. if prefix is 'foo' and extra is ['bar', '/baz'] the resulting archive will look like this:
            /
              baz
              foo/
                bar
        @type prefix: str

        @param exclude: Determines whether archiver should follow rules specified in .gitattributes files.
        @type exclude: bool

        @param force_sub: Determines whether submodules are initialized and updated before archiving.
        @type force_sub: bool

        @param extra: List of extra paths to include in the resulting archive.
        @type extra: list

        @param main_repo_abspath: Absolute path to the main repository (or one of subdirectories).
            If given path is path to a subdirectory (but not a submodule directory!) it will be replaced
            with abspath to top-level directory of the repository.
            If None, current cwd is used.
        @type main_repo_abspath: str

        @raise ValueError: If main_repo_abspath is not absolute or is not part of a git repository.
        """
        if extra is None:
            extra = []

        if main_repo_abspath is None:
            main_repo_abspath = path.abspath('')
        elif not path.isabs(main_repo_abspath):
            raise ValueError("main_repo_abspath must be an absolute path")

        try:
            toplevel = self.run_git_shell('git rev-parse --show-toplevel', main_repo_abspath)
        except CalledProcessError:
            raise ValueError("{0} is not part of a git repository".format(main_repo_abspath))

        self.prefix = prefix
        self.exclude = exclude
        self.extra = extra
        self.force_sub = force_sub
        self.main_repo_abspath = path.abspath(toplevel.rstrip())

    def create(self, output_path, dry_run=False, output_format=None):
        """
        Create the archive at output_path.

        Type of the archive is determined either by extension of output_path or by output_format.
        Supported formats are: gz, zip, bz2, xz, tar, tgz, txz

        @param output_path: Output file path.
        @type output_path: str

        @param dry_run: Determines whether create should do nothing but print what it would archive.
        @type dry_run: bool

        @param output_format: Determines format of the output archive. If None, format is determined from extension
            of output_path.
        @type output_format: str

        @raise RuntimeError: If the format is not supported (only checked when dry_run is False).
        """
        if output_format is None:
            file_ext = path.splitext(output_path)[1]
            output_format = file_ext[len(extsep):].lower()
            self.LOG.debug("Output format is not explicitly set, determined format is {0}.".format(output_format))

        if dry_run:
            archive = None

            def archiver(file_path, arcname):
                self.LOG.info("{0} => {1}".format(file_path, arcname))
        else:
            if output_format == 'zip':
                archive = ZipFile(path.abspath(output_path), 'w')

                def add_file(file_path, arcname):
                    if not path.islink(file_path):
                        archive.write(file_path, arcname, ZIP_DEFLATED)
                    else:
                        # Store the symlink itself: the entry body is the link target and the
                        # Unix mode bits (0o120755 in the high 16 bits) mark it as a symlink.
                        i = ZipInfo(arcname)
                        i.create_system = 3  # 3 == Unix
                        i.external_attr = 0xA1ED0000
                        archive.writestr(i, readlink(file_path))
            elif output_format in self._TAR_MODES:
                archive = tarfile.open(path.abspath(output_path), self._TAR_MODES[output_format])

                def add_file(file_path, arcname):
                    archive.add(file_path, arcname)
            else:
                raise RuntimeError("unknown format: {0}".format(output_format))

            def archiver(file_path, arcname):
                self.LOG.debug("Compressing {0} => {1}...".format(file_path, arcname))
                add_file(file_path, arcname)

        try:
            self.archive_all_files(archiver)
        finally:
            # Release the output file handle even when archiving fails half-way.
            if archive is not None:
                archive.close()

    def get_exclude_patterns(self, repo_abspath, repo_file_paths):
        """
        Returns exclude patterns for a given repo. It looks for .gitattributes files in repo_file_paths.

        Resulting dictionary will contain exclude patterns per path (relative to the repo_abspath).
        E.g. {('.', 'Catalyst', 'Editions', 'Base'): ['Foo*', '*Bar']}
        The empty tuple key holds the patterns of the global attributes file (core.attributesfile).

        @param repo_abspath: Absolute path to the git repository.
        @type repo_abspath: str

        @param repo_file_paths: List of paths relative to the repo_abspath that are under git control.
        @type repo_file_paths: list

        @return: Dictionary representing exclude patterns.
            Keys are tuples of strings. Values are lists of strings.
            Returns None if self.exclude is not set.
        @rtype: dict or None
        """
        if not self.exclude:
            return None

        def read_attributes(attributes_abspath):
            # Collect the pattern (first token) of every line carrying the export-ignore attribute.
            patterns = []
            if path.isfile(attributes_abspath):
                with open(attributes_abspath, 'r') as attributes:
                    for line in attributes:
                        tokens = line.strip().split()
                        # Lines starting with '#' are comments in gitattributes files.
                        if not tokens or tokens[0].startswith('#'):
                            continue
                        if "export-ignore" in tokens[1:]:
                            patterns.append(tokens[0])
            return patterns

        exclude_patterns = {(): []}

        # There may be no global gitattributes, and it's valid to not have them:
        # "git config --get" exits non-zero when the option is unset.
        try:
            global_attributes_abspath = self.run_git_shell(
                "git config --get core.attributesfile",
                repo_abspath
            ).rstrip()
            # git expands a leading '~' in this option, the raw config value does not.
            exclude_patterns[()] = read_attributes(path.expanduser(global_attributes_abspath))
        except Exception:
            pass

        for repo_file_path in repo_file_paths:
            # Only files literally named ".gitattributes" count, not e.g. "foo.gitattributes".
            if path.basename(repo_file_path) != ".gitattributes":
                continue

            attributes_abspath = path.join(repo_abspath, repo_file_path)
            # Each .gitattributes affects only files within its directory.
            key = tuple(self.get_path_components(repo_abspath, path.dirname(attributes_abspath)))
            exclude_patterns[key] = read_attributes(attributes_abspath)

        # Patterns from .git/info/attributes are merged into the repository root entry.
        local_attributes_abspath = path.join(repo_abspath, ".git", "info", "attributes")
        key = tuple(self.get_path_components(repo_abspath, repo_abspath))
        exclude_patterns.setdefault(key, []).extend(read_attributes(local_attributes_abspath))

        return exclude_patterns

    def is_file_excluded(self, repo_abspath, repo_file_path, exclude_patterns):
        """
        Checks whether file at a given path is excluded.

        @param repo_abspath: Absolute path to the git repository.
        @type repo_abspath: str

        @param repo_file_path: Path to a file within repo_abspath.
        @type repo_file_path: str

        @param exclude_patterns: Exclude patterns with format specified for get_exclude_patterns.
        @type exclude_patterns: dict

        @return: True if file should be excluded. Otherwise False.
        @rtype: bool
        """
        if not exclude_patterns:
            return False

        from fnmatch import fnmatch

        file_name = path.basename(repo_file_path)
        components = self.get_path_components(repo_abspath, path.join(repo_abspath, path.dirname(repo_file_path)))

        # We should check all patterns specified in intermediate directories to the given file.
        # At the end we should also check for the global patterns (key '()' or empty tuple).
        while True:
            for p in exclude_patterns.get(tuple(components), ()):
                if fnmatch(file_name, p) or fnmatch(repo_file_path, p):
                    self.LOG.debug("Exclude pattern matched {0}: {1}".format(p, repo_file_path))
                    return True

            if not components:
                return False

            components.pop()

    def archive_all_files(self, archiver):
        """
        Archive all files using archiver.

        Extra files are archived first, followed by every file yielded by walk_git_files.

        @param archiver: Callable that accepts 2 arguments:
            abspath to file on the system and relative path within archive.
        @type archiver: Callable
        """
        for file_path in self.extra:
            archiver(path.abspath(file_path), path.join(self.prefix, file_path))

        for file_path in self.walk_git_files():
            archiver(path.join(self.main_repo_abspath, file_path), path.join(self.prefix, file_path))

    def walk_git_files(self, repo_path=''):
        """
        An iterator method that yields a file path relative to main_repo_abspath
        for each file that should be included in the archive.
        Skips those that match the exclusion patterns found in
        any discovered .gitattributes files along the way.

        Recurs into submodules as well.

        @param repo_path: Path to the git submodule repository relative to main_repo_abspath.
        @type repo_path: str

        @return: Iterator to traverse files under git control relative to main_repo_abspath.
        @rtype: Iterable
        """
        repo_abspath = path.join(self.main_repo_abspath, repo_path)
        listing = self.run_git_shell(
            "git ls-files --cached --full-name --no-empty-directory",
            repo_abspath
        )
        # Git puts path in quotes if file path has unicode characters.
        repo_file_paths = [line.strip('"') for line in listing.splitlines()]
        exclude_patterns = self.get_exclude_patterns(repo_abspath, repo_file_paths)

        for repo_file_path in repo_file_paths:
            repo_file_abspath = path.join(repo_abspath, repo_file_path)  # absolute file path
            main_repo_file_path = path.join(repo_path, repo_file_path)  # file path relative to the main repo

            # Only list symlinks and files.
            if not path.islink(repo_file_abspath) and path.isdir(repo_file_abspath):
                continue

            if self.is_file_excluded(repo_abspath, repo_file_path, exclude_patterns):
                continue

            yield main_repo_file_path

        if self.force_sub:
            self.run_git_shell("git submodule init", repo_abspath)
            self.run_git_shell("git submodule update", repo_abspath)

        # A repository without a .gitmodules file simply has no submodules.
        # NOTE(review): the handler also covers the recursive walk, as it always did;
        # narrowing it would turn today's best-effort behaviour into a hard failure.
        try:
            repo_gitmodules_abspath = path.join(repo_abspath, ".gitmodules")

            with open(repo_gitmodules_abspath) as f:
                lines = f.readlines()

            for line in lines:
                # Lazy group so that trailing whitespace (e.g. '\r') is not part of the path.
                m = re.match(r"^\s*path\s*=\s*(.*?)\s*$", line)

                if not m:
                    continue

                submodule_path = m.group(1)
                # Submodule location relative to the main repository.
                submodule_repo_path = path.join(repo_path, submodule_path)

                if self.is_file_excluded(repo_abspath, submodule_path, exclude_patterns):
                    continue

                for submodule_file_path in self.walk_git_files(submodule_repo_path):
                    # Patterns of this repository are matched against paths relative to it.
                    rel_file_path = submodule_file_path.replace(repo_path, "", 1).strip("/")
                    if self.is_file_excluded(repo_abspath, rel_file_path, exclude_patterns):
                        continue

                    yield submodule_file_path
        except IOError:
            pass

    @staticmethod
    def get_path_components(repo_abspath, abspath):
        """
        Split given abspath into components relative to repo_abspath.
        These components are primarily used as unique keys of files and folders within a repository.

        E.g. if repo_abspath is '/Documents/Hobby/ParaView/' and abspath is
        '/Documents/Hobby/ParaView/Catalyst/Editions/Base/', function will return:
        ['.', 'Catalyst', 'Editions', 'Base']

        First element is always os.curdir (concrete symbol depends on OS).

        @param repo_abspath: Absolute path to the git repository. Normalized via os.path.normpath.
        @type repo_abspath: str

        @param abspath: Absolute path to a file within repo_abspath. Normalized via os.path.normpath.
        @type abspath: str

        @return: List of path components.
        @rtype: list

        @raise ValueError: If either path is not absolute or abspath is not located within repo_abspath.
        """
        repo_abspath = path.normpath(repo_abspath)
        abspath = path.normpath(abspath)

        if not path.isabs(repo_abspath):
            raise ValueError("repo_abspath MUST be absolute path.")

        if not path.isabs(abspath):
            raise ValueError("abspath MUST be absolute path.")

        if not path.commonprefix([repo_abspath, abspath]):
            raise ValueError(
                "abspath (\"{0}\") MUST have common prefix with repo_abspath (\"{1}\")"
                .format(abspath, repo_abspath)
            )

        # Walk upwards from abspath, collecting components deepest-first.
        components = []
        current = abspath

        while current != repo_abspath:
            parent, tail = path.split(current)

            if parent == current:
                # Reached the filesystem root without meeting repo_abspath;
                # continuing would loop forever.
                raise ValueError(
                    "abspath (\"{0}\") MUST be located within repo_abspath (\"{1}\")"
                    .format(abspath, repo_abspath)
                )

            if tail:
                components.append(tail)

            current = parent

        components.append(curdir)
        components.reverse()
        return components

    @staticmethod
    def run_git_shell(cmd, cwd=None):
        """
        Runs git shell command, reads output and decodes it into unicode string.

        @param cmd: Command to be executed.
        @type cmd: str

        @type cwd: str
        @param cwd: Working directory.

        @rtype: str
        @return: Output of the command.

        @raise CalledProcessError: Raises exception if return code of the command is non-zero.
        """
        p = Popen(cmd, shell=True, stdout=PIPE, cwd=cwd)
        output, _ = p.communicate()
        # Git prints non-ASCII path bytes as backslash escapes: resolve the escapes first,
        # then reinterpret the resulting bytes as UTF-8.
        output = output.decode('unicode_escape').encode('raw_unicode_escape').decode('utf-8')

        if p.returncode:
            # CalledProcessError accepts the "output" argument since Python 2.7 only.
            if sys.version_info >= (2, 7):
                raise CalledProcessError(returncode=p.returncode, cmd=cmd, output=output)
            raise CalledProcessError(returncode=p.returncode, cmd=cmd)

        return output
419 
420 
def main():
    """
    Command-line entry point.

    Parses the options, derives a default archive prefix from the output file name
    when --prefix is not given (to avoid creating a tarbomb) and runs GitArchiver
    on the repository containing the current working directory.

    Exits with status 2 and the error message if archiving fails, with status 0 otherwise.
    """
    from optparse import OptionParser

    parser = OptionParser(
        usage="usage: %prog [-v] [--prefix PREFIX] [--no-exclude] [--force-submodules]"
              " [--extra EXTRA1 [EXTRA2]] [--dry-run] OUTPUT_FILE",
        version="%prog {0}".format(__version__)
    )

    parser.add_option('--prefix',
                      type='string',
                      dest='prefix',
                      default=None,
                      help="""prepend PREFIX to each filename in the archive.
                          OUTPUT_FILE name is used by default to avoid tarbomb.
                          You can set it to '' in order to explicitly request tarbomb""")

    parser.add_option('-v', '--verbose',
                      action='store_true',
                      dest='verbose',
                      help='enable verbose mode')

    parser.add_option('--no-exclude',
                      action='store_false',
                      dest='exclude',
                      default=True,
                      help="don't read .gitattributes files for patterns containing export-ignore attrib")

    parser.add_option('--force-submodules',
                      action='store_true',
                      dest='force_sub',
                      help='force a git submodule init && git submodule update at each level before iterating submodules')

    parser.add_option('--extra',
                      action='append',
                      dest='extra',
                      default=[],
                      help="any additional files to include in the archive")

    parser.add_option('--dry-run',
                      action='store_true',
                      dest='dry_run',
                      help="don't actually archive anything, just show what would be done")

    options, args = parser.parse_args()

    if len(args) != 1:
        parser.error("You must specify exactly one output file")

    output_file_path = args[0]

    if path.isdir(output_file_path):
        parser.error("You cannot use directory as output")

    # avoid tarbomb
    if options.prefix is not None:
        # Joining with '' guarantees a trailing separator on a non-empty prefix.
        options.prefix = path.join(options.prefix, '')
    else:
        # Default prefix is the output file name without its archive extension.
        # Raw string: '\.' is not a valid escape sequence in a regular string literal.
        output_name = path.basename(output_file_path)
        output_name = re.sub(
            r'(\.zip|\.tar|\.tgz|\.txz|\.gz|\.bz2|\.xz|\.tar\.gz|\.tar\.bz2|\.tar\.xz)$',
            '',
            output_name
        ) or "Archive"
        options.prefix = path.join(output_name, '')

    try:
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(logging.Formatter('%(message)s'))
        GitArchiver.LOG.addHandler(handler)
        GitArchiver.LOG.setLevel(logging.DEBUG if options.verbose else logging.INFO)
        archiver = GitArchiver(options.prefix,
                               options.exclude,
                               options.force_sub,
                               options.extra)
        archiver.create(output_file_path, options.dry_run)
    except Exception as e:
        # Top-level boundary: report any failure on stderr and exit with status 2.
        parser.exit(2, "{0}\n".format(e))

    sys.exit(0)


if __name__ == '__main__':
    main()
507 
def archive_all_files(self, archiver)
def get_exclude_patterns(self, repo_abspath, repo_file_paths)
def get_path_components(repo_abspath, abspath)
def run_git_shell(cmd, cwd=None)
def create(self, output_path, dry_run=False, output_format=None)
def is_file_excluded(self, repo_abspath, repo_file_path, exclude_patterns)
def walk_git_files(self, repo_path='')
def __init__(self, prefix='', exclude=True, force_sub=False, extra=None, main_repo_abspath=None)


eigenpy
Author(s): Justin Carpentier, Nicolas Mansard
autogenerated on Sat Apr 17 2021 02:37:59