git-archive-all.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 # This file comes originally from: https://github.com/Kentzo/git-archive-all
3 #
4 # coding=utf-8
5 #
6 # The MIT License (MIT)
7 #
8 # Copyright (c) 2010 Ilya Kulakov
9 #
10 # Permission is hereby granted, free of charge, to any person obtaining a copy
11 # of this software and associated documentation files (the "Software"), to deal
12 # in the Software without restriction, including without limitation the rights
13 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 # copies of the Software, and to permit persons to whom the Software is
15 # furnished to do so, subject to the following conditions:
16 #
17 # The above copyright notice and this permission notice shall be included in
18 # all copies or substantial portions of the Software.
19 #
20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 # THE SOFTWARE.
27 
28 from __future__ import print_function
29 from __future__ import unicode_literals
30 
31 import logging
32 from os import extsep, path, readlink, curdir
33 from subprocess import CalledProcessError, Popen, PIPE
34 import sys
35 import tarfile
36 from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED
37 import re
38 
39 __version__ = "1.17"
40 
41 
class GitArchiver(object):
    """
    GitArchiver

    Scan a git repository and export all tracked files, and submodules.
    Checks for .gitattributes files in each directory and uses 'export-ignore'
    pattern entries to ignore files in the archive.

    >>> archiver = GitArchiver(main_repo_abspath='/my/repo/path')
    >>> archiver.create('output.zip')
    """

    LOG = logging.getLogger("GitArchiver")

    # Maps every supported tar-based output format to the tarfile open mode.
    _TAR_MODES = {
        "tar": "w",
        "tgz": "w:gz",
        "txz": "w:xz",
        "gz": "w:gz",
        "bz2": "w:bz2",
        "xz": "w:xz",
    }

    def __init__(
        self,
        prefix="",
        exclude=True,
        force_sub=False,
        extra=None,
        main_repo_abspath=None,
    ):
        """
        @param prefix: Prefix used to prepend all paths in the resulting archive.
            Extra file paths are only prefixed if they are relative.
            E.g. if prefix is 'foo' and extra is ['bar', '/baz'] the resulting archive
            will look like this:
            /
              baz
              foo/
                bar
        @type prefix: str

        @param exclude: Determines whether archiver should follow rules specified in
            .gitattributes files.
        @type exclude: bool

        @param force_sub: Determines whether submodules are initialized and updated
            before archiving.
        @type force_sub: bool

        @param extra: List of extra paths to include in the resulting archive.
        @type extra: list

        @param main_repo_abspath: Absolute path to the main repository (or one of
            subdirectories).
            If given path is path to a subdirectory (but not a submodule directory!) it
            will be replaced with abspath to top-level directory of the repository.
            If None, current cwd is used.
        @type main_repo_abspath: str

        @raise ValueError: If main_repo_abspath is not absolute or is not part of a
            git repository.
        """
        if extra is None:
            extra = []

        if main_repo_abspath is None:
            main_repo_abspath = path.abspath("")
        elif not path.isabs(main_repo_abspath):
            raise ValueError("main_repo_abspath must be an absolute path")

        try:
            # Resolve whatever directory was given to the top level of its repository.
            main_repo_abspath = path.abspath(
                self.run_git_shell(
                    "git rev-parse --show-toplevel", main_repo_abspath
                ).rstrip()
            )
        except CalledProcessError:
            raise ValueError(
                "{0} is not part of a git repository".format(main_repo_abspath)
            )

        self.prefix = prefix
        self.exclude = exclude
        self.extra = extra
        self.force_sub = force_sub
        self.main_repo_abspath = main_repo_abspath

    def create(self, output_path, dry_run=False, output_format=None):
        """
        Create the archive at output_path.

        Type of the archive is determined either by extension of output_path or by
        output_format.
        Supported formats are: gz, zip, bz2, xz, tar, tgz, txz

        @param output_path: Output file path.
        @type output_path: str

        @param dry_run: Determines whether create should do nothing but print what it
            would archive.
        @type dry_run: bool

        @param output_format: Determines format of the output archive. If None, format
            is determined from extension of output_path.
        @type output_format: str

        @raise RuntimeError: If the format is not supported (only checked when
            dry_run is false).
        """
        if output_format is None:
            file_ext = path.splitext(output_path)[1]
            output_format = file_ext[len(extsep):].lower()
            self.LOG.debug(
                "Output format is not explicitly set, determined format is {0}.".format(
                    output_format
                )
            )

        if dry_run:
            archive = None

            def archiver(file_path, arcname):
                self.LOG.info("{0} => {1}".format(file_path, arcname))

        else:
            if output_format == "zip":
                archive = ZipFile(path.abspath(output_path), "w")

                def add_file(file_path, arcname):
                    if not path.islink(file_path):
                        archive.write(file_path, arcname, ZIP_DEFLATED)
                    else:
                        # Store the symlink itself (its target path as content)
                        # instead of following it.
                        info = ZipInfo(arcname)
                        info.create_system = 3
                        info.external_attr = 0xA1ED0000
                        archive.writestr(info, readlink(file_path))

            elif output_format in self._TAR_MODES:
                archive = tarfile.open(
                    path.abspath(output_path), self._TAR_MODES[output_format]
                )

                def add_file(file_path, arcname):
                    archive.add(file_path, arcname)

            else:
                raise RuntimeError("unknown format: {0}".format(output_format))

            def archiver(file_path, arcname):
                self.LOG.debug("Compressing {0} => {1}...".format(file_path, arcname))
                add_file(file_path, arcname)

        try:
            self.archive_all_files(archiver)
        finally:
            # Close the archive even when archiving fails half-way, so the file
            # handle is not leaked.
            if archive is not None:
                archive.close()

    def get_exclude_patterns(self, repo_abspath, repo_file_paths):
        """
        Returns exclude patterns for a given repo. It looks for .gitattributes files in
        repo_file_paths.

        Resulting dictionary will contain exclude patterns per path (relative to the
        repo_abspath).
        E.g. {('.', 'Catalyst', 'Editions', 'Base'): ['Foo*', '*Bar']}

        @param repo_abspath: Absolute path to the git repository.
        @type repo_abspath: str

        @param repo_file_paths: List of paths relative to the repo_abspath that are
            under git control.
        @type repo_file_paths: list

        @return: Dictionary representing exclude patterns.
            Keys are tuples of strings. Values are lists of strings.
            Returns None if self.exclude is not set.
        @rtype: dict or None
        """
        if not self.exclude:
            return None

        def read_attributes(attributes_abspath):
            # Collect the pattern (first token) of every line that carries the
            # 'export-ignore' attribute; a missing file yields no patterns.
            patterns = []
            if path.isfile(attributes_abspath):
                with open(attributes_abspath, "r") as attributes:
                    for line in attributes:
                        tokens = line.strip().split()
                        if "export-ignore" in tokens[1:]:
                            patterns.append(tokens[0])
            return patterns

        # The empty tuple holds the global patterns (core.attributesfile).
        exclude_patterns = {(): []}

        # There may be no gitattributes.
        try:
            global_attributes_abspath = self.run_git_shell(
                "git config --get core.attributesfile", repo_abspath
            ).rstrip()
            exclude_patterns[()] = read_attributes(global_attributes_abspath)
        except Exception:
            # And it's valid to not have them.
            pass

        for repo_file_path in repo_file_paths:
            # Only files literally named '.gitattributes' are attribute files;
            # a suffix match would also pick up e.g. 'foo.gitattributes'.
            if path.basename(repo_file_path) != ".gitattributes":
                continue

            attributes_abspath = path.join(repo_abspath, repo_file_path)
            # Each .gitattributes affects only files within its directory.
            key = tuple(
                self.get_path_components(repo_abspath, path.dirname(attributes_abspath))
            )
            exclude_patterns[key] = read_attributes(attributes_abspath)

        # Patterns from .git/info/attributes are merged into the repository root key.
        local_attributes_abspath = path.join(repo_abspath, ".git", "info", "attributes")
        key = tuple(self.get_path_components(repo_abspath, repo_abspath))
        exclude_patterns.setdefault(key, []).extend(
            read_attributes(local_attributes_abspath)
        )

        return exclude_patterns

    def is_file_excluded(self, repo_abspath, repo_file_path, exclude_patterns):
        """
        Checks whether file at a given path is excluded.

        @param repo_abspath: Absolute path to the git repository.
        @type repo_abspath: str

        @param repo_file_path: Path to a file within repo_abspath.
        @type repo_file_path: str

        @param exclude_patterns: Exclude patterns with format specified for
            get_exclude_patterns.
        @type exclude_patterns: dict

        @return: True if file should be excluded. Otherwise False.
        @rtype: bool
        """
        if not exclude_patterns:
            return False

        from fnmatch import fnmatch

        file_name = path.basename(repo_file_path)
        components = self.get_path_components(
            repo_abspath, path.join(repo_abspath, path.dirname(repo_file_path))
        )

        is_excluded = False
        # We should check all patterns specified in intermediate directories to the
        # given file, walking from the file's directory up to the repository root.
        # At the end we should also check for the global patterns (key '()' or empty
        # tuple).
        while not is_excluded:
            for pattern in exclude_patterns.get(tuple(components), ()):
                if fnmatch(file_name, pattern) or fnmatch(repo_file_path, pattern):
                    self.LOG.debug(
                        "Exclude pattern matched {0}: {1}".format(
                            pattern, repo_file_path
                        )
                    )
                    is_excluded = True

            if not components:
                break

            components.pop()

        return is_excluded

    def archive_all_files(self, archiver):
        """
        Archive all files using archiver.

        Extra files are archived first, followed by every file yielded by
        walk_git_files.

        @param archiver: Callable that accepts 2 arguments:
            abspath to file on the system and relative path within archive.
        @type archiver: Callable
        """
        for file_path in self.extra:
            archiver(path.abspath(file_path), path.join(self.prefix, file_path))

        for file_path in self.walk_git_files():
            archiver(
                path.join(self.main_repo_abspath, file_path),
                path.join(self.prefix, file_path),
            )

    def walk_git_files(self, repo_path=""):
        """
        An iterator method that yields a file path relative to main_repo_abspath
        for each file that should be included in the archive.
        Skips those that match the exclusion patterns found in
        any discovered .gitattributes files along the way.

        Recurs into submodules as well.

        @param repo_path: Path to the git submodule repository relative to
            main_repo_abspath.
        @type repo_path: str

        @return: Iterator to traverse files under git control relative to
            main_repo_abspath.
        @rtype: Iterable
        """
        repo_abspath = path.join(self.main_repo_abspath, repo_path)
        repo_file_paths = self.run_git_shell(
            "git ls-files --cached --full-name --no-empty-directory", repo_abspath
        ).splitlines()
        exclude_patterns = self.get_exclude_patterns(repo_abspath, repo_file_paths)

        for repo_file_path in repo_file_paths:
            # Git puts path in quotes if file path has unicode characters.
            repo_file_path = repo_file_path.strip('"')  # relative to current repo
            repo_file_abspath = path.join(repo_abspath, repo_file_path)
            main_repo_file_path = path.join(repo_path, repo_file_path)

            # Only list symlinks and files.
            if not path.islink(repo_file_abspath) and path.isdir(repo_file_abspath):
                continue

            if self.is_file_excluded(repo_abspath, repo_file_path, exclude_patterns):
                continue

            yield main_repo_file_path

        if self.force_sub:
            self.run_git_shell("git submodule init", repo_abspath)
            self.run_git_shell("git submodule update", repo_abspath)

        # A repository without .gitmodules has no submodules: the IOError raised by
        # open() ends the walk quietly. The handler deliberately spans the whole
        # submodule traversal, so an IOError raised while walking a submodule is
        # swallowed as well.
        try:
            repo_gitmodules_abspath = path.join(repo_abspath, ".gitmodules")

            with open(repo_gitmodules_abspath) as f:
                lines = f.readlines()

            for line in lines:
                m = re.match(r"^\s*path\s*=\s*(.*)\s*$", line)
                if not m:
                    continue

                submodule_path = m.group(1)
                # Despite the name this is relative to main_repo_abspath, which is
                # what the recursive call expects.
                submodule_abspath = path.join(repo_path, submodule_path)

                if self.is_file_excluded(repo_abspath, submodule_path, exclude_patterns):
                    continue

                for submodule_file_path in self.walk_git_files(submodule_abspath):
                    # Re-check each submodule file against this repository's
                    # patterns, using its path relative to this repository.
                    rel_file_path = submodule_file_path.replace(
                        repo_path, "", 1
                    ).strip("/")
                    if self.is_file_excluded(
                        repo_abspath, rel_file_path, exclude_patterns
                    ):
                        continue

                    yield submodule_file_path
        except IOError:
            pass

    @staticmethod
    def get_path_components(repo_abspath, abspath):
        """
        Split given abspath into components relative to repo_abspath.
        These components are primarily used as unique keys of files and folders within a
        repository.

        E.g. if repo_abspath is '/Documents/Hobby/ParaView/' and abspath is
        '/Documents/Hobby/ParaView/Catalyst/Editions/Base/', function will return:
        ['.', 'Catalyst', 'Editions', 'Base']

        First element is always os.curdir (concrete symbol depends on OS).

        @param repo_abspath: Absolute path to the git repository. Normalized via
            os.path.normpath.
        @type repo_abspath: str

        @param abspath: Absolute path to a file within repo_abspath. Normalized via
            os.path.normpath.
        @type abspath: str

        @return: List of path components.
        @rtype: list

        @raise ValueError: If either path is not absolute, or abspath is not located
            within repo_abspath.
        """
        repo_abspath = path.normpath(repo_abspath)
        abspath = path.normpath(abspath)

        if not path.isabs(repo_abspath):
            raise ValueError("repo_abspath MUST be absolute path.")

        if not path.isabs(abspath):
            raise ValueError("abspath MUST be absoulte path.")

        if not path.commonprefix([repo_abspath, abspath]):
            raise ValueError(
                'abspath ("%s") MUST have common prefix with repo_abspath ("%s")'
                % (abspath, repo_abspath)
            )

        # Walk upwards from abspath, collecting components leaf-first.
        reversed_components = []
        current = abspath

        while current != repo_abspath:
            parent, tail = path.split(current)

            if parent == current:
                # Reached the filesystem root without meeting repo_abspath: the
                # character-wise commonprefix check above cannot detect this, and
                # continuing would loop forever.
                raise ValueError(
                    'abspath ("%s") MUST be located within repo_abspath ("%s")'
                    % (abspath, repo_abspath)
                )

            if tail:
                reversed_components.append(tail)

            current = parent

        reversed_components.append(curdir)
        reversed_components.reverse()
        return reversed_components

    @staticmethod
    def run_git_shell(cmd, cwd=None):
        """
        Runs git shell command, reads output and decodes it into unicode string.

        @param cmd: Command to be executed.
        @type cmd: str

        @type cwd: str
        @param cwd: Working directory.

        @rtype: str
        @return: Output of the command.

        @raise CalledProcessError: Raises exception if return code of the command is
            non-zero.
        """
        p = Popen(cmd, shell=True, stdout=PIPE, cwd=cwd)
        output, _ = p.communicate()
        # Undo backslash escapes in the output and reinterpret the resulting bytes
        # as UTF-8.
        output = (
            output.decode("unicode_escape").encode("raw_unicode_escape").decode("utf-8")
        )

        if p.returncode:
            if sys.version_info > (2, 6):
                raise CalledProcessError(
                    returncode=p.returncode, cmd=cmd, output=output
                )
            else:
                raise CalledProcessError(returncode=p.returncode, cmd=cmd)

        return output
488 
489 
def main():
    """
    Command line entry point.

    Parses the options, derives a default archive prefix from the output file name
    (to avoid creating a tarbomb) and creates the archive for the repository that
    contains the current working directory.

    Exits with status 0 on success and status 2 (printing the error message) if
    archiving fails. Invalid command line usage is reported via parser.error.
    """
    from optparse import OptionParser

    parser = OptionParser(
        usage="usage: %prog [-v] [--prefix PREFIX] [--no-exclude] [--force-submodules]"
        " [--extra EXTRA1 [EXTRA2]] [--dry-run] OUTPUT_FILE",
        version="%prog {0}".format(__version__),
    )

    parser.add_option(
        "--prefix",
        type="string",
        dest="prefix",
        default=None,
        help="prepend PREFIX to each filename in the archive. "
        "OUTPUT_FILE name is used by default to avoid tarbomb. "
        "You can set it to '' in order to explicitly request tarbomb",
    )

    parser.add_option(
        "-v",
        "--verbose",
        action="store_true",
        dest="verbose",
        help="enable verbose mode",
    )

    parser.add_option(
        "--no-exclude",
        action="store_false",
        dest="exclude",
        default=True,
        help="don't read .gitattributes for patterns containing export-ignore attrib",
    )

    parser.add_option(
        "--force-submodules",
        action="store_true",
        dest="force_sub",
        help="force a git submodule init && git submodule update "
        "at each level before iterating submodules",
    )

    parser.add_option(
        "--extra",
        action="append",
        dest="extra",
        default=[],
        help="any additional files to include in the archive",
    )

    parser.add_option(
        "--dry-run",
        action="store_true",
        dest="dry_run",
        help="don't actually archive anything, just show what would be done",
    )

    options, args = parser.parse_args()

    if len(args) != 1:
        parser.error("You must specify exactly one output file")

    output_file_path = args[0]

    if path.isdir(output_file_path):
        parser.error("You cannot use directory as output")

    # avoid tarbomb
    if options.prefix is not None:
        # Joining with "" appends a trailing separator to a non-empty prefix.
        options.prefix = path.join(options.prefix, "")
    else:
        # Default prefix: the output file name with its archive extension removed.
        extensions = [
            "zip",
            "tar",
            "tgz",
            "txz",
            "gz",
            "bz2",
            "xz",
            r"tar\.gz",
            r"tar\.bz2",
            r"tar\.xz",
        ]
        extension_pattern = "(" + "|".join(r"\." + e for e in extensions) + ")$"
        output_name = (
            re.sub(extension_pattern, "", path.basename(output_file_path))
            or "Archive"
        )
        options.prefix = path.join(output_name, "")

    try:
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(logging.Formatter("%(message)s"))
        GitArchiver.LOG.addHandler(handler)
        GitArchiver.LOG.setLevel(logging.DEBUG if options.verbose else logging.INFO)
        archiver = GitArchiver(
            options.prefix, options.exclude, options.force_sub, options.extra
        )
        archiver.create(output_file_path, options.dry_run)
    except Exception as e:
        # Top-level boundary: report any failure as a message with exit status 2.
        parser.exit(2, "{0}\n".format(e))

    sys.exit(0)


if __name__ == "__main__":
    main()
git-archive-all::main
def main()
Definition: git-archive-all.py:490
git-archive-all::GitArchiver
Definition: git-archive-all.py:42


dynamic-graph-python
Author(s): Nicolas Mansard, Olivier Stasse
autogenerated on Fri Oct 27 2023 02:16:36