28 from __future__
import print_function
29 from __future__
import unicode_literals
32 from os
import extsep, path, readlink, curdir
33 from subprocess
import CalledProcessError, Popen, PIPE
36 from zipfile
import ZipFile, ZipInfo, ZIP_DEFLATED
class GitArchiver(object):
    """
    Scan a git repository and export all tracked files, and submodules.

    Checks for .gitattributes files in each directory and uses 'export-ignore'
    pattern entries to leave files out of the archive.

    >>> archiver = GitArchiver(main_repo_abspath='/my/repo/path')
    >>> archiver.create('output.zip')
    """

    LOG = logging.getLogger("GitArchiver")

    def __init__(
        self,
        prefix="",
        exclude=True,
        force_sub=False,
        extra=None,
        main_repo_abspath=None,
    ):
        """
        @param prefix: Prefix used to prepend all paths in the resulting archive.
            Extra file paths are only prefixed if they are relative (the prefix
            is applied with os.path.join, which drops it for absolute paths).
            E.g. if prefix is 'foo' and extra is ['bar', '/baz'] the resulting
            archive holds 'foo/bar' and '/baz'.
        @type prefix: str

        @param exclude: Determines whether archiver should follow rules specified in
            .gitattributes files.
        @type exclude: bool

        @param force_sub: Determines whether submodules are initialized and updated
            before their files are listed.
        @type force_sub: bool

        @param extra: List of extra paths to include in the resulting archive.
        @type extra: list

        @param main_repo_abspath: Absolute path to the main repository (or one of
            its subdirectories).
            If given path is path to a subdirectory (but not a submodule directory!) it
            will be replaced with abspath to top-level directory of the repository.
            If None, current cwd is used.
        @type main_repo_abspath: str

        @raise ValueError: If main_repo_abspath is not absolute or is not part of
            a git repository.
        """
        if extra is None:
            extra = []

        if main_repo_abspath is None:
            main_repo_abspath = path.abspath("")
        elif not path.isabs(main_repo_abspath):
            raise ValueError("main_repo_abspath must be an absolute path")

        try:
            # Ask git for the top-level directory so that a subdirectory path
            # is normalised to the repository root.
            main_repo_abspath = path.abspath(
                self.run_git_shell(
                    "git rev-parse --show-toplevel", main_repo_abspath
                ).rstrip()
            )
        except CalledProcessError:
            raise ValueError(
                "{0} is not part of a git repository".format(main_repo_abspath)
            )

        self.prefix = prefix
        self.exclude = exclude
        self.extra = extra
        self.force_sub = force_sub
        self.main_repo_abspath = main_repo_abspath

    def create(self, output_path, dry_run=False, output_format=None):
        """
        Create the archive at output_path.

        Type of the archive is determined either by extension of output_path or by
        output_format.
        Supported formats are: gz, zip, bz2, xz, tar, tgz, txz

        @param output_path: Output file path.
        @type output_path: str

        @param dry_run: Determines whether create should do nothing but print what it
            would archive.
        @type dry_run: bool

        @param output_format: Determines format of the output archive. If None, format
            is determined from extension of output_path.
        @type output_format: str

        @raise RuntimeError: If the format is not supported (not checked on dry runs).
        """
        if output_format is None:
            file_ext = path.splitext(output_path)[1]
            output_format = file_ext[len(extsep):].lower()
            self.LOG.debug(
                "Output format is not explicitly set, determined format is {0}.".format(
                    output_format
                )
            )

        if dry_run:
            archive = None

            def archiver(file_path, arcname):
                self.LOG.info("{0} => {1}".format(file_path, arcname))

        else:
            if output_format == "zip":
                archive = ZipFile(path.abspath(output_path), "w")

                def add_file(file_path, arcname):
                    if not path.islink(file_path):
                        archive.write(file_path, arcname, ZIP_DEFLATED)
                    else:
                        # Store the link itself, not its target: the entry
                        # content is the link destination and the upper 16
                        # bits of external_attr carry the Unix mode
                        # (0xA1ED == 0o120755, i.e. symlink + rwxr-xr-x).
                        i = ZipInfo(arcname)
                        i.create_system = 3  # 3 == Unix
                        i.external_attr = 0xA1ED0000
                        archive.writestr(i, readlink(file_path))

            elif output_format in ["tar", "bz2", "gz", "xz", "tgz", "txz"]:
                # Short aliases map to explicit tarfile modes; the remaining
                # formats are already valid tarfile compression names.
                tar_modes = {"tar": "w", "tgz": "w:gz", "txz": "w:xz"}
                t_mode = tar_modes.get(output_format, "w:{0}".format(output_format))

                archive = tarfile.open(path.abspath(output_path), t_mode)

                def add_file(file_path, arcname):
                    archive.add(file_path, arcname)

            else:
                raise RuntimeError("unknown format: {0}".format(output_format))

            def archiver(file_path, arcname):
                self.LOG.debug("Compressing {0} => {1}...".format(file_path, arcname))
                add_file(file_path, arcname)

        try:
            self.archive_all_files(archiver)
        finally:
            # Close the archive even when archiving fails part-way, so the
            # file handle is not leaked.
            if archive is not None:
                archive.close()

    def get_exclude_patterns(self, repo_abspath, repo_file_paths):
        """
        Returns exclude patterns for a given repo. It looks for .gitattributes files in
        repo_file_paths, in the file named by git's core.attributesfile setting and
        in .git/info/attributes.

        Resulting dictionary will contain exclude patterns per path (relative to the
        repo_abspath).
        E.g. {('.', 'Catalyst', 'Editions', 'Base'): ['Foo*', '*Bar']}
        Patterns from core.attributesfile are stored under the empty tuple.

        @param repo_abspath: Absolute path to the git repository.
        @type repo_abspath: str

        @param repo_file_paths: List of paths relative to the repo_abspath that are
            under git control.
        @type repo_file_paths: list

        @return: Dictionary representing exclude patterns.
            Keys are tuples of strings. Values are lists of strings.
            Returns None if self.exclude is not set.
        @rtype: dict or None
        """
        if not self.exclude:
            return None

        def read_attributes(attributes_abspath):
            # Collect the pattern (first token) of every line that carries the
            # export-ignore attribute; a missing file yields no patterns.
            patterns = []
            if path.isfile(attributes_abspath):
                with open(attributes_abspath, "r") as attributes:
                    for line in attributes:
                        tokens = line.strip().split()
                        if "export-ignore" in tokens[1:]:
                            patterns.append(tokens[0])
            return patterns

        exclude_patterns = {(): []}

        # The global attributes file is optional: git exits non-zero when
        # core.attributesfile is not configured, and that is fine.
        try:
            global_attributes_abspath = self.run_git_shell(
                "git config --get core.attributesfile", repo_abspath
            ).rstrip()
            exclude_patterns[()] = read_attributes(global_attributes_abspath)
        except Exception:
            pass

        for repo_file_path in repo_file_paths:
            # Only files named exactly ".gitattributes" are attribute files.
            if path.basename(repo_file_path) != ".gitattributes":
                continue
            attributes_abspath = path.join(repo_abspath, repo_file_path)
            # Each .gitattributes is keyed by the directory that contains it.
            key = tuple(
                self.get_path_components(repo_abspath, path.dirname(attributes_abspath))
            )
            exclude_patterns[key] = read_attributes(attributes_abspath)

        # Repository-local attributes are merged into the repo root entry.
        local_attributes_abspath = path.join(repo_abspath, ".git", "info", "attributes")
        key = tuple(self.get_path_components(repo_abspath, repo_abspath))

        if key in exclude_patterns:
            exclude_patterns[key].extend(read_attributes(local_attributes_abspath))
        else:
            exclude_patterns[key] = read_attributes(local_attributes_abspath)

        return exclude_patterns

    def is_file_excluded(self, repo_abspath, repo_file_path, exclude_patterns):
        """
        Checks whether file at a given path is excluded.

        @param repo_abspath: Absolute path to the git repository.
        @type repo_abspath: str

        @param repo_file_path: Path to a file within repo_abspath.
        @type repo_file_path: str

        @param exclude_patterns: Exclude patterns with format specified for
            get_exclude_patterns.
        @type exclude_patterns: dict

        @return: True if file should be excluded. Otherwise False.
        @rtype: bool
        """
        if not exclude_patterns:
            return False

        from fnmatch import fnmatch

        file_name = path.basename(repo_file_path)
        components = self.get_path_components(
            repo_abspath, path.join(repo_abspath, path.dirname(repo_file_path))
        )

        is_excluded = False
        # Check the patterns of the file's own directory first, then of every
        # parent directory up to the repo root, and finally the entry stored
        # under the empty tuple.
        while not is_excluded:
            for p in exclude_patterns.get(tuple(components), ()):
                if fnmatch(file_name, p) or fnmatch(repo_file_path, p):
                    self.LOG.debug(
                        "Exclude pattern matched {0}: {1}".format(p, repo_file_path)
                    )
                    is_excluded = True

            if not components:
                break

            components.pop()

        return is_excluded

    def archive_all_files(self, archiver):
        """
        Archive all files using archiver.

        Extra files are passed first, followed by every path yielded by
        walk_git_files.

        @param archiver: Callable that accepts 2 arguments:
            abspath to file on the system and relative path within archive.
        @type archiver: Callable
        """
        for file_path in self.extra:
            archiver(path.abspath(file_path), path.join(self.prefix, file_path))

        for file_path in self.walk_git_files():
            archiver(
                path.join(self.main_repo_abspath, file_path),
                path.join(self.prefix, file_path),
            )

    def walk_git_files(self, repo_path=""):
        """
        An iterator method that yields a file path relative to main_repo_abspath
        for each file that should be included in the archive.
        Skips those that match the exclusion patterns found in
        any discovered .gitattributes files along the way.

        Recurs into submodules as well.

        @param repo_path: Path to the git submodule repository relative to
            main_repo_abspath.
        @type repo_path: str

        @return: Iterator to traverse files under git control relative to
            main_repo_abspath.
        @rtype: Iterable
        """
        repo_abspath = path.join(self.main_repo_abspath, repo_path)
        repo_file_paths = self.run_git_shell(
            "git ls-files --cached --full-name --no-empty-directory", repo_abspath
        ).splitlines()
        exclude_patterns = self.get_exclude_patterns(repo_abspath, repo_file_paths)

        for repo_file_path in repo_file_paths:
            # git may wrap a listed path in double quotes; drop them.
            repo_file_path = repo_file_path.strip('"')  # relative to current repo
            repo_file_abspath = path.join(repo_abspath, repo_file_path)
            main_repo_file_path = path.join(repo_path, repo_file_path)

            # Only list symlinks and files; plain directories are skipped.
            if not path.islink(repo_file_abspath) and path.isdir(repo_file_abspath):
                continue

            if self.is_file_excluded(repo_abspath, repo_file_path, exclude_patterns):
                continue

            yield main_repo_file_path

        if self.force_sub:
            self.run_git_shell("git submodule init", repo_abspath)
            self.run_git_shell("git submodule update", repo_abspath)

        try:
            repo_gitmodules_abspath = path.join(repo_abspath, ".gitmodules")

            with open(repo_gitmodules_abspath) as f:
                lines = f.readlines()

            # Non-greedy capture so trailing whitespace is not part of the path.
            path_entry = re.compile(r"^\s*path\s*=\s*(.*?)\s*$")

            for line in lines:
                m = path_entry.match(line)

                if m:
                    submodule_path = m.group(1)
                    submodule_abspath = path.join(repo_path, submodule_path)

                    if self.is_file_excluded(
                        repo_abspath, submodule_path, exclude_patterns
                    ):
                        continue

                    for submodule_file_path in self.walk_git_files(submodule_abspath):
                        # Re-check against this repo's own patterns, using the
                        # path relative to this repo.
                        rel_file_path = submodule_file_path.replace(
                            repo_path, "", 1
                        ).strip("/")
                        if self.is_file_excluded(
                            repo_abspath, rel_file_path, exclude_patterns
                        ):
                            continue

                        yield submodule_file_path
        except IOError:
            # No readable .gitmodules means there are no submodules to walk.
            pass

    @staticmethod
    def get_path_components(repo_abspath, abspath):
        """
        Split given abspath into components relative to repo_abspath.
        These components are primarily used as unique keys of files and folders within a
        repository.

        E.g. if repo_abspath is '/Documents/Hobby/ParaView/' and abspath is
        '/Documents/Hobby/ParaView/Catalyst/Editions/Base/', function will return:
        ['.', 'Catalyst', 'Editions', 'Base']

        First element is always os.curdir (concrete symbol depends on OS).

        @param repo_abspath: Absolute path to the git repository. Normalized via
            os.path.normpath.
        @type repo_abspath: str

        @param abspath: Absolute path to a file within repo_abspath. Normalized via
            os.path.normpath.
        @type abspath: str

        @return: List of path components.
        @rtype: list

        @raise ValueError: If either path is not absolute, or abspath is not
            located within repo_abspath.
        """
        repo_abspath = path.normpath(repo_abspath)
        abspath = path.normpath(abspath)

        if not path.isabs(repo_abspath):
            raise ValueError("repo_abspath MUST be absolute path.")

        if not path.isabs(abspath):
            raise ValueError("abspath MUST be absoulte path.")

        if not path.commonprefix([repo_abspath, abspath]):
            raise ValueError(
                'abspath ("%s") MUST have common prefix with repo_abspath ("%s")'
                % (abspath, repo_abspath)
            )

        components = []
        current = abspath

        while current != repo_abspath:
            parent, tail = path.split(current)

            if parent == current:
                # Reached the filesystem root without meeting repo_abspath.
                # commonprefix above compares characters, not path components,
                # so it cannot catch this case; without this guard the loop
                # would never terminate.
                raise ValueError(
                    'abspath ("%s") is not located within repo_abspath ("%s")'
                    % (abspath, repo_abspath)
                )

            current = parent

            if tail:
                components.insert(0, tail)

        components.insert(0, curdir)
        return components

    @staticmethod
    def run_git_shell(cmd, cwd=None):
        """
        Runs git shell command, reads output and decodes it into unicode string.

        @param cmd: Command to be executed.
        @type cmd: str

        @param cwd: Working directory.
        @type cwd: str

        @return: Output of the command.
        @rtype: unicode

        @raise CalledProcessError: Raises exception if return code of the command is
            non-zero.
        """
        # The command goes through the shell as a single string; the callers in
        # this class only pass fixed command strings.
        p = Popen(cmd, shell=True, stdout=PIPE, cwd=cwd)
        output, _ = p.communicate()
        # Resolve backslash escapes in the raw output, then read the resulting
        # bytes as UTF-8.
        output = (
            output.decode("unicode_escape").encode("raw_unicode_escape").decode("utf-8")
        )

        if p.returncode:
            # CalledProcessError only accepts `output` from Python 2.7 on.
            if sys.version_info >= (2, 7):
                raise CalledProcessError(
                    returncode=p.returncode, cmd=cmd, output=output
                )
            raise CalledProcessError(returncode=p.returncode, cmd=cmd)

        return output
491 from optparse
import OptionParser
493 parser = OptionParser(
494 usage=
"usage: %prog [-v] [--prefix PREFIX] [--no-exclude] [--force-submodules]"
495 " [--extra EXTRA1 [EXTRA2]] [--dry-run] OUTPUT_FILE",
496 version=
"%prog {0}".format(__version__),
504 help=
"""prepend PREFIX to each filename in the archive.
505 OUTPUT_FILE name is used by default to avoid tarbomb.
506 You can set it to '' in order to explicitly request tarbomb""",
514 help=
"enable verbose mode",
519 action=
"store_false",
522 help=
"don't read .gitattributes for patterns containing export-ignore attrib",
526 "--force-submodules",
529 help=
"force a git submodule init && git submodule update"
530 "at each level before iterating submodules",
538 help=
"any additional files to include in the archive",
545 help=
"don't actually archive anything, just show what would be done",
548 options, args = parser.parse_args()
551 parser.error(
"You must specify exactly one output file")
553 output_file_path = args[0]
555 if path.isdir(output_file_path):
556 parser.error(
"You cannot use directory as output")
559 if options.prefix
is not None:
560 options.prefix = path.join(options.prefix,
"")
564 output_name = path.basename(output_file_path)
579 "(" +
"|".join(
r"\." + e
for e
in extensions) +
")$",
585 options.prefix = path.join(output_name,
"")
588 handler = logging.StreamHandler(sys.stdout)
589 handler.setFormatter(logging.Formatter(
"%(message)s"))
590 GitArchiver.LOG.addHandler(handler)
591 GitArchiver.LOG.setLevel(logging.DEBUG
if options.verbose
else logging.INFO)
592 archiver = GitArchiver(
593 options.prefix, options.exclude, options.force_sub, options.extra
595 archiver.create(output_file_path, options.dry_run)
596 except Exception
as e:
597 parser.exit(2,
"{0}\n".format(e))
602 if __name__ ==
"__main__":