from __future__ import print_function
from __future__ import unicode_literals

import logging
from os import extsep, path, readlink, curdir
import re
from subprocess import CalledProcessError, Popen, PIPE
import sys
import tarfile
from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED
# NOTE(review): the class header, the @staticmethod decorators and a number of
# control-flow lines (try/else/return) were missing from the extracted source.
# They are reconstructed here from the visible statements, docstrings and call
# sites; lines that are pure inference carry their own NOTE(review).
class GitArchiver(object):
    """
    GitArchiver

    Scan a git repository and export all tracked files, and submodules.
    Checks for .gitattributes files in each directory and uses 'export-ignore'
    pattern entries for ignore files in the archive.

    Usage example (main_repo_abspath must be an absolute path):

        archiver = GitArchiver(main_repo_abspath='/my/repo/path')
        archiver.create('output.zip')
    """
    LOG = logging.getLogger('GitArchiver')

    def __init__(self, prefix='', exclude=True, force_sub=False, extra=None, main_repo_abspath=None):
        """
        @param prefix: Prefix used to prepend all paths in the resulting archive.
            Archive names are built with os.path.join(prefix, file_path), so extra
            file paths are only prefixed if they are relative.
            E.g. if prefix is 'foo' and extra is ['bar', '/baz'] the extra files are
            stored as 'foo/bar' and '/baz'.
        @type prefix: str

        @param exclude: Determines whether archiver should follow rules specified in .gitattributes files.
        @type exclude: bool

        @param force_sub: Determines whether submodules are initialized and updated before archiving.
        @type force_sub: bool

        @param extra: List of extra paths to include in the resulting archive.
        @type extra: list

        @param main_repo_abspath: Absolute path to the main repository (or one of subdirectories).
            If given path is path to a subdirectory (but not a submodule directory!) it will be replaced
            with abspath to top-level directory of the repository.
            If None, current cwd is used.
        @type main_repo_abspath: str

        @raise ValueError: If main_repo_abspath is not absolute or is not inside a git repository.
        """
        # A fresh list per instance; never share a mutable default.
        if extra is None:
            extra = []

        if main_repo_abspath is None:
            main_repo_abspath = path.abspath('')
        elif not path.isabs(main_repo_abspath):
            raise ValueError("main_repo_abspath must be an absolute path")

        # Ask git for the top-level directory so a subdirectory path is accepted too.
        try:
            toplevel = self.run_git_shell('git rev-parse --show-toplevel', main_repo_abspath)
        except CalledProcessError:
            raise ValueError("{0} is not part of a git repository".format(main_repo_abspath))

        self.prefix = prefix
        self.exclude = exclude
        self.extra = extra
        self.force_sub = force_sub
        self.main_repo_abspath = path.abspath(toplevel.rstrip())

    def create(self, output_path, dry_run=False, output_format=None):
        """
        Create the archive at output_path.

        Type of the archive is determined either by extension of output_path or by output_format.
        Supported formats are: gz, zip, bz2, xz, tar, tgz, txz

        @param output_path: Output file path.
        @type output_path: str

        @param dry_run: Determines whether create should do nothing but print what it would archive.
        @type dry_run: bool

        @param output_format: Determines format of the output archive. If None, format is determined from extension
            of output_path.
        @type output_format: str

        @raise RuntimeError: If the (explicit or detected) format is not supported.
        """
        if output_format is None:
            file_name, file_ext = path.splitext(output_path)
            output_format = file_ext[len(extsep):].lower()
            self.LOG.debug("Output format is not explicitly set, determined format is {0}.".format(output_format))

        archive = None

        if not dry_run:
            if output_format == 'zip':
                archive = ZipFile(path.abspath(output_path), 'w')

                def add_file(file_path, arcname):
                    if not path.islink(file_path):
                        archive.write(file_path, arcname, ZIP_DEFLATED)
                    else:
                        # Store the symlink itself: the entry payload is the link target.
                        i = ZipInfo(arcname)
                        # NOTE(review): create_system was on a line missing from the
                        # extract; 3 (Unix) is assumed so that external_attr is read
                        # as Unix mode bits - confirm.
                        i.create_system = 3
                        # 0xA1ED == 0o120755 (symlink type + rwxr-xr-x) in the high 16 bits.
                        i.external_attr = 0xA1ED0000
                        archive.writestr(i, readlink(file_path))
            elif output_format in ['tar', 'bz2', 'gz', 'xz', 'tgz', 'txz']:
                # NOTE(review): the three explicit modes were on lines missing from
                # the extract; they follow tarfile's documented mode strings.
                if output_format == 'tar':
                    t_mode = 'w'
                elif output_format == 'tgz':
                    t_mode = 'w:gz'
                elif output_format == 'txz':
                    t_mode = 'w:xz'
                else:
                    t_mode = 'w:{0}'.format(output_format)

                archive = tarfile.open(path.abspath(output_path), t_mode)

                def add_file(file_path, arcname):
                    archive.add(file_path, arcname)
            else:
                raise RuntimeError("unknown format: {0}".format(output_format))

            def archiver(file_path, arcname):
                self.LOG.debug("Compressing {0} => {1}...".format(file_path, arcname))
                add_file(file_path, arcname)
        else:
            def archiver(file_path, arcname):
                self.LOG.info("{0} => {1}".format(file_path, arcname))

        # Close the archive even if archiving fails, so the handle is not leaked.
        try:
            self.archive_all_files(archiver)
        finally:
            if archive is not None:
                archive.close()

    def get_exclude_patterns(self, repo_abspath, repo_file_paths):
        """
        Returns exclude patterns for a given repo. It looks for .gitattributes files in repo_file_paths.

        Resulting dictionary will contain exclude patterns per path (relative to the repo_abspath).
        E.g. {('.', 'Catalyst', 'Editions', 'Base'): ['Foo*', '*Bar']}

        @param repo_abspath: Absolute path to the git repository.
        @type repo_abspath: str

        @param repo_file_paths: List of paths relative to the repo_abspath that are under git control.
        @type repo_file_paths: list

        @return: Dictionary representing exclude patterns.
            Keys are tuples of strings. Values are lists of strings.
            Returns None if self.exclude is not set.
        @rtype: dict or None
        """
        if not self.exclude:
            return None

        def read_attributes(attributes_abspath):
            # Collect the pattern (first token) of every line carrying 'export-ignore'.
            patterns = []
            if path.isfile(attributes_abspath):
                with open(attributes_abspath, 'r') as attributes:
                    for line in attributes:
                        tokens = line.strip().split()
                        if "export-ignore" in tokens[1:]:
                            patterns.append(tokens[0])
            return patterns

        # The empty tuple is the key of the global (user-level) patterns.
        exclude_patterns = {(): []}

        # NOTE(review): the error handling around this lookup was on lines missing
        # from the extract. `git config --get` exits non-zero when the key is unset,
        # so the lookup is treated as best-effort here - confirm.
        try:
            global_attributes_abspath = self.run_git_shell(
                "git config --get core.attributesfile", repo_abspath).rstrip()
            exclude_patterns[()] = read_attributes(global_attributes_abspath)
        except Exception:
            pass

        for attributes_abspath in [path.join(repo_abspath, f) for f in repo_file_paths if f.endswith(".gitattributes")]:
            # Patterns are keyed by the directory that holds the .gitattributes file.
            key = tuple(self.get_path_components(repo_abspath, path.dirname(attributes_abspath)))
            exclude_patterns[key] = read_attributes(attributes_abspath)

        local_attributes_abspath = path.join(repo_abspath, ".git", "info", "attributes")
        key = tuple(self.get_path_components(repo_abspath, repo_abspath))

        if key in exclude_patterns:
            exclude_patterns[key].extend(read_attributes(local_attributes_abspath))
        else:
            exclude_patterns[key] = read_attributes(local_attributes_abspath)

        return exclude_patterns

    def is_file_excluded(self, repo_abspath, repo_file_path, exclude_patterns):
        """
        Checks whether file at a given path is excluded.

        @param repo_abspath: Absolute path to the git repository.
        @type repo_abspath: str

        @param repo_file_path: Path to a file within repo_abspath.
        @type repo_file_path: str

        @param exclude_patterns: Exclude patterns with format specified for get_exclude_patterns.
        @type exclude_patterns: dict

        @return: True if file should be excluded. Otherwise False.
        @rtype: bool
        """
        if exclude_patterns is None or not len(exclude_patterns):
            return False

        from fnmatch import fnmatch

        file_name = path.basename(repo_file_path)
        components = self.get_path_components(repo_abspath, path.join(repo_abspath, path.dirname(repo_file_path)))

        is_excluded = False
        # Walk from the file's directory up to the repository root, and finally
        # to the empty key, checking the patterns registered at each level.
        while not is_excluded:
            key = tuple(components)
            if key in exclude_patterns:
                patterns = exclude_patterns[key]
                for p in patterns:
                    if fnmatch(file_name, p) or fnmatch(repo_file_path, p):
                        self.LOG.debug("Exclude pattern matched {0}: {1}".format(p, repo_file_path))
                        is_excluded = True

            if not len(components):
                break

            components.pop()

        return is_excluded

    def archive_all_files(self, archiver):
        """
        Archive all files using archiver.

        @param archiver: Callable that accepts 2 arguments:
            abspath to file on the system and relative path within archive.
        @type archiver: Callable
        """
        for file_path in self.extra:
            archiver(path.abspath(file_path), path.join(self.prefix, file_path))

        for file_path in self.walk_git_files():
            archiver(path.join(self.main_repo_abspath, file_path), path.join(self.prefix, file_path))

    def walk_git_files(self, repo_path=''):
        """
        An iterator method that yields a file path relative to main_repo_abspath
        for each file that should be included in the archive.
        Skips those that match the exclusion patterns found in
        any discovered .gitattributes files along the way.

        Recurs into submodules as well.

        @param repo_path: Path to the git submodule repository relative to main_repo_abspath.
        @type repo_path: str

        @return: Iterator to traverse files under git control relative to main_repo_abspath.
        @rtype: Iterable
        """
        repo_abspath = path.join(self.main_repo_abspath, repo_path)
        repo_file_paths = self.run_git_shell(
            "git ls-files --cached --full-name --no-empty-directory",
            repo_abspath
        ).splitlines()
        exclude_patterns = self.get_exclude_patterns(repo_abspath, repo_file_paths)

        for repo_file_path in repo_file_paths:
            # Drop the double quotes that may surround a listed path.
            repo_file_path = repo_file_path.strip('"')
            repo_file_abspath = path.join(repo_abspath, repo_file_path)
            main_repo_file_path = path.join(repo_path, repo_file_path)

            # Real directories are skipped; symlinks (even to directories) are kept.
            if not path.islink(repo_file_abspath) and path.isdir(repo_file_abspath):
                continue

            if self.is_file_excluded(repo_abspath, repo_file_path, exclude_patterns):
                continue

            yield main_repo_file_path

        if self.force_sub:
            self.run_git_shell("git submodule init", repo_abspath)
            self.run_git_shell("git submodule update", repo_abspath)

        repo_gitmodules_abspath = path.join(repo_abspath, ".gitmodules")

        # NOTE(review): the handling of a missing .gitmodules file was on lines not
        # present in the extract; a repository without submodules is assumed to be
        # valid, so an unreadable file simply ends the iteration - confirm.
        try:
            with open(repo_gitmodules_abspath) as f:
                lines = f.readlines()
        except (IOError, OSError):
            return

        for l in lines:
            m = re.match(r"^\s*path\s*=\s*(.*)\s*$", l)

            if not m:
                continue

            submodule_path = m.group(1)
            submodule_abspath = path.join(repo_path, submodule_path)

            if self.is_file_excluded(repo_abspath, submodule_path, exclude_patterns):
                continue

            for submodule_file_path in self.walk_git_files(submodule_abspath):
                # Re-check against this repository's patterns using a path relative to it.
                rel_file_path = submodule_file_path.replace(repo_path, "", 1).strip("/")
                if self.is_file_excluded(repo_abspath, rel_file_path, exclude_patterns):
                    continue

                yield submodule_file_path

    @staticmethod
    def get_path_components(repo_abspath, abspath):
        """
        Split given abspath into components relative to repo_abspath.
        These components are primarily used as unique keys of files and folders within a repository.

        E.g. if repo_abspath is '/Documents/Hobby/ParaView/' and abspath is
        '/Documents/Hobby/ParaView/Catalyst/Editions/Base/', function will return:
        ['.', 'Catalyst', 'Editions', 'Base']

        First element is always os.curdir (concrete symbol depends on OS).

        @param repo_abspath: Absolute path to the git repository. Normalized via os.path.normpath.
        @type repo_abspath: str

        @param abspath: Absolute path to a file within repo_abspath. Normalized via os.path.normpath.
        @type abspath: str

        @return: List of path components.
        @rtype: list

        @raise ValueError: If either path is not absolute, or abspath is not located under repo_abspath.
        """
        repo_abspath = path.normpath(repo_abspath)
        abspath = path.normpath(abspath)

        if not path.isabs(repo_abspath):
            raise ValueError("repo_abspath MUST be absolute path.")

        if not path.isabs(abspath):
            raise ValueError("abspath MUST be absoulte path.")

        outside_repo_message = (
            "abspath (\"{0}\") MUST have common prefix with repo_abspath (\"{1}\")"
            .format(abspath, repo_abspath)
        )

        if not path.commonprefix([repo_abspath, abspath]):
            raise ValueError(outside_repo_message)

        # Peel components off the tail until repo_abspath is reached.
        components = []
        while not abspath == repo_abspath:
            head, tail = path.split(abspath)

            # os.path.split stops making progress at the filesystem root: abspath
            # is not under repo_abspath, so fail instead of looping forever.
            if head == abspath:
                raise ValueError(outside_repo_message)

            abspath = head
            if tail:
                components.append(tail)

        components.append(curdir)
        components.reverse()
        return components

    @staticmethod
    def run_git_shell(cmd, cwd=None):
        """
        Runs git shell command, reads output and decodes it into unicode string.

        @param cmd: Command to be executed.
        @type cmd: str

        @param cwd: Working directory.
        @type cwd: str

        @return: Output of the command.
        @rtype: str

        @raise CalledProcessError: Raises exception if return code of the command is non-zero.
        """
        p = Popen(cmd, shell=True, stdout=PIPE, cwd=cwd)
        output, _ = p.communicate()
        # Resolve backslash escape sequences in the raw output, then decode the
        # resulting bytes as UTF-8.
        output = output.decode('unicode_escape').encode('raw_unicode_escape').decode('utf-8')

        if p.returncode:
            if sys.version_info > (2, 6):
                raise CalledProcessError(returncode=p.returncode, cmd=cmd, output=output)
            else:
                raise CalledProcessError(returncode=p.returncode, cmd=cmd)

        return output
# NOTE(review): the `def` line, most option keyword arguments, the GitArchiver
# construction and the body of the __main__ guard were missing from the
# extracted source. They are reconstructed self-consistently from the visible
# usage string, help texts and attribute accesses - confirm against the
# original file before merging.
def main():
    """
    Command line entry point: parse the options, build a GitArchiver and create the archive.

    Exits with status 2 and the error message if archiving raises an exception.
    """
    from optparse import OptionParser

    parser = OptionParser(
        usage="usage: %prog [-v] [--prefix PREFIX] [--no-exclude] [--force-submodules]"
              " [--extra EXTRA1 [EXTRA2]] [--dry-run] OUTPUT_FILE",
        version="%prog {0}".format(__version__)
    )

    parser.add_option('--prefix',
                      type='string',
                      dest='prefix',
                      default=None,
                      help="prepend PREFIX to each filename in the archive. "
                           "OUTPUT_FILE name is used by default to avoid tarbomb. "
                           "You can set it to '' in order to explicitly request tarbomb")

    parser.add_option('-v', '--verbose',
                      action='store_true',
                      dest='verbose',
                      help='enable verbose mode')

    parser.add_option('--no-exclude',
                      action='store_false',
                      dest='exclude',
                      default=True,
                      help="don't read .gitattributes files for patterns containing export-ignore attrib")

    parser.add_option('--force-submodules',
                      action='store_true',
                      dest='force_sub',
                      help='force a git submodule init && git submodule update at each level before iterating submodules')

    parser.add_option('--extra',
                      action='append',
                      dest='extra',
                      default=[],
                      help="any additional files to include in the archive")

    parser.add_option('--dry-run',
                      action='store_true',
                      dest='dry_run',
                      help="don't actually archive anything, just show what would be done")

    options, args = parser.parse_args()

    if len(args) != 1:
        parser.error("You must specify exactly one output file")

    output_file_path = args[0]

    if path.isdir(output_file_path):
        parser.error("You cannot use directory as output")

    if options.prefix is not None:
        # Joining with '' appends a trailing separator to a non-empty prefix and
        # leaves an explicitly empty prefix empty.
        options.prefix = path.join(options.prefix, '')
    else:
        # Default prefix: the output file name without its archive extension,
        # so that extracting does not spill files into the current directory.
        output_name = path.basename(output_file_path)
        output_name = re.sub(
            r'(\.zip|\.tar|\.tgz|\.txz|\.gz|\.bz2|\.xz|\.tar\.gz|\.tar\.bz2|\.tar\.xz)$',
            '',
            output_name
        ) or "Archive"  # a name that is only an extension must not yield an empty prefix
        options.prefix = path.join(output_name, '')

    try:
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(logging.Formatter('%(message)s'))
        GitArchiver.LOG.addHandler(handler)
        GitArchiver.LOG.setLevel(logging.DEBUG if options.verbose else logging.INFO)
        archiver = GitArchiver(options.prefix,
                               options.exclude,
                               options.force_sub,
                               options.extra)
        archiver.create(output_file_path, options.dry_run)
    except Exception as e:
        parser.exit(2, "{0}\n".format(e))


if __name__ == '__main__':
    main()