28 from __future__ 
import print_function
 
   29 from __future__ 
import unicode_literals
 
   32 from os 
import extsep, path, readlink, curdir
 
   33 from subprocess 
import CalledProcessError, Popen, PIPE
 
   36 from zipfile 
import ZipFile, ZipInfo, ZIP_DEFLATED
 
   42 class GitArchiver(object):
 
   46     Scan a git repository and export all tracked files, and submodules. 
   47     Checks for .gitattributes files in each directory and uses 'export-ignore' 
   48     pattern entries for ignore files in the archive. 
   50     >>> archiver = GitArchiver(main_repo_abspath='my/repo/path') 
   51     >>> archiver.create('output.zip') 
   54     LOG = logging.getLogger(
"GitArchiver")
 
   62         main_repo_abspath=None,
 
   65         @param prefix: Prefix used to prepend all paths in the resulting archive. 
   66             Extra file paths are only prefixed if they are not relative. 
   67             E.g. if prefix is 'foo' and extra is ['bar', '/baz'] the resulting archive 
   75         @param exclude: Determines whether archiver should follow rules specified in 
   79         @param force_sub: Determines whether submodules are initialized and updated 
   83         @param extra: List of extra paths to include in the resulting archive. 
   86         @param main_repo_abspath: Absolute path to the main repository (or one of 
   88             If given path is path to a subdirectory (but not a submodule directory!) it 
   89             will be replaced with abspath to top-level directory of the repository. 
   90             If None, current cwd is used. 
   91         @type main_repo_abspath: str 
   96         if main_repo_abspath 
is None:
 
   97             main_repo_abspath = path.abspath(
"")
 
   98         elif not path.isabs(main_repo_abspath):
 
   99             raise ValueError(
"main_repo_abspath must be an absolute path")
 
  102             main_repo_abspath = path.abspath(
 
  104                     "git rev-parse --show-toplevel", main_repo_abspath
 
  107         except CalledProcessError:
 
  109                 "{0} is not part of a git repository".format(main_repo_abspath)
 
  113         self.exclude = exclude
 
  115         self.force_sub = force_sub
 
  116         self.main_repo_abspath = main_repo_abspath
 
  118     def create(self, output_path, dry_run=False, output_format=None):
 
  120         Create the archive at output_file_path. 
  122         Type of the archive is determined either by extension of output_file_path or by 
  124         Supported formats are: gz, zip, bz2, xz, tar, tgz, txz 
  126         @param output_path: Output file path. 
  127         @type output_path: str 
  129         @param dry_run: Determines whether create should do nothing but print what it 
  133         @param output_format: Determines format of the output archive. If None, format 
  134             is determined from extension of output_file_path. 
  135         @type output_format: str 
  137         if output_format 
is None:
 
  138             file_name, file_ext = path.splitext(output_path)
 
  139             output_format = file_ext[len(extsep) :].lower()
 
  141                 "Output format is not explicitly set, determined format is {0}.".format(
 
  147             if output_format == 
"zip":
 
  148                 archive = ZipFile(path.abspath(output_path), 
"w")
 
  150                 def add_file(file_path, arcname):
 
  151                     if not path.islink(file_path):
 
  152                         archive.write(file_path, arcname, ZIP_DEFLATED)
 
  156                         i.external_attr = 0xA1ED0000
 
  157                         archive.writestr(i, readlink(file_path))
 
  159             elif output_format 
in [
"tar", 
"bz2", 
"gz", 
"xz", 
"tgz", 
"txz"]:
 
  160                 if output_format == 
"tar":
 
  162                 elif output_format == 
"tgz":
 
  164                 elif output_format == 
"txz":
 
  167                     t_mode = 
"w:{0}".format(output_format)
 
  169                 archive = tarfile.open(path.abspath(output_path), t_mode)
 
  171                 def add_file(file_path, arcname):
 
  172                     archive.add(file_path, arcname)
 
  175                 raise RuntimeError(
"unknown format: {0}".format(output_format))
 
  177             def archiver(file_path, arcname):
 
  178                 self.LOG.debug(
"Compressing {0} => {1}...".format(file_path, arcname))
 
  179                 add_file(file_path, arcname)
 
  184             def archiver(file_path, arcname):
 
  185                 self.LOG.
info(
"{0} => {1}".format(file_path, arcname))
 
  187         self.archive_all_files(archiver)
 
  189         if archive 
is not None:
 
  192     def get_exclude_patterns(self, repo_abspath, repo_file_paths):
 
  194         Returns exclude patterns for a given repo. It looks for .gitattributes files in 
  197         Resulting dictionary will contain exclude patterns per path (relative to the 
  199         E.g. {('.', 'Catalyst', 'Editions', 'Base'): ['Foo*', '*Bar']} 
  201         @param repo_abspath: Absolute path to the git repository. 
  202         @type repo_abspath: str 
  204         @param repo_file_paths: List of paths relative to the repo_abspath that are 
  206         @type repo_file_paths:  list 
  208         @return: Dictionary representing exclude patterns. 
  209             Keys are tuples of strings. Values are lists of strings. 
  210             Returns None if self.exclude is not set. 
  216         def read_attributes(attributes_abspath):
 
  218             if path.isfile(attributes_abspath):
 
  219                 attributes = 
open(attributes_abspath, 
"r").readlines()
 
  221                 for line 
in attributes:
 
  222                     tokens = line.strip().split()
 
  223                     if "export-ignore" in tokens[1:]:
 
  224                         patterns.append(tokens[0])
 
  227         exclude_patterns = {(): []}
 
  231             global_attributes_abspath = self.run_git_shell(
 
  232                 "git config --get core.attributesfile", repo_abspath
 
  234             exclude_patterns[()] = read_attributes(global_attributes_abspath)
 
  239         for attributes_abspath 
in [
 
  240             path.join(repo_abspath, f)
 
  241             for f 
in repo_file_paths
 
  242             if f.endswith(
".gitattributes")
 
  246                 self.get_path_components(repo_abspath, path.dirname(attributes_abspath))
 
  248             exclude_patterns[key] = read_attributes(attributes_abspath)
 
  250         local_attributes_abspath = path.join(repo_abspath, 
".git", 
"info", 
"attributes")
 
  251         key = tuple(self.get_path_components(repo_abspath, repo_abspath))
 
  253         if key 
in exclude_patterns:
 
  254             exclude_patterns[key].extend(read_attributes(local_attributes_abspath))
 
  256             exclude_patterns[key] = read_attributes(local_attributes_abspath)
 
  258         return exclude_patterns
 
  260     def is_file_excluded(self, repo_abspath, repo_file_path, exclude_patterns):
 
  262         Checks whether file at a given path is excluded. 
  264         @param repo_abspath: Absolute path to the git repository. 
  265         @type repo_abspath: str 
  267         @param repo_file_path: Path to a file within repo_abspath. 
  268         @type repo_file_path: str 
  270         @param exclude_patterns: Exclude patterns with format specified for 
  271             get_exclude_patterns. 
  272         @type exclude_patterns: dict 
  274         @return: True if file should be excluded. Otherwise False. 
  277         if exclude_patterns 
is None or not len(exclude_patterns):
 
  280         from fnmatch 
import fnmatch
 
  282         file_name = path.basename(repo_file_path)
 
  283         components = self.get_path_components(
 
  284             repo_abspath, path.join(repo_abspath, path.dirname(repo_file_path))
 
  292         while not is_excluded:
 
  293             key = tuple(components)
 
  294             if key 
in exclude_patterns:
 
  295                 patterns = exclude_patterns[key]
 
  297                     if fnmatch(file_name, p) 
or fnmatch(repo_file_path, p):
 
  299                             "Exclude pattern matched {0}: {1}".format(p, repo_file_path)
 
  303             if not len(components):
 
  310     def archive_all_files(self, archiver):
 
  312         Archive all files using archiver. 
  314         @param archiver: Callable that accepts 2 arguments: 
  315             abspath to file on the system and relative path within archive. 
  316         @type archiver: Callable 
  318         for file_path 
in self.extra:
 
  319             archiver(path.abspath(file_path), path.join(self.prefix, file_path))
 
  321         for file_path 
in self.walk_git_files():
 
  323                 path.join(self.main_repo_abspath, file_path),
 
  324                 path.join(self.prefix, file_path),
 
  327     def walk_git_files(self, repo_path=""):
 
  329         An iterator method that yields a file path relative to main_repo_abspath 
  330         for each file that should be included in the archive. 
  331         Skips those that match the exclusion patterns found in 
  332         any discovered .gitattributes files along the way. 
  334         Recurs into submodules as well. 
  336         @param repo_path: Path to the git submodule repository relative to 
  340         @return: Iterator to traverse files under git control relative to 
  344         repo_abspath = path.join(self.main_repo_abspath, repo_path)
 
  345         repo_file_paths = self.run_git_shell(
 
  346             "git ls-files --cached --full-name --no-empty-directory", repo_abspath
 
  348         exclude_patterns = self.get_exclude_patterns(repo_abspath, repo_file_paths)
 
  350         for repo_file_path 
in repo_file_paths:
 
  352             repo_file_path = repo_file_path.strip(
 
  355             repo_file_abspath = path.join(
 
  356                 repo_abspath, repo_file_path
 
  358             main_repo_file_path = path.join(
 
  359                 repo_path, repo_file_path
 
  363             if not path.islink(repo_file_abspath) 
and path.isdir(repo_file_abspath):
 
  366             if self.is_file_excluded(repo_abspath, repo_file_path, exclude_patterns):
 
  369             yield main_repo_file_path
 
  372             self.run_git_shell(
"git submodule init", repo_abspath)
 
  373             self.run_git_shell(
"git submodule update", repo_abspath)
 
  376             repo_gitmodules_abspath = path.join(repo_abspath, 
".gitmodules")
 
  378             with open(repo_gitmodules_abspath) 
as f:
 
  379                 lines = f.readlines()
 
  382                 m = re.match(
r"^\s*path\s*=\s*(.*)\s*$", line)
 
  385                     submodule_path = m.group(1)
 
  386                     submodule_abspath = path.join(repo_path, submodule_path)
 
  388                     if self.is_file_excluded(
 
  389                         repo_abspath, submodule_path, exclude_patterns
 
  393                     for submodule_file_path 
in self.walk_git_files(submodule_abspath):
 
  394                         rel_file_path = submodule_file_path.replace(
 
  397                         if self.is_file_excluded(
 
  398                             repo_abspath, rel_file_path, exclude_patterns
 
  402                         yield submodule_file_path
 
  407     def get_path_components(repo_abspath, abspath):
 
  409         Split given abspath into components relative to repo_abspath. 
  410         These components are primarily used as unique keys of files and folders within a 
  413         E.g. if repo_abspath is '/Documents/Hobby/ParaView/' and abspath is 
  414         '/Documents/Hobby/ParaView/Catalyst/Editions/Base/', function will return: 
  415         ['.', 'Catalyst', 'Editions', 'Base'] 
  417         First element is always os.curdir (concrete symbol depends on OS). 
  419         @param repo_abspath: Absolute path to the git repository. Normalized via 
  421         @type repo_abspath: str 
  423         @param abspath: Absolute path to a file within repo_abspath. Normalized via 
  427         @return: List of path components. 
  430         repo_abspath = path.normpath(repo_abspath)
 
  431         abspath = path.normpath(abspath)
 
  433         if not path.isabs(repo_abspath):
 
  434             raise ValueError(
"repo_abspath MUST be absolute path.")
 
  436         if not path.isabs(abspath):
 
  437             raise ValueError(
"abspath MUST be absoulte path.")
 
  439         if not path.commonprefix([repo_abspath, abspath]):
 
  441                 'abspath ("%s") MUST have common prefix with repo_abspath ("%s")' 
  442                 % (abspath, repo_abspath)
 
  447         while not abspath == repo_abspath:
 
  448             abspath, tail = path.split(abspath)
 
  451                 components.insert(0, tail)
 
  453         components.insert(0, curdir)
 
  457     def run_git_shell(cmd, cwd=None):
 
  459         Runs git shell command, reads output and decodes it into unicode string. 
  461         @param cmd: Command to be executed. 
  465         @param cwd: Working directory. 
  468         @return: Output of the command. 
  470         @raise CalledProcessError:  Raises exception if return code of the command is 
  473         p = Popen(cmd, shell=
True, stdout=PIPE, cwd=cwd)
 
  474         output, _ = p.communicate()
 
  476             output.decode(
"unicode_escape").encode(
"raw_unicode_escape").decode(
"utf-8")
 
  480             if sys.version_info > (2, 6):
 
  481                 raise CalledProcessError(
 
  482                     returncode=p.returncode, cmd=cmd, output=output
 
  485                 raise CalledProcessError(returncode=p.returncode, cmd=cmd)
 
  491     from optparse 
import OptionParser
 
  493     parser = OptionParser(
 
  494         usage=
"usage: %prog [-v] [--prefix PREFIX] [--no-exclude] [--force-submodules]" 
  495         " [--extra EXTRA1 [EXTRA2]] [--dry-run] OUTPUT_FILE",
 
  496         version=
"%prog {0}".format(__version__),
 
  504         help=
"""prepend PREFIX to each filename in the archive. 
  505                 OUTPUT_FILE name is used by default to avoid tarbomb. 
  506                 You can set it to '' in order to explicitly request tarbomb""",
 
  514         help=
"enable verbose mode",
 
  519         action=
"store_false",
 
  522         help=
"don't read .gitattributes for patterns containing export-ignore attrib",
 
  526         "--force-submodules",
 
  529         help=
"force a git submodule init && git submodule update" 
  530         "at each level before iterating submodules",
 
  538         help=
"any additional files to include in the archive",
 
  545         help=
"don't actually archive anything, just show what would be done",
 
  548     options, args = parser.parse_args()
 
  551         parser.error(
"You must specify exactly one output file")
 
  553     output_file_path = args[0]
 
  555     if path.isdir(output_file_path):
 
  556         parser.error(
"You cannot use directory as output")
 
  559     if options.prefix 
is not None:
 
  560         options.prefix = path.join(options.prefix, 
"")
 
  564         output_name = path.basename(output_file_path)
 
  579                 "(" + 
"|".join(
r"\." + e 
for e 
in extensions) + 
")$",
 
  585         options.prefix = path.join(output_name, 
"")
 
  588         handler = logging.StreamHandler(sys.stdout)
 
  589         handler.setFormatter(logging.Formatter(
"%(message)s"))
 
  590         GitArchiver.LOG.addHandler(handler)
 
  591         GitArchiver.LOG.setLevel(logging.DEBUG 
if options.verbose 
else logging.INFO)
 
  592         archiver = GitArchiver(
 
  593             options.prefix, options.exclude, options.force_sub, options.extra
 
  595         archiver.create(output_file_path, options.dry_run)
 
  596     except Exception 
as e:
 
  597         parser.exit(2, 
"{0}\n".format(e))
 
  602 if __name__ == 
"__main__":