From d052ecb0addc1d42f518a02227e5d9dce93759d3 Mon Sep 17 00:00:00 2001 From: crass Date: Mon, 16 Sep 2019 20:39:05 -0500 Subject: [PATCH 01/22] Turn on github package debugging when --debug is specified. --- github-backup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/github-backup.py b/github-backup.py index ff4c108..916b39f 100755 --- a/github-backup.py +++ b/github-backup.py @@ -8,11 +8,11 @@ Created: Fri Jun 15 2012 """ -from github import Github from argparse import ArgumentParser import subprocess import os, os.path import logging +import github LOGGER = logging.getLogger('github-backup') @@ -27,6 +27,7 @@ def main(): LOGGER.setLevel(logging.WARN) elif args.debug: LOGGER.setLevel(logging.DEBUG) + github.enable_console_debug_logging() # Process args if args.quiet: @@ -40,7 +41,7 @@ def main(): if args.password: config['password'] = args.password - gh = Github(**config) + gh = github.Github(**config) # Check that backup dir exists if not os.path.exists(args.backupdir): From 3ac05ede4ad1b56c8b930c1ffb6739dbd83e5447 Mon Sep 17 00:00:00 2001 From: crass Date: Mon, 16 Sep 2019 21:15:31 -0500 Subject: [PATCH 02/22] Allow unauthenticated backups. Change get_org -> get_organization, since the api appears to have changed. --- github-backup.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/github-backup.py b/github-backup.py index 916b39f..216aa34 100755 --- a/github-backup.py +++ b/github-backup.py @@ -36,10 +36,13 @@ def main(): args.backupdir = args.backupdir.rstrip("/") # Make the connection to Github here. 
- config = {'login_or_token': args.login_or_token} - + config = {} if args.password: + config = {'login_or_token': args.login_or_token} config['password'] = args.password + else: + # unauthenticated users can only use http git method + args.type = 'http' gh = github.Github(**config) @@ -54,11 +57,18 @@ def main(): } if args.organization: - org = gh.get_org(args.org) - repos = org.get_repos(**filters) + if args.password: + account = gh.get_organization(args.org) + else: + filters = {} + account = gh.get_organization(args.login_or_token) else: - user = gh.get_user() - repos = user.get_repos(**filters) + if args.password: + account = gh.get_user() + else: + filters = {} + account = gh.get_user(args.login_or_token) + repos = account.get_repos(**filters) for repo in repos: if args.skip_forks and repo.fork: From 4474b95a0fa5dcb79c12dd27f93a0860548916ff Mon Sep 17 00:00:00 2001 From: crass Date: Wed, 18 Sep 2019 00:45:39 -0500 Subject: [PATCH 03/22] Add --username to backup a user other than the one authenticated with. 
--- github-backup.py | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/github-backup.py b/github-backup.py index 216aa34..3196ba3 100755 --- a/github-backup.py +++ b/github-backup.py @@ -16,7 +16,10 @@ import github LOGGER = logging.getLogger('github-backup') +IsAuthorized = False + def main(): + global IsAuthorized logging.basicConfig(level=logging.INFO) @@ -50,26 +53,28 @@ def main(): if not os.path.exists(args.backupdir): os.mkdir(args.backupdir) - # Get all repos - filters = { - 'affiliation': ','.join(args.affiliation), - 'visibility': args.visibility - } - if args.organization: if args.password: account = gh.get_organization(args.org) else: - filters = {} account = gh.get_organization(args.login_or_token) else: if args.password: - account = gh.get_user() + account = gh.get_user(args.username) else: - filters = {} - account = gh.get_user(args.login_or_token) - repos = account.get_repos(**filters) + account = gh.get_user(args.username or args.login_or_token) + IsAuthorized = isinstance(account, github.AuthenticatedUser.AuthenticatedUser) + + filters = {} + if IsAuthorized: + # Get all repos + filters = { + 'affiliation': ','.join(args.affiliation), + 'visibility': args.visibility + } + + repos = account.get_repos(**filters) for repo in repos: if args.skip_forks and repo.fork: continue @@ -81,7 +86,7 @@ def init_parser(): parser = ArgumentParser(description="makes a backup of all of a github user's repositories") - parser.add_argument("login_or_token", help="A Github username or token") + parser.add_argument("login_or_token", help="A Github username or token for authenticating") parser.add_argument("backupdir", help="The folder where you want your backups to go") parser.add_argument("-v", "--visibility", help="Filter repos by their visibility", choices=['all', 'public', 'private'], default='all') parser.add_argument("-a", "--affiliation", help="Filter repos by their affiliation", action='append', type=str, 
default=['owner'], choices=['owner', 'collaborator', 'organization_member']) @@ -92,6 +97,7 @@ def init_parser(): parser.add_argument("-g", "--git", nargs="+", help="Pass extra arguments to git", type=list, default=[], metavar="ARGS") parser.add_argument("-t", "--type", help="Select the protocol for cloning", choices=['git', 'http', 'ssh'], default='ssh') parser.add_argument("-s", "--suffix", help="Add suffix to repository directory names", default="") + parser.add_argument("-u", "--username", help="Backup USER account", metavar="USER") parser.add_argument("-p", "--password", help="Authenticate with Github API") parser.add_argument("-P", "--prefix", help="Add prefix to repository directory names", default="") parser.add_argument("-o", "--organization", help="Backup Organizational repositories", metavar="ORG") @@ -112,9 +118,15 @@ def process_repo(repo, args): LOGGER.info("Repo already exists, let's try to update it instead") update_repo(repo, dir, args) + if isinstance(repo, github.Gist.Gist): + # Save extra gist info + gist_file = os.path.join(os.path.dirname(dir), repo.id+'.json') + with codecs.open(gist_file, 'w', encoding='utf-8') as f: + json_dump(repo.raw_data, f) + def clone_repo(repo, dir, args): - if args.type == 'http': + if args.type == 'http' or not IsAuthorized: url = repo.clone_url elif args.type == 'ssh': url = repo.ssh_url From 67bafdf0ef50beabd483df905ff6370c114419c0 Mon Sep 17 00:00:00 2001 From: crass Date: Wed, 18 Sep 2019 00:55:10 -0500 Subject: [PATCH 04/22] Add option to backup account data. 
--- github-backup.py | 106 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 102 insertions(+), 4 deletions(-) diff --git a/github-backup.py b/github-backup.py index 3196ba3..5c90fd7 100755 --- a/github-backup.py +++ b/github-backup.py @@ -8,10 +8,15 @@ Created: Fri Jun 15 2012 """ -from argparse import ArgumentParser +import os +import errno +import codecs +import json import subprocess -import os, os.path import logging +from argparse import ArgumentParser + +import requests import github LOGGER = logging.getLogger('github-backup') @@ -35,6 +40,15 @@ def main(): # Process args if args.quiet: args.git.append("--quiet") + if args.include_everything: + args.account = True + args.include_starred = True + args.include_watched = True + args.include_followers = True + args.include_following = True + if args.include_starred or args.include_watched or args.include_followers \ + or args.include_following: + args.account = True args.backupdir = args.backupdir.rstrip("/") @@ -51,7 +65,7 @@ def main(): # Check that backup dir exists if not os.path.exists(args.backupdir): - os.mkdir(args.backupdir) + mkdir_p(args.backupdir) if args.organization: if args.password: @@ -74,6 +88,9 @@ def main(): 'visibility': args.visibility } + if args.account: + process_account(gh, account, args) + repos = account.get_repos(**filters) for repo in repos: if args.skip_forks and repo.fork: @@ -84,7 +101,7 @@ def main(): def init_parser(): """Set up the argument parser.""" - parser = ArgumentParser(description="makes a backup of all of a github user's repositories") + parser = ArgumentParser(description="makes a backup of a github user's account") parser.add_argument("login_or_token", help="A Github username or token for authenticating") parser.add_argument("backupdir", help="The folder where you want your backups to go") @@ -101,9 +118,69 @@ def init_parser(): parser.add_argument("-p", "--password", help="Authenticate with Github API") parser.add_argument("-P", "--prefix", help="Add 
prefix to repository directory names", default="") parser.add_argument("-o", "--organization", help="Backup Organizational repositories", metavar="ORG") + parser.add_argument("-A", "--account", help="Backup account data", action='store_true') + parser.add_argument('--all', + action='store_true', + dest='include_everything', + help='include everything in backup (not including [*])') + parser.add_argument('--starred', + action='store_true', + dest='include_starred', + help='include JSON output of starred repositories in backup') + parser.add_argument('--watched', + action='store_true', + dest='include_watched', + help='include JSON output of watched repositories in backup') + parser.add_argument('--followers', + action='store_true', + dest='include_followers', + help='include JSON output of followers in backup') + parser.add_argument('--following', + action='store_true', + dest='include_following', + help='include JSON output of following users in backup') return parser +def fetch_url(url, outfile): + headers = { + "User-Agent": "PyGithub/Python" + } + with open(outfile, 'w') as f: + f.write(requests.get(url, headers=headers).content) + +def process_account(gh, account, args): + LOGGER.info("Processing account: %s", account.login) + + dir = os.path.join(args.backupdir, 'account') + if not os.access(dir, os.F_OK): + mkdir_p(dir) + + account_file = os.path.join(dir, 'account.json') + with codecs.open(account_file, 'w', encoding='utf-8') as f: + json_dump(account.raw_data, f) + + if IsAuthorized: + emails_file = os.path.join(dir, 'emails.json') + with codecs.open(emails_file, 'w', encoding='utf-8') as f: + json_dump(list(account.get_emails()), f) + + if args.include_starred: + LOGGER.debug(" Getting starred repository list") + fetch_url(account.starred_url, os.path.join(dir, 'starred.json')) + + if args.include_watched: + LOGGER.debug(" Getting watched repository list") + fetch_url(account.subscriptions_url, os.path.join(dir, 'watched.json')) + + if 
args.include_followers: + LOGGER.debug(" Getting followers repository list") + fetch_url(account.followers_url, os.path.join(dir, 'followers.json')) + + if args.include_following: + LOGGER.debug(" Getting following repository list") + fetch_url(account.following_url, os.path.join(dir, 'following.json')) + def process_repo(repo, args): LOGGER.info("Processing repo: %s", repo.full_name) @@ -175,5 +252,26 @@ def git(gcmd, args=[], gargs=[], gdir=""): print(cmd) subprocess.call(cmd) +def json_dump(data, output_file): + json.dump(data, + output_file, + ensure_ascii=False, + sort_keys=True, + indent=4, + separators=(',', ': ')) + +def mkdir_p(path): + head, tail = os.path.split(path) + if head and not os.access(head, os.F_OK): + mkdir_p(head) + + try: + os.makedirs(path) + except OSError as exc: # Python >2.5 + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + if __name__ == "__main__": main() From 83ecc23da4b7f316eca3add41603ba741ab21aef Mon Sep 17 00:00:00 2001 From: crass Date: Wed, 18 Sep 2019 01:23:14 -0500 Subject: [PATCH 05/22] Add option to skip downloading of repositories. 
--- github-backup.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/github-backup.py b/github-backup.py index 5c90fd7..2c840bc 100755 --- a/github-backup.py +++ b/github-backup.py @@ -91,12 +91,13 @@ def main(): if args.account: process_account(gh, account, args) - repos = account.get_repos(**filters) - for repo in repos: - if args.skip_forks and repo.fork: - continue + if not args.skip_repos: + repos = account.get_repos(**filters) + for repo in repos: + if args.skip_forks and repo.fork: + continue - process_repo(repo, args) + process_repo(repo, args) def init_parser(): """Set up the argument parser.""" @@ -111,6 +112,7 @@ def init_parser(): parser.add_argument("-q", "--quiet", help="Only show errors", action="store_true") parser.add_argument("-m", "--mirror", help="Create a bare mirror", action="store_true") parser.add_argument("-f", "--skip-forks", help="Skip forks", action="store_true") + parser.add_argument("--skip-repos", help="Skip backing up repositories", action="store_true") parser.add_argument("-g", "--git", nargs="+", help="Pass extra arguments to git", type=list, default=[], metavar="ARGS") parser.add_argument("-t", "--type", help="Select the protocol for cloning", choices=['git', 'http', 'ssh'], default='ssh') parser.add_argument("-s", "--suffix", help="Add suffix to repository directory names", default="") From 3824d1196bf39564957fc51f7f4059449154a64a Mon Sep 17 00:00:00 2001 From: crass Date: Wed, 18 Sep 2019 01:26:30 -0500 Subject: [PATCH 06/22] Put repositories in backupdir/repositories/${REPO_NAME}/repository in preparation for adding other repository (meta)data. 
--- github-backup.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/github-backup.py b/github-backup.py index 2c840bc..51ff8a6 100755 --- a/github-backup.py +++ b/github-backup.py @@ -187,9 +187,11 @@ def process_account(gh, account, args): def process_repo(repo, args): LOGGER.info("Processing repo: %s", repo.full_name) - dir = args.backupdir + '/' + args.prefix + repo.name + args.suffix - config = "%s/%s" % (dir, "config" if args.mirror else ".git/config") + dir = os.path.join(args.backupdir, 'repositories', args.prefix + repo.name + args.suffix, 'repository') + config = os.path.join(dir, "config" if args.mirror else ".git/config") + if not os.access(os.path.dirname(dir), os.F_OK): + mkdir_p(os.path.dirname(dir)) if not os.access(config, os.F_OK): LOGGER.info("Repo doesn't exists, lets clone it") clone_repo(repo, dir, args) @@ -216,7 +218,7 @@ def clone_repo(repo, dir, args): if args.mirror: git_args.insert(0, '--mirror') - git("clone", git_args, args.git, args.backupdir) + git("clone", git_args, args.git, os.path.dirname(dir)) def update_repo(repo, dir, args): From d6ac98b392d1a03caf3356ec1b7c3f177d7bfc63 Mon Sep 17 00:00:00 2001 From: crass Date: Wed, 18 Sep 2019 01:32:10 -0500 Subject: [PATCH 07/22] Add option to download the repository wiki repository. 
--- github-backup.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/github-backup.py b/github-backup.py index 51ff8a6..5d5b9e2 100755 --- a/github-backup.py +++ b/github-backup.py @@ -46,6 +46,7 @@ def main(): args.include_watched = True args.include_followers = True args.include_following = True + args.include_wiki = True if args.include_starred or args.include_watched or args.include_followers \ or args.include_following: args.account = True @@ -141,6 +142,10 @@ def init_parser(): action='store_true', dest='include_following', help='include JSON output of following users in backup') + parser.add_argument('--wikis', + action='store_true', + dest='include_wiki', + help='include wiki clone in backup') return parser @@ -218,6 +223,11 @@ def clone_repo(repo, dir, args): if args.mirror: git_args.insert(0, '--mirror') + if args.include_wiki and repo.has_wiki: + git_wiki_args = git_args.copy() + git_wiki_args[0] = url.replace('.git', '.wiki.git') + git("clone", git_wiki_args, args.git, os.path.join(os.path.dirname(dir), 'wiki')) + git("clone", git_args, args.git, os.path.dirname(dir)) From 6dc8ee60343d1662a66af7119ac87a48b47a29c0 Mon Sep 17 00:00:00 2001 From: crass Date: Thu, 19 Sep 2019 11:02:08 -0500 Subject: [PATCH 08/22] Refactor into RepositoryBackup class for backing up repository and metadata. 
--- github-backup.py | 117 ++++++++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 53 deletions(-) diff --git a/github-backup.py b/github-backup.py index 5d5b9e2..806f436 100755 --- a/github-backup.py +++ b/github-backup.py @@ -98,7 +98,7 @@ def main(): if args.skip_forks and repo.fork: continue - process_repo(repo, args) + RepositoryBackup(repo, args).backup() def init_parser(): """Set up the argument parser.""" @@ -189,70 +189,81 @@ def process_account(gh, account, args): fetch_url(account.following_url, os.path.join(dir, 'following.json')) -def process_repo(repo, args): - LOGGER.info("Processing repo: %s", repo.full_name) +class RepositoryBackup(object): + def __init__(self, repo, args): + self.repo = repo + self.args = args - dir = os.path.join(args.backupdir, 'repositories', args.prefix + repo.name + args.suffix, 'repository') - config = os.path.join(dir, "config" if args.mirror else ".git/config") + dir = os.path.join(args.backupdir, 'repositories', args.prefix + repo.name + args.suffix, 'repository') + self.dir = dir - if not os.access(os.path.dirname(dir), os.F_OK): - mkdir_p(os.path.dirname(dir)) - if not os.access(config, os.F_OK): - LOGGER.info("Repo doesn't exists, lets clone it") - clone_repo(repo, dir, args) - else: - LOGGER.info("Repo already exists, let's try to update it instead") - update_repo(repo, dir, args) + if args.type == 'http' or not IsAuthorized: + url = repo.clone_url + elif args.type == 'ssh': + url = repo.ssh_url + elif args.type == 'git': + url = repo.git_url + self.url = url - if isinstance(repo, github.Gist.Gist): - # Save extra gist info - gist_file = os.path.join(os.path.dirname(dir), repo.id+'.json') - with codecs.open(gist_file, 'w', encoding='utf-8') as f: - json_dump(repo.raw_data, f) + self.wiki_url = None + if args.include_wiki and repo.has_wiki: + self.wiki_url = self.url.replace('.git', '.wiki.git') + self.wiki_dir = os.path.join(args.backupdir, 'repositories', args.prefix + repo.name + 
args.suffix, 'wiki') + def backup(self): + LOGGER.info("Processing repo: %s", self.repo.full_name) -def clone_repo(repo, dir, args): - if args.type == 'http' or not IsAuthorized: - url = repo.clone_url - elif args.type == 'ssh': - url = repo.ssh_url - elif args.type == 'git': - url = repo.git_url + config = os.path.join(self.dir, "config" if self.args.mirror else ".git/config") + if not os.access(os.path.dirname(self.dir), os.F_OK): + mkdir_p(os.path.dirname(self.dir)) + if not os.access(config, os.F_OK): + LOGGER.info("Repo doesn't exists, lets clone it") + self.clone_repo(self.url, self.dir) + else: + LOGGER.info("Repo already exists, let's try to update it instead") + self.update_repo(self.dir) - git_args = [url, os.path.basename(dir)] - if args.mirror: - git_args.insert(0, '--mirror') + if self.wiki_url: + config = os.path.join(self.wiki_dir, "config" if self.args.mirror else ".git/config") + if not os.access(os.path.dirname(self.wiki_dir), os.F_OK): + mkdir_p(os.path.dirname(self.wiki_dir)) + if not os.access(config, os.F_OK): + LOGGER.info("Wiki repo doesn't exists, lets clone it") + self.clone_repo(self.wiki_url, self.wiki_dir) + else: + LOGGER.info("Wiki repo already exists, let's try to update it instead") + self.update_repo(self.wiki_dir) - if args.include_wiki and repo.has_wiki: - git_wiki_args = git_args.copy() - git_wiki_args[0] = url.replace('.git', '.wiki.git') - git("clone", git_wiki_args, args.git, os.path.join(os.path.dirname(dir), 'wiki')) + def clone_repo(self, url, dir): + git_args = [url, os.path.basename(dir)] + if self.args.mirror: + git_args.insert(0, '--mirror') - git("clone", git_args, args.git, os.path.dirname(dir)) + git("clone", git_args, self.args.git, os.path.dirname(dir)) + def update_repo(self, dir): + # GitHub => Local + # TODO: use subprocess package and fork git into + # background (major performance boost expected) + args, repo = self.args, self.repo + if args.mirror: + git("fetch", ["--prune"], args.git, dir) + else: + 
git("pull", gargs=args.git, gdir=dir) -def update_repo(repo, dir, args): - # GitHub => Local - # TODO: use subprocess package and fork git into - # background (major performance boost expected) - if args.mirror: - git("fetch", ["--prune"], args.git, dir) - else: - git("pull", gargs=args.git, gdir=dir) + # Fetch description and owner (useful for gitweb, cgit etc.) + if repo.description: + git("config", ["--local", "gitweb.description", + repo.description.encode("utf-8")], gdir=dir) - # Fetch description and owner (useful for gitweb, cgit etc.) - if repo.description: - git("config", ["--local", "gitweb.description", - repo.description.encode("utf-8")], gdir=dir) + if repo.owner.name and repo.owner.email: + owner = "%s <%s>" % (repo.owner.name.encode("utf-8"), + repo.owner.email.encode("utf-8")) + git("config", ["--local", "gitweb.owner", owner], gdir=dir) - if repo.owner.name and repo.owner.email: - owner = "%s <%s>" % (repo.owner.name.encode("utf-8"), - repo.owner.email.encode("utf-8")) - git("config", ["--local", "gitweb.owner", owner], gdir=dir) - - git("config", ["--local", "cgit.name", str(repo.name)], gdir=dir) - git("config", ["--local", "cgit.defbranch", str(repo.default_branch)], gdir=dir) - git("config", ["--local", "cgit.clone-url", str(repo.clone_url)], gdir=dir) + git("config", ["--local", "cgit.name", str(repo.name)], gdir=dir) + git("config", ["--local", "cgit.defbranch", str(repo.default_branch)], gdir=dir) + git("config", ["--local", "cgit.clone-url", str(repo.clone_url)], gdir=dir) def git(gcmd, args=[], gargs=[], gdir=""): From fad148a3cfe17164980603e380db2c3520fc9cab Mon Sep 17 00:00:00 2001 From: crass Date: Thu, 19 Sep 2019 11:29:59 -0500 Subject: [PATCH 09/22] Add options for backing up gists and starred gists. 
--- github-backup.py | 48 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/github-backup.py b/github-backup.py index 806f436..f77f1be 100755 --- a/github-backup.py +++ b/github-backup.py @@ -92,6 +92,14 @@ def main(): if args.account: process_account(gh, account, args) + if args.include_gists: + for gist in account.get_gists(): + RepositoryBackup(gist, args).backup() + + if args.include_starred_gists and hasattr(account, 'get_starred_gists'): + for gist in account.get_starred_gists(): + RepositoryBackup(gist, args).backup() + if not args.skip_repos: repos = account.get_repos(**filters) for repo in repos: @@ -146,6 +154,14 @@ def init_parser(): action='store_true', dest='include_wiki', help='include wiki clone in backup') + parser.add_argument('--gists', + action='store_true', + dest='include_gists', + help='include gists in backup [*]') + parser.add_argument('--starred-gists', + action='store_true', + dest='include_starred_gists', + help='include starred gists in backup [*]') return parser @@ -194,10 +210,17 @@ class RepositoryBackup(object): self.repo = repo self.args = args - dir = os.path.join(args.backupdir, 'repositories', args.prefix + repo.name + args.suffix, 'repository') + self.is_gist = isinstance(repo, github.Gist.Gist) + + if self.is_gist: + dir = os.path.join(args.backupdir, 'gists', repo.id) + else: + dir = os.path.join(args.backupdir, 'repositories', args.prefix + repo.name + args.suffix, 'repository') self.dir = dir - if args.type == 'http' or not IsAuthorized: + if self.is_gist: + url = repo.git_pull_url + elif args.type == 'http' or not IsAuthorized: url = repo.clone_url elif args.type == 'ssh': url = repo.ssh_url @@ -211,7 +234,10 @@ class RepositoryBackup(object): self.wiki_dir = os.path.join(args.backupdir, 'repositories', args.prefix + repo.name + args.suffix, 'wiki') def backup(self): - LOGGER.info("Processing repo: %s", self.repo.full_name) + if self.is_gist: + 
LOGGER.info("Processing gist: %s", self.repo.id) + else: + LOGGER.info("Processing repo: %s", self.repo.full_name) config = os.path.join(self.dir, "config" if self.args.mirror else ".git/config") if not os.access(os.path.dirname(self.dir), os.F_OK): @@ -234,6 +260,12 @@ class RepositoryBackup(object): LOGGER.info("Wiki repo already exists, let's try to update it instead") self.update_repo(self.wiki_dir) + if self.is_gist: + # Save extra gist info + gist_file = os.path.join(os.path.dirname(self.dir), self.repo.id+'.json') + with codecs.open(gist_file, 'w', encoding='utf-8') as f: + json_dump(self.repo.raw_data, f) + def clone_repo(self, url, dir): git_args = [url, os.path.basename(dir)] if self.args.mirror: @@ -261,9 +293,13 @@ class RepositoryBackup(object): repo.owner.email.encode("utf-8")) git("config", ["--local", "gitweb.owner", owner], gdir=dir) - git("config", ["--local", "cgit.name", str(repo.name)], gdir=dir) - git("config", ["--local", "cgit.defbranch", str(repo.default_branch)], gdir=dir) - git("config", ["--local", "cgit.clone-url", str(repo.clone_url)], gdir=dir) + if self.is_gist: + git("config", ["--local", "cgit.name", str(repo.id)], gdir=dir) + git("config", ["--local", "cgit.clone-url", str(repo.git_pull_url)], gdir=dir) + else: + git("config", ["--local", "cgit.name", str(repo.name)], gdir=dir) + git("config", ["--local", "cgit.defbranch", str(repo.default_branch)], gdir=dir) + git("config", ["--local", "cgit.clone-url", str(repo.clone_url)], gdir=dir) def git(gcmd, args=[], gargs=[], gdir=""): From 21173ac39ecde7220b535afa8f29594a7da9bfe9 Mon Sep 17 00:00:00 2001 From: crass Date: Fri, 20 Sep 2019 00:31:49 -0500 Subject: [PATCH 10/22] Add options to download releases and associated assets. 
--- github-backup.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/github-backup.py b/github-backup.py index f77f1be..07323cd 100755 --- a/github-backup.py +++ b/github-backup.py @@ -46,6 +46,8 @@ def main(): args.include_watched = True args.include_followers = True args.include_following = True + args.include_releases = True + args.include_assets = True args.include_wiki = True if args.include_starred or args.include_watched or args.include_followers \ or args.include_following: @@ -162,6 +164,15 @@ def init_parser(): action='store_true', dest='include_starred_gists', help='include starred gists in backup [*]') + parser.add_argument('--releases', + action='store_true', + dest='include_releases', + help='include release information, not including assets or binaries' + ) + parser.add_argument('--assets', + action='store_true', + dest='include_assets', + help='include assets alongside release information; only applies if including releases') return parser @@ -265,6 +276,9 @@ class RepositoryBackup(object): gist_file = os.path.join(os.path.dirname(self.dir), self.repo.id+'.json') with codecs.open(gist_file, 'w', encoding='utf-8') as f: json_dump(self.repo.raw_data, f) + else: + if self.args.include_releases: + self._backup_releases() def clone_repo(self, url, dir): git_args = [url, os.path.basename(dir)] @@ -301,6 +315,24 @@ class RepositoryBackup(object): git("config", ["--local", "cgit.defbranch", str(repo.default_branch)], gdir=dir) git("config", ["--local", "cgit.clone-url", str(repo.clone_url)], gdir=dir) + def _backup_releases(self): + for release in self.repo.get_releases(): + rel_dir = os.path.join(os.path.dirname(self.dir), 'releases') + rel_file = os.path.join(rel_dir, release.tag_name+'.json') + if not os.access(rel_dir, os.F_OK): + mkdir_p(rel_dir) + with codecs.open(rel_file, 'w', encoding='utf-8') as f: + json_dump(release.raw_data, f) + + if self.args.include_assets: + for asset in release.get_assets(): + asset_dir 
= os.path.join(rel_dir, release.tag_name) + asset_file = os.path.join(asset_dir, asset.name) + if not os.access(asset_dir, os.F_OK): + mkdir_p(asset_dir) + fetch_url(asset.browser_download_url, asset_file) + assert asset.size == os.path.getsize(asset_file) + def git(gcmd, args=[], gargs=[], gdir=""): cmd = ["git"] From 00dd7fee237d4e40a9e8e7ee5b057e408c1b07e0 Mon Sep 17 00:00:00 2001 From: crass Date: Fri, 20 Sep 2019 00:43:44 -0500 Subject: [PATCH 11/22] Add options to backup issues and pull requests. --- github-backup.py | 96 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/github-backup.py b/github-backup.py index 07323cd..1f5a7e9 100755 --- a/github-backup.py +++ b/github-backup.py @@ -46,6 +46,12 @@ def main(): args.include_watched = True args.include_followers = True args.include_following = True + args.include_issues = True + args.include_issue_comments = True + args.include_issue_events = True + args.include_pulls = True + args.include_pull_comments = True + args.include_pull_commits = True args.include_releases = True args.include_assets = True args.include_wiki = True @@ -152,6 +158,30 @@ def init_parser(): action='store_true', dest='include_following', help='include JSON output of following users in backup') + parser.add_argument('--issues', + action='store_true', + dest='include_issues', + help='include issues in backup') + parser.add_argument('--issue-comments', + action='store_true', + dest='include_issue_comments', + help='include issue comments in backup') + parser.add_argument('--issue-events', + action='store_true', + dest='include_issue_events', + help='include issue events in backup') + parser.add_argument('--pulls', + action='store_true', + dest='include_pulls', + help='include pull requests in backup') + parser.add_argument('--pull-comments', + action='store_true', + dest='include_pull_comments', + help='include pull request review comments in backup') + parser.add_argument('--pull-commits', + 
action='store_true', + dest='include_pull_commits', + help='include pull request commits in backup') parser.add_argument('--wikis', action='store_true', dest='include_wiki', @@ -215,6 +245,24 @@ def process_account(gh, account, args): LOGGER.debug(" Getting following repository list") fetch_url(account.following_url, os.path.join(dir, 'following.json')) + if args.include_issues: + LOGGER.debug(" Getting issues for user %s", account.login) + if IsAuthorized: + issues = account.get_issues() + else: + issues = gh.search_issues('', author=account.login, type='issue') + + RepositoryBackup._backup_issues(issues, args, dir) + + if args.include_pulls: + LOGGER.debug(" Getting pull requests for user %s", account.login) + if IsAuthorized: + issues = account.get_issues() + else: + issues = gh.search_issues('', author=account.login, type='pr') + + RepositoryBackup._backup_pulls(issues, args, dir) + class RepositoryBackup(object): def __init__(self, repo, args): @@ -280,6 +328,14 @@ class RepositoryBackup(object): if self.args.include_releases: self._backup_releases() + if self.args.include_issues: + LOGGER.debug(" Getting issues for repo %s", self.repo.name) + self._backup_issues(self.repo.get_issues(), self.args, os.path.dirname(self.dir)) + + if self.args.include_pulls: + LOGGER.debug(" Getting pull requests for repo %s", self.repo.name) + self._backup_pulls(self.repo.get_pulls(), self.args, os.path.dirname(self.dir)) + def clone_repo(self, url, dir): git_args = [url, os.path.basename(dir)] if self.args.mirror: @@ -315,6 +371,46 @@ class RepositoryBackup(object): git("config", ["--local", "cgit.defbranch", str(repo.default_branch)], gdir=dir) git("config", ["--local", "cgit.clone-url", str(repo.clone_url)], gdir=dir) + @classmethod + def _backup_issues(cls, issues, args, dir): + for issue in issues: + issue_data = issue.raw_data.copy() + LOGGER.debug(" * %s", issue.number) + if args.include_issue_comments and issue.comments: + for comment in issue.get_comments(): + 
issue_data.setdefault('comment_data', []).append(comment.raw_data) + if args.include_issue_events: + for event in issue.get_events(): + issue_data.setdefault('event_data', []).append(event.raw_data) + + project = os.path.basename(os.path.dirname(os.path.dirname(issue.url))) + issue_file = os.path.join(dir, 'issues', "{0}:{1}.json".format(project, issue.number)) + if not os.access(os.path.dirname(issue_file), os.F_OK): + mkdir_p(os.path.dirname(issue_file)) + with codecs.open(issue_file, 'w', encoding='utf-8') as f: + json_dump(issue_data, f) + + @classmethod + def _backup_pulls(cls, issues, args, dir): + for issue in issues: + if isinstance(issue, github.Issue.Issue): + issue = issue.as_pull_request() + issue_data = issue.raw_data.copy() + LOGGER.debug(" * %s", issue.number) + if args.include_pull_comments and issue.comments: + for comment in issue.get_comments(): + issue_data.setdefault('comment_data', []).append(comment.raw_data) + if args.include_pull_commits and issue.commits: + for commit in issue.get_commits(): + issue_data.setdefault('commit_data', []).append(commit.raw_data) + + project = os.path.basename(os.path.dirname(os.path.dirname(issue.url))) + issue_file = os.path.join(dir, 'pull-requests', "{0}:{1}.json".format(project, issue.number)) + if not os.access(os.path.dirname(issue_file), os.F_OK): + mkdir_p(os.path.dirname(issue_file)) + with codecs.open(issue_file, 'w', encoding='utf-8') as f: + json_dump(issue_data, f) + def _backup_releases(self): for release in self.repo.get_releases(): rel_dir = os.path.join(os.path.dirname(self.dir), 'releases') From 592eb3093f390793efbdaa2ab16244604f25e933 Mon Sep 17 00:00:00 2001 From: crass Date: Sat, 28 Sep 2019 00:34:27 -0500 Subject: [PATCH 12/22] Set most log lines to INFO so that they are by default printed. 
--- github-backup.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/github-backup.py b/github-backup.py index 1f5a7e9..9d32d57 100755 --- a/github-backup.py +++ b/github-backup.py @@ -230,23 +230,23 @@ def process_account(gh, account, args): json_dump(list(account.get_emails()), f) if args.include_starred: - LOGGER.debug(" Getting starred repository list") + LOGGER.info(" Getting starred repository list") fetch_url(account.starred_url, os.path.join(dir, 'starred.json')) if args.include_watched: - LOGGER.debug(" Getting watched repository list") + LOGGER.info(" Getting watched repository list") fetch_url(account.subscriptions_url, os.path.join(dir, 'watched.json')) if args.include_followers: - LOGGER.debug(" Getting followers repository list") + LOGGER.info(" Getting followers repository list") fetch_url(account.followers_url, os.path.join(dir, 'followers.json')) if args.include_following: - LOGGER.debug(" Getting following repository list") + LOGGER.info(" Getting following repository list") fetch_url(account.following_url, os.path.join(dir, 'following.json')) if args.include_issues: - LOGGER.debug(" Getting issues for user %s", account.login) + LOGGER.info(" Getting issues for user %s", account.login) if IsAuthorized: issues = account.get_issues() else: @@ -255,7 +255,7 @@ def process_account(gh, account, args): RepositoryBackup._backup_issues(issues, args, dir) if args.include_pulls: - LOGGER.debug(" Getting pull requests for user %s", account.login) + LOGGER.info(" Getting pull requests for user %s", account.login) if IsAuthorized: issues = account.get_issues() else: @@ -329,11 +329,11 @@ class RepositoryBackup(object): self._backup_releases() if self.args.include_issues: - LOGGER.debug(" Getting issues for repo %s", self.repo.name) + LOGGER.info(" Getting issues for repo %s", self.repo.name) self._backup_issues(self.repo.get_issues(), self.args, os.path.dirname(self.dir)) if self.args.include_pulls: - LOGGER.debug(" 
Getting pull requests for repo %s", self.repo.name) + LOGGER.info(" Getting pull requests for repo %s", self.repo.name) self._backup_pulls(self.repo.get_pulls(), self.args, os.path.dirname(self.dir)) def clone_repo(self, url, dir): @@ -375,7 +375,7 @@ class RepositoryBackup(object): def _backup_issues(cls, issues, args, dir): for issue in issues: issue_data = issue.raw_data.copy() - LOGGER.debug(" * %s", issue.number) + LOGGER.info(" * %s", issue.number) if args.include_issue_comments and issue.comments: for comment in issue.get_comments(): issue_data.setdefault('comment_data', []).append(comment.raw_data) @@ -396,7 +396,7 @@ class RepositoryBackup(object): if isinstance(issue, github.Issue.Issue): issue = issue.as_pull_request() issue_data = issue.raw_data.copy() - LOGGER.debug(" * %s", issue.number) + LOGGER.info(" * %s", issue.number) if args.include_pull_comments and issue.comments: for comment in issue.get_comments(): issue_data.setdefault('comment_data', []).append(comment.raw_data) From 89bb0667f650eb62fad0c4fcfe29778944005ce3 Mon Sep 17 00:00:00 2001 From: crass Date: Sat, 28 Sep 2019 01:13:16 -0500 Subject: [PATCH 13/22] Nicer logging of issues and pull requests. 
--- github-backup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/github-backup.py b/github-backup.py index 9d32d57..94aa75e 100755 --- a/github-backup.py +++ b/github-backup.py @@ -374,8 +374,9 @@ class RepositoryBackup(object): @classmethod def _backup_issues(cls, issues, args, dir): for issue in issues: + project = os.path.basename(os.path.dirname(os.path.dirname(issue.url))) issue_data = issue.raw_data.copy() - LOGGER.info(" * %s", issue.number) + LOGGER.info(" * %s[%s]: %s", project, issue.number, issue.title) if args.include_issue_comments and issue.comments: for comment in issue.get_comments(): issue_data.setdefault('comment_data', []).append(comment.raw_data) @@ -383,7 +384,6 @@ class RepositoryBackup(object): for event in issue.get_events(): issue_data.setdefault('event_data', []).append(event.raw_data) - project = os.path.basename(os.path.dirname(os.path.dirname(issue.url))) issue_file = os.path.join(dir, 'issues', "{0}:{1}.json".format(project, issue.number)) if not os.access(os.path.dirname(issue_file), os.F_OK): mkdir_p(os.path.dirname(issue_file)) @@ -393,10 +393,11 @@ class RepositoryBackup(object): @classmethod def _backup_pulls(cls, issues, args, dir): for issue in issues: + project = os.path.basename(os.path.dirname(os.path.dirname(issue.url))) if isinstance(issue, github.Issue.Issue): issue = issue.as_pull_request() issue_data = issue.raw_data.copy() - LOGGER.info(" * %s", issue.number) + LOGGER.info(" * %s[%s]: %s", project, issue.number, issue.title) if args.include_pull_comments and issue.comments: for comment in issue.get_comments(): issue_data.setdefault('comment_data', []).append(comment.raw_data) @@ -404,7 +405,6 @@ class RepositoryBackup(object): for commit in issue.get_commits(): issue_data.setdefault('commit_data', []).append(commit.raw_data) - project = os.path.basename(os.path.dirname(os.path.dirname(issue.url))) issue_file = os.path.join(dir, 'pull-requests', "{0}:{1}.json".format(project, issue.number)) 
if not os.access(os.path.dirname(issue_file), os.F_OK): mkdir_p(os.path.dirname(issue_file)) From a553714f6f0d6ab08ec9c2317e24d24d9e3655f3 Mon Sep 17 00:00:00 2001 From: crass Date: Sat, 28 Sep 2019 01:16:35 -0500 Subject: [PATCH 14/22] In addition to assigned, get created issues and pull requests, and both open and closed ones. --- github-backup.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/github-backup.py b/github-backup.py index 94aa75e..273ac1d 100755 --- a/github-backup.py +++ b/github-backup.py @@ -12,6 +12,7 @@ import os import errno import codecs import json +import itertools import subprocess import logging from argparse import ArgumentParser @@ -245,21 +246,29 @@ def process_account(gh, account, args): LOGGER.info(" Getting following repository list") fetch_url(account.following_url, os.path.join(dir, 'following.json')) + filters = ('assigned', 'created') + if args.include_issues: LOGGER.info(" Getting issues for user %s", account.login) - if IsAuthorized: - issues = account.get_issues() - else: - issues = gh.search_issues('', author=account.login, type='issue') + issues = [] + for filter in filters: + if IsAuthorized: + _issues = account.get_issues(state='all', filter=filter) + else: + _issues = gh.search_issues('', author=account.login, type='issue') + issues = itertools.chain(issues, _issues) RepositoryBackup._backup_issues(issues, args, dir) if args.include_pulls: LOGGER.info(" Getting pull requests for user %s", account.login) - if IsAuthorized: - issues = account.get_issues() - else: - issues = gh.search_issues('', author=account.login, type='pr') + issues = [] + for filter in filters: + if IsAuthorized: + _issues = account.get_issues(state='all', filter=filter) + else: + _issues = gh.search_issues('', author=account.login, type='pr') + issues = itertools.chain(issues, _issues) RepositoryBackup._backup_pulls(issues, args, dir) @@ -330,11 +339,11 @@ class RepositoryBackup(object): if 
self.args.include_issues: LOGGER.info(" Getting issues for repo %s", self.repo.name) - self._backup_issues(self.repo.get_issues(), self.args, os.path.dirname(self.dir)) + self._backup_issues(self.repo.get_issues(state='all'), self.args, os.path.dirname(self.dir)) if self.args.include_pulls: LOGGER.info(" Getting pull requests for repo %s", self.repo.name) - self._backup_pulls(self.repo.get_pulls(), self.args, os.path.dirname(self.dir)) + self._backup_pulls(self.repo.get_pulls(state='all'), self.args, os.path.dirname(self.dir)) def clone_repo(self, url, dir): git_args = [url, os.path.basename(dir)] From 1d2bc42dd61f96ad3d58186234e3919c83b800e2 Mon Sep 17 00:00:00 2001 From: crass Date: Sat, 28 Sep 2019 01:58:07 -0500 Subject: [PATCH 15/22] Handle pulls from searching and get_issues. --- github-backup.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/github-backup.py b/github-backup.py index 273ac1d..bf4cc18 100755 --- a/github-backup.py +++ b/github-backup.py @@ -404,7 +404,14 @@ class RepositoryBackup(object): for issue in issues: project = os.path.basename(os.path.dirname(os.path.dirname(issue.url))) if isinstance(issue, github.Issue.Issue): - issue = issue.as_pull_request() + try: + if issue.pull_request: + issue = issue.as_pull_request() + else: + continue + except github.UnknownObjectException, e: + LOGGER.info(" * %s[%s]: No associated pull request", project, issue.number) + continue issue_data = issue.raw_data.copy() LOGGER.info(" * %s[%s]: %s", project, issue.number, issue.title) if args.include_pull_comments and issue.comments: From ea443dcf5279348473bbbeb339d43b3ec86fb944 Mon Sep 17 00:00:00 2001 From: crass Date: Sat, 28 Sep 2019 01:59:42 -0500 Subject: [PATCH 16/22] Do an issue search regardless of authentication because it picks up more issues/pulls. 
--- github-backup.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/github-backup.py b/github-backup.py index bf4cc18..0627e56 100755 --- a/github-backup.py +++ b/github-backup.py @@ -252,10 +252,7 @@ def process_account(gh, account, args): LOGGER.info(" Getting issues for user %s", account.login) issues = [] for filter in filters: - if IsAuthorized: - _issues = account.get_issues(state='all', filter=filter) - else: - _issues = gh.search_issues('', author=account.login, type='issue') + _issues = gh.search_issues('', author=account.login, type='issue') issues = itertools.chain(issues, _issues) RepositoryBackup._backup_issues(issues, args, dir) @@ -264,10 +261,7 @@ def process_account(gh, account, args): LOGGER.info(" Getting pull requests for user %s", account.login) issues = [] for filter in filters: - if IsAuthorized: - _issues = account.get_issues(state='all', filter=filter) - else: - _issues = gh.search_issues('', author=account.login, type='pr') + _issues = gh.search_issues('', author=account.login, type='pr') issues = itertools.chain(issues, _issues) RepositoryBackup._backup_pulls(issues, args, dir) @@ -404,14 +398,7 @@ class RepositoryBackup(object): for issue in issues: project = os.path.basename(os.path.dirname(os.path.dirname(issue.url))) if isinstance(issue, github.Issue.Issue): - try: - if issue.pull_request: - issue = issue.as_pull_request() - else: - continue - except github.UnknownObjectException, e: - LOGGER.info(" * %s[%s]: No associated pull request", project, issue.number) - continue + issue = issue.as_pull_request() issue_data = issue.raw_data.copy() LOGGER.info(" * %s[%s]: %s", project, issue.number, issue.title) if args.include_pull_comments and issue.comments: From 1f1bad18e740ae63512615b027ce421b56144665 Mon Sep 17 00:00:00 2001 From: crass Date: Sat, 28 Sep 2019 02:10:23 -0500 Subject: [PATCH 17/22] Allow specifying -p without an argument, which will mean to check for config to get password and failing 
that prompt the user. --- github-backup.py | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/github-backup.py b/github-backup.py index 0627e56..58c1727 100755 --- a/github-backup.py +++ b/github-backup.py @@ -15,6 +15,11 @@ import json import itertools import subprocess import logging +import getpass +try: #PY3 + from configparser import SafeConfigParser as ConfigParser +except ImportError: + from ConfigParser import SafeConfigParser as ConfigParser from argparse import ArgumentParser import requests @@ -23,6 +28,7 @@ import github LOGGER = logging.getLogger('github-backup') IsAuthorized = False +CONFFILE = os.path.join(os.getenv('HOME'), '.github-backup.conf') def main(): global IsAuthorized @@ -64,13 +70,29 @@ def main(): # Make the connection to Github here. config = {} - if args.password: - config = {'login_or_token': args.login_or_token} - config['password'] = args.password - else: + if args.password == False: + # no password option given, continue unauthenticated # unauthenticated users can only use http git method args.type = 'http' + elif args.password == None: + # password option given, but no password value given + config = {'login_or_token': args.login_or_token} + if os.path.isfile(CONFFILE): + cfg = ConfigParser() + cfg.read(CONFFILE) + try: + config['password'] = cfg.get('github-backup', 'APITOKEN') + except: + config['password'] = cfg.get('github-backup', 'PASSWORD') + else: + password = getpass.getpass('Enter password for {}: '.format(config['login_or_token'])) + if password: + config['password'] = password + else: + config = {'login_or_token': args.login_or_token} + config['password'] = args.password + LOGGER.debug("Github config: %r", config) gh = github.Github(**config) # Check that backup dir exists @@ -83,12 +105,15 @@ def main(): else: account = gh.get_organization(args.login_or_token) else: - if args.password: + if args.username: account = gh.get_user(args.username) + elif 
config.get('password', None): + account = gh.get_user() else: - account = gh.get_user(args.username or args.login_or_token) + account = gh.get_user(args.login_or_token) IsAuthorized = isinstance(account, github.AuthenticatedUser.AuthenticatedUser) + assert not (bool(config.get('password', None)) ^ IsAuthorized), account filters = {} if IsAuthorized: @@ -135,7 +160,7 @@ def init_parser(): parser.add_argument("-t", "--type", help="Select the protocol for cloning", choices=['git', 'http', 'ssh'], default='ssh') parser.add_argument("-s", "--suffix", help="Add suffix to repository directory names", default="") parser.add_argument("-u", "--username", help="Backup USER account", metavar="USER") - parser.add_argument("-p", "--password", help="Authenticate with Github API") + parser.add_argument("-p", "--password", help="Authenticate with Github API (give no argument to check ~/.github-backup.conf or prompt for a password)", nargs="?", default=False) parser.add_argument("-P", "--prefix", help="Add prefix to repository directory names", default="") parser.add_argument("-o", "--organization", help="Backup Organizational repositories", metavar="ORG") parser.add_argument("-A", "--account", help="Backup account data", action='store_true') From 306021f6b021248401e82849cdb0358bd5a26afd Mon Sep 17 00:00:00 2001 From: crass Date: Sat, 28 Sep 2019 22:34:39 -0500 Subject: [PATCH 18/22] Add option to save ssh public keys. 
--- github-backup.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/github-backup.py b/github-backup.py index 58c1727..19e4cb3 100755 --- a/github-backup.py +++ b/github-backup.py @@ -59,11 +59,12 @@ def main(): args.include_pulls = True args.include_pull_comments = True args.include_pull_commits = True + args.include_keys = True args.include_releases = True args.include_assets = True args.include_wiki = True if args.include_starred or args.include_watched or args.include_followers \ - or args.include_following: + or args.include_following or args.include_keys: args.account = True args.backupdir = args.backupdir.rstrip("/") @@ -115,6 +116,10 @@ def main(): IsAuthorized = isinstance(account, github.AuthenticatedUser.AuthenticatedUser) assert not (bool(config.get('password', None)) ^ IsAuthorized), account + if args.include_keys and not IsAuthorized: + LOGGER.info("Cannot backup keys with unauthenticated account, ignoring...") + args.include_keys = False + filters = {} if IsAuthorized: # Get all repos @@ -208,6 +213,10 @@ def init_parser(): action='store_true', dest='include_pull_commits', help='include pull request commits in backup') + parser.add_argument('--keys', + action='store_true', + dest='include_keys', + help='include ssh keys in backup') parser.add_argument('--wikis', action='store_true', dest='include_wiki', @@ -271,6 +280,16 @@ def process_account(gh, account, args): LOGGER.info(" Getting following repository list") fetch_url(account.following_url, os.path.join(dir, 'following.json')) + if args.include_keys: + LOGGER.info(" Getting keys") + for key in account.get_keys(): + key_dir = os.path.join(dir, 'keys') + if not os.access(key_dir, os.F_OK): + mkdir_p(key_dir) + key_file = os.path.join(key_dir, key.title+'.json') + with codecs.open(key_file, 'w', encoding='utf-8') as f: + json_dump(key.raw_data, f) + filters = ('assigned', 'created') if args.include_issues: From edbd5253922c49e27130586f5a6f00d78b1c0da4 Mon Sep 
17 00:00:00 2001 From: crass Date: Wed, 23 Oct 2019 01:29:33 -0500 Subject: [PATCH 19/22] Rename IsAuthorized global to IS_AUTHORIZED to maintain consistency in global variable naming. --- github-backup.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/github-backup.py b/github-backup.py index 19e4cb3..2e9e03f 100755 --- a/github-backup.py +++ b/github-backup.py @@ -27,11 +27,11 @@ import github LOGGER = logging.getLogger('github-backup') -IsAuthorized = False +IS_AUTHORIZED = False CONFFILE = os.path.join(os.getenv('HOME'), '.github-backup.conf') def main(): - global IsAuthorized + global IS_AUTHORIZED logging.basicConfig(level=logging.INFO) @@ -113,15 +113,15 @@ def main(): else: account = gh.get_user(args.login_or_token) - IsAuthorized = isinstance(account, github.AuthenticatedUser.AuthenticatedUser) - assert not (bool(config.get('password', None)) ^ IsAuthorized), account + IS_AUTHORIZED = isinstance(account, github.AuthenticatedUser.AuthenticatedUser) + assert not (bool(config.get('password', None)) ^ IS_AUTHORIZED), account - if args.include_keys and not IsAuthorized: + if args.include_keys and not IS_AUTHORIZED: LOGGER.info("Cannot backup keys with unauthenticated account, ignoring...") args.include_keys = False filters = {} - if IsAuthorized: + if IS_AUTHORIZED: # Get all repos filters = { 'affiliation': ','.join(args.affiliation), @@ -259,7 +259,7 @@ def process_account(gh, account, args): with codecs.open(account_file, 'w', encoding='utf-8') as f: json_dump(account.raw_data, f) - if IsAuthorized: + if IS_AUTHORIZED: emails_file = os.path.join(dir, 'emails.json') with codecs.open(emails_file, 'w', encoding='utf-8') as f: json_dump(list(account.get_emails()), f) @@ -326,7 +326,7 @@ class RepositoryBackup(object): if self.is_gist: url = repo.git_pull_url - elif args.type == 'http' or not IsAuthorized: + elif args.type == 'http' or not IS_AUTHORIZED: url = repo.clone_url elif args.type == 'ssh': url = repo.ssh_url From 
9e5493cd1337d4634c11f0e2732ad084009ad031 Mon Sep 17 00:00:00 2001 From: crass Date: Wed, 23 Oct 2019 01:29:53 -0500 Subject: [PATCH 20/22] Add requests to requirements.txt --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 65bf71b..28112b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -PyGitHub +requests +PyGitHub \ No newline at end of file From 2ec391ae7e9c5facc480a49ae85f773f3e112190 Mon Sep 17 00:00:00 2001 From: crass Date: Wed, 23 Oct 2019 02:22:53 -0500 Subject: [PATCH 21/22] Decode the response content using the response's encoding, or utf-8 if the response declares none. Add debug logging of request in fetch_url. --- github-backup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/github-backup.py b/github-backup.py index 2e9e03f..7986f59 100755 --- a/github-backup.py +++ b/github-backup.py @@ -246,7 +246,9 @@ def fetch_url(url, outfile): "User-Agent": "PyGithub/Python" } with open(outfile, 'w') as f: - f.write(requests.get(url, headers=headers).content) + resp = requests.get(url, headers=headers) + LOGGER.debug("GET %s %r ==> %d %r", url, headers, resp.status_code, resp.headers) + f.write(resp.content.decode(resp.encoding or 'utf-8')) def process_account(gh, account, args): LOGGER.info("Processing account: %s", account.login) From 41892d62be6777c2dd68df74f9c57014fa430dd7 Mon Sep 17 00:00:00 2001 From: crass Date: Wed, 23 Oct 2019 02:23:42 -0500 Subject: [PATCH 22/22] Add myself as an author. --- github-backup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/github-backup.py b/github-backup.py index 7986f59..8bedbb7 100755 --- a/github-backup.py +++ b/github-backup.py @@ -3,6 +3,7 @@ """ Authors: Anthony Gargiulo (anthony@agargiulo.com) Steffen Vogel (post@steffenvogel.de) + Glenn Washburn (development@efficientek.com) Created: Fri Jun 15 2012 """