#!/usr/bin/env python
"""
Authors: Anthony Gargiulo (anthony@agargiulo.com)
         Steffen Vogel (post@steffenvogel.de)
         Glenn Washburn (development@efficientek.com)

Created: Fri Jun 15 2012
"""
import os
import errno
import codecs
import json
import itertools
import subprocess
import logging
import getpass
import time
from datetime import datetime, timezone

try:  # PY3
    # SafeConfigParser was deprecated on PY3 and removed in Python 3.12.
    from configparser import ConfigParser
except ImportError:  # PY2
    from ConfigParser import SafeConfigParser as ConfigParser

from argparse import ArgumentParser

import requests
import github
from github import RateLimitExceededException

LOGGER = logging.getLogger('github-backup')

IS_AUTHORIZED = False
CONFFILE = os.path.join(os.getenv('HOME'), '.github-backup.conf')
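# ~/.github-backup.conf is a plain INI file, read when -p/--password is given
# without a value. A minimal sketch (the token value is a placeholder):
#
#   [github-backup]
#   APITOKEN = <your-personal-access-token>
#
# A PASSWORD key is used as a fallback when APITOKEN is absent.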
def main():
    global IS_AUTHORIZED

    logging.basicConfig(level=logging.INFO)

    parser = init_parser()
    args = parser.parse_args()

    if args.quiet:
        LOGGER.setLevel(logging.WARN)
    elif args.debug:
        LOGGER.setLevel(logging.DEBUG)
        github.enable_console_debug_logging()

    # Process args
    if args.quiet:
        args.git.append("--quiet")

    if args.include_everything:
        args.account = True
        args.include_starred = True
        args.include_watched = True
        args.include_followers = True
        args.include_following = True
        args.include_issues = True
        args.include_issue_comments = True
        args.include_issue_events = True
        args.include_pulls = True
        args.include_pull_comments = True
        args.include_pull_commits = True
        args.include_keys = True
        args.include_releases = True
        args.include_assets = True
        args.include_wiki = True
    if args.include_starred or args.include_watched or args.include_followers \
            or args.include_following or args.include_keys:
        args.account = True

    args.backupdir = args.backupdir.rstrip("/")
    # Make the connection to Github here.
    config = {}
    if args.password is False:
        # no password option given, continue unauthenticated
        # unauthenticated users can only use the http git method
        args.type = 'http'
    elif args.password is None:
        # password option given, but no password value given
        config = {'login_or_token': args.login_or_token}
        if os.path.isfile(CONFFILE):
            cfg = ConfigParser()
            cfg.read(CONFFILE)
            try:
                config['password'] = cfg.get('github-backup', 'APITOKEN')
            except Exception:
                config['password'] = cfg.get('github-backup', 'PASSWORD')
        else:
            password = getpass.getpass('Enter password for {}: '.format(config['login_or_token']))
            if password:
                config['password'] = password
    else:
        config = {'login_or_token': args.login_or_token}
        config['password'] = args.password

    LOGGER.debug("Github config: %r", config)

    # The client is global so the rate-limited wrappers can query limits on it.
    global gh
    gh = github.Github(**config)
    # Check that the backup dir exists
    if not os.path.exists(args.backupdir):
        mkdir_p(args.backupdir)

    if args.organization:
        if args.password:
            account = gh.get_organization(args.organization)
        else:
            account = gh.get_organization(args.login_or_token)
    else:
        if args.username:
            account = gh.get_user(args.username)
        elif config.get('password', None):
            account = gh.get_user()
        else:
            account = gh.get_user(args.login_or_token)

    IS_AUTHORIZED = isinstance(account, github.AuthenticatedUser.AuthenticatedUser)
    # Sanity check: we should be authorized if and only if credentials were given.
    assert not (bool(config.get('password', None)) ^ IS_AUTHORIZED), account
    if args.include_keys and not IS_AUTHORIZED:
        LOGGER.info("Cannot backup keys with unauthenticated account, ignoring...")
        args.include_keys = False

    filters = {}
    if IS_AUTHORIZED:
        # Get all repos
        filters = {
            'affiliation': ','.join(args.affiliation),
            'visibility': args.visibility
        }

    if args.account:
        process_account(gh, account, args)

    if args.include_gists:
        for gist in get_account_gists(account):
            RepositoryBackup(gist, args).backup()
    if args.include_starred_gists and hasattr(account, 'get_starred_gists'):
        for gist in get_account_starred_gists(account):
            RepositoryBackup(gist, args).backup()

    if not args.skip_repos:
        repos = get_account_repos(account, **filters)
        for repo in repos:
            if args.skip_forks and repo.fork:
                continue
            RepositoryBackup(repo, args).backup()
def rate_limited_retry():
    def decorator(func):
        def ret(*args, **kwargs):
            for _ in range(3):
                try:
                    return func(*args, **kwargs)
                except RateLimitExceededException:
                    limits = gh.get_rate_limit()
                    LOGGER.warning("Rate limit exceeded")
                    LOGGER.warning("Search: %s Core: %s GraphQL: %s",
                                   limits.search, limits.core, limits.graphql)
                    if limits.search.remaining == 0:
                        limited = limits.search
                    elif limits.graphql.remaining == 0:
                        limited = limits.graphql
                    else:
                        limited = limits.core
                    reset = limited.reset.replace(tzinfo=timezone.utc)
                    now = datetime.now(timezone.utc)
                    seconds = (reset - now).total_seconds() + 30
                    LOGGER.warning("Reset is in %s seconds.", seconds)
                    if seconds > 0.0:
                        LOGGER.warning("Waiting for %s seconds...", seconds)
                        time.sleep(seconds)
                        LOGGER.warning("Done waiting - resume!")
            raise Exception("Failed too many times")
        return ret
    return decorator
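# Every PyGithub call below goes through one of these thin wrappers so that a
# RateLimitExceededException triggers a sleep until the limit resets, followed
# by a retry (at most three attempts). The wrappers use list() to force
# paginated results to be fetched eagerly while still inside the wrapper, and
# they rely on the module-level `gh` client created in main().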
@rate_limited_retry()
def get_search_issues(gh, author, type):
    return list(gh.search_issues('', author=author, type=type))

@rate_limited_retry()
def get_issue_comments(issue):
    return list(issue.get_comments())

@rate_limited_retry()
def get_issue_events(issue):
    return list(issue.get_events())

@rate_limited_retry()
def get_issue_as_pull_request(issue):
    return issue.as_pull_request()

@rate_limited_retry()
def get_issue_commits(issue):
    return list(issue.get_commits())

@rate_limited_retry()
def get_repo_releases(repo):
    return list(repo.get_releases())

@rate_limited_retry()
def get_release_assets(release):
    return list(release.get_assets())

@rate_limited_retry()
def get_repo_issues(repo, state):
    return list(repo.get_issues(state=state))

@rate_limited_retry()
def get_repo_pulls(repo, state):
    return list(repo.get_pulls(state=state))

@rate_limited_retry()
def get_account_login(account):
    return account.login

@rate_limited_retry()
def get_comment_raw_data(comment):
    return comment.raw_data

@rate_limited_retry()
def get_release_raw_data(release):
    return release.raw_data

@rate_limited_retry()
def get_commit_raw_data(commit):
    return commit.raw_data

@rate_limited_retry()
def get_repo_raw_data(repo):
    return repo.raw_data

@rate_limited_retry()
def get_event_raw_data(event):
    return event.raw_data

@rate_limited_retry()
def get_account_raw_data(account):
    return account.raw_data

@rate_limited_retry()
def get_key_raw_data(key):
    return key.raw_data

@rate_limited_retry()
def get_issue_raw_data(issue):
    return issue.raw_data

@rate_limited_retry()
def get_account_emails(account):
    return account.get_emails()

@rate_limited_retry()
def get_account_starred_urls(account):
    return account.starred_url

@rate_limited_retry()
def get_account_subscriptions_url(account):
    return account.subscriptions_url

@rate_limited_retry()
def get_account_followers_url(account):
    return account.followers_url

@rate_limited_retry()
def get_account_following_url(account):
    return account.following_url

@rate_limited_retry()
def get_account_keys(account):
    return list(account.get_keys())

@rate_limited_retry()
def get_account_gists(account):
    return list(account.get_gists())

@rate_limited_retry()
def get_account_starred_gists(account):
    return list(account.get_starred_gists())

@rate_limited_retry()
def get_account_repos(account, **filters):
    return list(account.get_repos(**filters))
def init_parser():
    """Set up the argument parser."""

    parser = ArgumentParser(description="makes a backup of a github user's account")

    parser.add_argument("login_or_token", help="A Github username or token for authenticating")
    parser.add_argument("backupdir", help="The folder where you want your backups to go")
    parser.add_argument("-v", "--visibility", help="Filter repos by their visibility", choices=['all', 'public', 'private'], default='all')
    parser.add_argument("-a", "--affiliation", help="Filter repos by their affiliation", action='append', type=str, default=['owner'], choices=['owner', 'collaborator', 'organization_member'])
    parser.add_argument("-d", "--debug", help="Show debug info", action="store_true")
    parser.add_argument("-q", "--quiet", help="Only show errors", action="store_true")
    parser.add_argument("-m", "--mirror", help="Create a bare mirror", action="store_true")
    parser.add_argument("-f", "--skip-forks", help="Skip forks", action="store_true")
    parser.add_argument("--skip-repos", help="Skip backing up repositories", action="store_true")
    # type=str (not type=list) so each extra git argument stays a single string
    parser.add_argument("-g", "--git", nargs="+", help="Pass extra arguments to git", type=str, default=[], metavar="ARGS")
    parser.add_argument("-t", "--type", help="Select the protocol for cloning", choices=['git', 'http', 'ssh'], default='ssh')
    parser.add_argument("-s", "--suffix", help="Add suffix to repository directory names", default="")
    parser.add_argument("-u", "--username", help="Backup USER account", metavar="USER")
    parser.add_argument("-p", "--password", help="Authenticate with Github API (give no argument to check ~/.github-backup.conf or prompt for a password)", nargs="?", default=False)
    parser.add_argument("-P", "--prefix", help="Add prefix to repository directory names", default="")
    parser.add_argument("-o", "--organization", help="Backup Organizational repositories", metavar="ORG")
    parser.add_argument("-A", "--account", help="Backup account data", action='store_true')
    parser.add_argument('--all',
                        action='store_true',
                        dest='include_everything',
                        help='include everything in backup (not including [*])')
    parser.add_argument('--starred',
                        action='store_true',
                        dest='include_starred',
                        help='include JSON output of starred repositories in backup')
    parser.add_argument('--watched',
                        action='store_true',
                        dest='include_watched',
                        help='include JSON output of watched repositories in backup')
    parser.add_argument('--followers',
                        action='store_true',
                        dest='include_followers',
                        help='include JSON output of followers in backup')
    parser.add_argument('--following',
                        action='store_true',
                        dest='include_following',
                        help='include JSON output of following users in backup')
    parser.add_argument('--issues',
                        action='store_true',
                        dest='include_issues',
                        help='include issues in backup')
    parser.add_argument('--issue-comments',
                        action='store_true',
                        dest='include_issue_comments',
                        help='include issue comments in backup')
    parser.add_argument('--issue-events',
                        action='store_true',
                        dest='include_issue_events',
                        help='include issue events in backup')
    parser.add_argument('--pulls',
                        action='store_true',
                        dest='include_pulls',
                        help='include pull requests in backup')
    parser.add_argument('--pull-comments',
                        action='store_true',
                        dest='include_pull_comments',
                        help='include pull request review comments in backup')
    parser.add_argument('--pull-commits',
                        action='store_true',
                        dest='include_pull_commits',
                        help='include pull request commits in backup')
    parser.add_argument('--keys',
                        action='store_true',
                        dest='include_keys',
                        help='include ssh keys in backup')
    parser.add_argument('--wikis',
                        action='store_true',
                        dest='include_wiki',
                        help='include wiki clone in backup')
    parser.add_argument('--gists',
                        action='store_true',
                        dest='include_gists',
                        help='include gists in backup [*]')
    parser.add_argument('--starred-gists',
                        action='store_true',
                        dest='include_starred_gists',
                        help='include starred gists in backup [*]')
    parser.add_argument('--releases',
                        action='store_true',
                        dest='include_releases',
                        help='include release information, not including assets or binaries')
    parser.add_argument('--assets',
                        action='store_true',
                        dest='include_assets',
                        help='include assets alongside release information; only applies if including releases')

    return parser
def fetch_url(url, outfile):
    headers = {
        "User-Agent": "PyGithub/Python"
    }
    resp = requests.get(url, headers=headers)
    LOGGER.debug("GET %s %r ==> %d %r", url, headers, resp.status_code, resp.headers)
    # Write raw bytes so binary release assets survive the download intact;
    # decoding to text would corrupt them and break the size check in
    # _backup_releases().
    with open(outfile, 'wb') as f:
        f.write(resp.content)
def process_account(gh, account, args):
    LOGGER.info("Processing account: %s", get_account_login(account))

    dir = os.path.join(args.backupdir, 'account')
    if not os.access(dir, os.F_OK):
        mkdir_p(dir)

    account_file = os.path.join(dir, 'account.json')
    with codecs.open(account_file, 'w', encoding='utf-8') as f:
        json_dump(get_account_raw_data(account), f)

    if IS_AUTHORIZED:
        emails_file = os.path.join(dir, 'emails.json')
        with codecs.open(emails_file, 'w', encoding='utf-8') as f:
            json_dump(list(get_account_emails(account)), f)

    if args.include_starred:
        LOGGER.info("Getting starred repository list")
        fetch_url(get_account_starred_urls(account), os.path.join(dir, 'starred.json'))

    if args.include_watched:
        LOGGER.info("Getting watched repository list")
        fetch_url(get_account_subscriptions_url(account), os.path.join(dir, 'watched.json'))

    if args.include_followers:
        LOGGER.info("Getting followers list")
        fetch_url(get_account_followers_url(account), os.path.join(dir, 'followers.json'))

    if args.include_following:
        LOGGER.info("Getting following list")
        fetch_url(get_account_following_url(account), os.path.join(dir, 'following.json'))

    if args.include_keys:
        LOGGER.info("Getting keys")
        for key in get_account_keys(account):
            key_dir = os.path.join(dir, 'keys')
            if not os.access(key_dir, os.F_OK):
                mkdir_p(key_dir)
            key_file = os.path.join(key_dir, key.title + '.json')
            with codecs.open(key_file, 'w', encoding='utf-8') as f:
                json_dump(get_key_raw_data(key), f)

    # NOTE: the searches below currently ignore `filter`; both passes run the
    # same author query, so the chained results can contain duplicates.
    filters = ('assigned', 'created')
    if args.include_issues:
        LOGGER.info("Getting issues for user %s", get_account_login(account))
        issues = []
        for filter in filters:
            _issues = get_search_issues(gh, get_account_login(account), 'issue')
            issues = itertools.chain(issues, _issues)
        RepositoryBackup._backup_issues(issues, args, dir)

    if args.include_pulls:
        LOGGER.info("Getting pull requests for user %s", get_account_login(account))
        issues = []
        for filter in filters:
            _issues = get_search_issues(gh, get_account_login(account), 'pr')
            issues = itertools.chain(issues, _issues)
        RepositoryBackup._backup_pulls(issues, args, dir)
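# Directory layout produced per backup target (derived from the paths built in
# __init__ below):
#
#   <backupdir>/repositories/<prefix><name><suffix>/repository  - the clone
#   <backupdir>/repositories/<prefix><name><suffix>/wiki        - optional wiki clone
#   <backupdir>/gists/<gist id>                                 - gist clones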
class RepositoryBackup(object):
    def __init__(self, repo, args):
        self.repo = repo
        self.args = args

        self.is_gist = isinstance(repo, github.Gist.Gist)
        if self.is_gist:
            dir = os.path.join(args.backupdir, 'gists', repo.id)
        else:
            dir = os.path.join(args.backupdir, 'repositories', args.prefix + repo.name + args.suffix, 'repository')
        self.dir = dir

        if self.is_gist:
            url = repo.git_pull_url
        elif args.type == 'http' or not IS_AUTHORIZED:
            url = repo.clone_url
        elif args.type == 'ssh':
            url = repo.ssh_url
        elif args.type == 'git':
            url = repo.git_url
        self.url = url

        self.wiki_url = None
        # Gists have no wikis, so only repositories are checked here.
        if not self.is_gist and args.include_wiki and repo.has_wiki:
            self.wiki_url = self.url.replace('.git', '.wiki.git')
            self.wiki_dir = os.path.join(args.backupdir, 'repositories', args.prefix + repo.name + args.suffix, 'wiki')
    def backup(self):
        if self.is_gist:
            LOGGER.info("Processing gist: %s", self.repo.id)
        else:
            LOGGER.info("Processing repo: %s", self.repo.full_name)

        config = os.path.join(self.dir, "config" if self.args.mirror else ".git/config")
        if not os.access(os.path.dirname(self.dir), os.F_OK):
            mkdir_p(os.path.dirname(self.dir))

        if not os.access(config, os.F_OK):
            LOGGER.info("Repo doesn't exist, let's clone it")
            self.clone_repo(self.url, self.dir)
        else:
            LOGGER.info("Repo already exists, let's try to update it instead")
            self.update_repo(self.dir)

        if self.wiki_url:
            config = os.path.join(self.wiki_dir, "config" if self.args.mirror else ".git/config")
            if not os.access(os.path.dirname(self.wiki_dir), os.F_OK):
                mkdir_p(os.path.dirname(self.wiki_dir))
            if not os.access(config, os.F_OK):
                LOGGER.info("Wiki repo doesn't exist, let's clone it")
                self.clone_repo(self.wiki_url, self.wiki_dir)
            else:
                LOGGER.info("Wiki repo already exists, let's try to update it instead")
                self.update_repo(self.wiki_dir)

        if self.is_gist:
            # Save extra gist info
            gist_file = os.path.join(os.path.dirname(self.dir), self.repo.id + '.json')
            with codecs.open(gist_file, 'w', encoding='utf-8') as f:
                json_dump(get_repo_raw_data(self.repo), f)
        else:
            if self.args.include_releases:
                self._backup_releases()

            if self.args.include_issues:
                LOGGER.info("Getting issues for repo %s", self.repo.name)
                self._backup_issues(get_repo_issues(self.repo, 'all'), self.args, os.path.dirname(self.dir))

            if self.args.include_pulls:
                LOGGER.info("Getting pull requests for repo %s", self.repo.name)
                self._backup_pulls(get_repo_pulls(self.repo, 'all'), self.args, os.path.dirname(self.dir))
    def clone_repo(self, url, dir):
        git_args = [url, os.path.basename(dir)]
        if self.args.mirror:
            git_args.insert(0, '--mirror')
        git("clone", git_args, self.args.git, os.path.dirname(dir))

    def update_repo(self, dir):
        # GitHub => Local
        # TODO: fork git into the background
        # (major performance boost expected)
        args, repo = self.args, self.repo
        if args.mirror:
            git("fetch", ["--prune"], args.git, dir)
        else:
            git("pull", gargs=args.git, gdir=dir)

        # Fetch description and owner (useful for gitweb, cgit etc.)
        # Plain str values are passed; encoding to bytes here would embed
        # b'...' reprs in the git config under Python 3.
        if repo.description:
            git("config", ["--local", "gitweb.description", repo.description], gdir=dir)

        if repo.owner.name and repo.owner.email:
            owner = "%s <%s>" % (repo.owner.name, repo.owner.email)
            git("config", ["--local", "gitweb.owner", owner], gdir=dir)

        if self.is_gist:
            git("config", ["--local", "cgit.name", str(repo.id)], gdir=dir)
            git("config", ["--local", "cgit.clone-url", str(repo.git_pull_url)], gdir=dir)
        else:
            git("config", ["--local", "cgit.name", str(repo.name)], gdir=dir)
            git("config", ["--local", "cgit.defbranch", str(repo.default_branch)], gdir=dir)
            git("config", ["--local", "cgit.clone-url", str(repo.clone_url)], gdir=dir)
    @classmethod
    def _backup_issues(cls, issues, args, dir):
        for issue in issues:
            project = os.path.basename(os.path.dirname(os.path.dirname(issue.url)))
            issue_data = get_issue_raw_data(issue).copy()
            LOGGER.info("* %s[%s]: %s", project, issue.number, issue.title)
            if args.include_issue_comments and issue.comments:
                for comment in get_issue_comments(issue):
                    issue_data.setdefault('comment_data', []).append(get_comment_raw_data(comment))
            if args.include_issue_events:
                for event in get_issue_events(issue):
                    issue_data.setdefault('event_data', []).append(get_event_raw_data(event))

            issue_file = os.path.join(dir, 'issues', "{0}:{1}.json".format(project, issue.number))
            if not os.access(os.path.dirname(issue_file), os.F_OK):
                mkdir_p(os.path.dirname(issue_file))
            with codecs.open(issue_file, 'w', encoding='utf-8') as f:
                json_dump(issue_data, f)

    @classmethod
    def _backup_pulls(cls, issues, args, dir):
        for issue in issues:
            project = os.path.basename(os.path.dirname(os.path.dirname(issue.url)))
            if isinstance(issue, github.Issue.Issue):
                issue = get_issue_as_pull_request(issue)
            issue_data = get_issue_raw_data(issue).copy()
            LOGGER.info("* %s[%s]: %s", project, issue.number, issue.title)
            if args.include_pull_comments and issue.comments:
                for comment in get_issue_comments(issue):
                    issue_data.setdefault('comment_data', []).append(get_comment_raw_data(comment))
            if args.include_pull_commits and issue.commits:
                for commit in get_issue_commits(issue):
                    issue_data.setdefault('commit_data', []).append(get_commit_raw_data(commit))

            issue_file = os.path.join(dir, 'pull-requests', "{0}:{1}.json".format(project, issue.number))
            if not os.access(os.path.dirname(issue_file), os.F_OK):
                mkdir_p(os.path.dirname(issue_file))
            with codecs.open(issue_file, 'w', encoding='utf-8') as f:
                json_dump(issue_data, f)
    def _backup_releases(self):
        for release in get_repo_releases(self.repo):
            rel_dir = os.path.join(os.path.dirname(self.dir), 'releases')
            rel_file = os.path.join(rel_dir, release.tag_name + '.json')
            if not os.access(rel_dir, os.F_OK):
                mkdir_p(rel_dir)
            with codecs.open(rel_file, 'w', encoding='utf-8') as f:
                json_dump(get_release_raw_data(release), f)

            if self.args.include_assets:
                for asset in get_release_assets(release):
                    asset_dir = os.path.join(rel_dir, release.tag_name)
                    asset_file = os.path.join(asset_dir, asset.name)
                    if not os.access(asset_dir, os.F_OK):
                        mkdir_p(asset_dir)
                    fetch_url(asset.browser_download_url, asset_file)
                    # Sanity check: the file on disk must match the reported size.
                    assert asset.size == os.path.getsize(asset_file)
def git(gcmd, args=[], gargs=[], gdir=""):
    cmd = ["git"]
    if gdir:
        cmd.extend(["-C", gdir])
    cmd.append(gcmd)
    cmd.extend(gargs)
    cmd.extend(args)

    LOGGER.debug("Running: %r", cmd)
    subprocess.call(cmd)
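# Example of how clone_repo() drives this helper (URL and paths are
# illustrative only):
#
#   git("clone", ["--mirror", "https://github.com/user/repo.git", "repo"],
#       gargs=["--quiet"], gdir="/backups/github/repositories/repo")
#
# which runs:
#   git -C /backups/github/repositories/repo clone --quiet --mirror \
#       https://github.com/user/repo.git repo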
def json_dump(data, output_file):
    json.dump(data,
              output_file,
              ensure_ascii=False,
              sort_keys=True,
              indent=4,
              separators=(',', ': '))


def mkdir_p(path):
    head, tail = os.path.split(path)
    if head and not os.access(head, os.F_OK):
        mkdir_p(head)
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise
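# On Python 3, mkdir_p(path) behaves like os.makedirs(path, exist_ok=True);
# the hand-rolled version above is kept for older interpreters.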
if __name__ == "__main__":
    main()