#!/usr/bin/env python

"""
Authors: Anthony Gargiulo (anthony@agargiulo.com)
         Steffen Vogel (post@steffenvogel.de)
         Glenn Washburn (development@efficientek.com)

Created: Fri Jun 15 2012
"""


import os
import errno
import codecs
import json
import functools
import subprocess
import logging
import getpass
from datetime import datetime, timezone
import time
try:  # PY3
    from configparser import ConfigParser
except ImportError:  # PY2
    from ConfigParser import SafeConfigParser as ConfigParser
from argparse import ArgumentParser

import requests
import github
from github import RateLimitExceededException

LOGGER = logging.getLogger('github-backup')

IS_AUTHORIZED = False
CONFFILE = os.path.join(os.path.expanduser('~'), '.github-backup.conf')

def main():
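    """Parse arguments, authenticate with GitHub, and back up the selected account."""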
    global IS_AUTHORIZED
    logging.basicConfig(level=logging.INFO)


    parser = init_parser()
    args = parser.parse_args()

    if args.quiet:
        LOGGER.setLevel(logging.WARNING)
    elif args.debug:
        LOGGER.setLevel(logging.DEBUG)
        github.enable_console_debug_logging()

    # Process args
    if args.quiet:
        args.git.append("--quiet")
    if args.include_everything:
        args.account = True
        args.include_starred = True
        args.include_watched = True
        args.include_followers = True
        args.include_following = True
        args.include_issues = True
        args.include_issue_comments = True
        args.include_issue_events = True
        args.include_pulls = True
        args.include_pull_comments = True
        args.include_pull_commits = True
        args.include_keys = True
        args.include_releases = True
        args.include_assets = True
        args.include_wiki = True
    if args.include_starred or args.include_watched or args.include_followers \
       or args.include_following or args.include_keys:
        args.account = True

    args.backupdir = args.backupdir.rstrip("/")

    # Make the connection to Github here.
    config = {}
    if args.password is False:
        # no password option given, continue unauthenticated
        # unauthenticated users can only use the http git method
        args.type = 'http'
    elif args.password is None:
        # password option given, but no password value given
        config = {'login_or_token': args.login_or_token}
        if os.path.isfile(CONFFILE):
            cfg = ConfigParser()
            cfg.read(CONFFILE)
            try:
                config['password'] = cfg.get('github-backup', 'APITOKEN')
            except Exception:
                config['password'] = cfg.get('github-backup', 'PASSWORD')
        else:
            password = getpass.getpass('Enter password for {}: '.format(config['login_or_token']))
            if password:
                config['password'] = password
    else:
        config = {'login_or_token': args.login_or_token,
                  'password': args.password}

    # Redact the credential so debug logs don't leak it.
    LOGGER.debug("Github config: %r",
                 dict(config, password='***') if 'password' in config else config)
    global gh
    gh = github.Github(**config)

    # Check that backup dir exists
    if not os.path.exists(args.backupdir):
        mkdir_p(args.backupdir)

    if args.organization:
        if args.password:
            account = gh.get_organization(args.organization)
        else:
            account = gh.get_organization(args.login_or_token)
    else:
        if args.username:
            account = gh.get_user(args.username)
        elif config.get('password', None):
            account = gh.get_user()
        else:
            account = gh.get_user(args.login_or_token)

    IS_AUTHORIZED = isinstance(account, github.AuthenticatedUser.AuthenticatedUser)
    # Sanity check: we should be authorized exactly when credentials were supplied.
    assert not (bool(config.get('password', None)) ^ IS_AUTHORIZED), account

    if args.include_keys and not IS_AUTHORIZED:
        LOGGER.info("Cannot back up keys with an unauthenticated account, ignoring...")
        args.include_keys = False

    filters = {}
    if IS_AUTHORIZED:
        # Get all repos
        filters = {
            'affiliation': ','.join(args.affiliation),
            'visibility': args.visibility
        }

    if args.account:
        process_account(gh, account, args)

    if args.include_gists:
        for gist in get_account_gists(account):
            RepositoryBackup(gist, args).backup()

    if args.include_starred_gists and hasattr(account, 'get_starred_gists'):
        for gist in get_account_starred_gists(account):
            RepositoryBackup(gist, args).backup()

    if not args.skip_repos:
        repos = get_account_repos(account, **filters)
        for repo in repos:
            if args.skip_forks and repo.fork:
                continue

            RepositoryBackup(repo, args).backup()

def rate_limited_retry():
    """Decorator: retry a PyGithub call after waiting out a rate-limit window."""
    def decorator(func):
        @functools.wraps(func)
        def ret(*args, **kwargs):
            for _ in range(3):
                try:
                    return func(*args, **kwargs)
                except RateLimitExceededException:
                    limits = gh.get_rate_limit()
                    LOGGER.warning("Rate limit exceeded")
                    LOGGER.warning("Search: %s Core: %s GraphQL: %s",
                                   limits.search, limits.core, limits.graphql)

                    if limits.search.remaining == 0:
                        limited = limits.search
                    elif limits.graphql.remaining == 0:
                        limited = limits.graphql
                    else:
                        limited = limits.core
                    reset = limited.reset.replace(tzinfo=timezone.utc)
                    now = datetime.now(timezone.utc)
                    # Add a 30 second margin so we don't wake up just before the reset.
                    seconds = (reset - now).total_seconds() + 30
                    if seconds > 0.0:
                        LOGGER.warning("Rate limit resets in %.0f seconds; waiting...", seconds)
                        time.sleep(seconds)
                        LOGGER.warning("Done waiting - resume!")
            raise RuntimeError("Failed too many times due to rate limiting")
        return ret
    return decorator

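# Thin wrappers around PyGithub calls. Every attribute access may trigger an
# API request, so routing them through rate_limited_retry() lets a long backup
# ride out GitHub's rate-limit windows instead of crashing.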
@rate_limited_retry()
def get_search_issues(gh, author, issue_type):
    # issue_type is 'issue' or 'pr'; renamed to avoid shadowing the builtin type()
    return gh.search_issues('', author=author, type=issue_type)

@rate_limited_retry()
def get_issue_comments(issue):
    return issue.get_comments()

@rate_limited_retry()
def get_issue_events(issue):
    return issue.get_events()

@rate_limited_retry()
def get_issue_as_pull_request(issue):
    return issue.as_pull_request()

@rate_limited_retry()
def get_issue_commits(issue):
    return issue.get_commits()

@rate_limited_retry()
def get_repo_releases(repo):
    return repo.get_releases()

@rate_limited_retry()
def get_release_assets(release):
    return release.get_assets()

@rate_limited_retry()
def get_repo_issues(repo, state):
    return repo.get_issues(state=state)

@rate_limited_retry()
def get_repo_pulls(repo, state):
    return repo.get_pulls(state=state)

@rate_limited_retry()
def get_account_login(account):
    return account.login

@rate_limited_retry()
def get_comment_raw_data(comment):
    return comment.raw_data

@rate_limited_retry()
def get_release_raw_data(release):
    return release.raw_data

@rate_limited_retry()
def get_commit_raw_data(commit):
    return commit.raw_data

@rate_limited_retry()
def get_repo_raw_data(repo):
    return repo.raw_data

@rate_limited_retry()
def get_event_raw_data(event):
    return event.raw_data

@rate_limited_retry()
def get_account_raw_data(account):
    return account.raw_data

@rate_limited_retry()
def get_key_raw_data(key):
    return key.raw_data

@rate_limited_retry()
def get_issue_raw_data(issue):
    return issue.raw_data

@rate_limited_retry()
def get_account_emails(account):
    return account.get_emails()

@rate_limited_retry()
def get_account_starred_urls(account):
    return account.starred_url

@rate_limited_retry()
def get_account_subscriptions_url(account):
    return account.subscriptions_url

@rate_limited_retry()
def get_account_followers_url(account):
    return account.followers_url

@rate_limited_retry()
def get_account_following_url(account):
    return account.following_url

@rate_limited_retry()
def get_account_keys(account):
    return account.get_keys()

@rate_limited_retry()
def get_account_gists(account):
    return account.get_gists()

@rate_limited_retry()
def get_account_starred_gists(account):
    return account.get_starred_gists()

@rate_limited_retry()
def get_account_repos(account, **filters):
    return account.get_repos(**filters)

def init_parser():
    """Set up the argument parser."""

    parser = ArgumentParser(description="Makes a backup of a GitHub user's account.")

    parser.add_argument("login_or_token", help="A Github username or token for authenticating")
    parser.add_argument("backupdir", help="The folder where you want your backups to go")
    parser.add_argument("-v", "--visibility", help="Filter repos by their visibility", choices=['all', 'public', 'private'], default='all')
    parser.add_argument("-a", "--affiliation", help="Filter repos by their affiliation", action='append', type=str, default=['owner'], choices=['owner', 'collaborator', 'organization_member'])
    parser.add_argument("-d", "--debug", help="Show debug info", action="store_true")
    parser.add_argument("-q", "--quiet", help="Only show errors", action="store_true")
    parser.add_argument("-m", "--mirror", help="Create a bare mirror", action="store_true")
    parser.add_argument("-f", "--skip-forks", help="Skip forks", action="store_true")
    parser.add_argument("--skip-repos", help="Skip backing up repositories", action="store_true")
    parser.add_argument("-g", "--git", nargs="+", help="Pass extra arguments to git", type=list, default=[], metavar="ARGS")
    parser.add_argument("-t", "--type", help="Select the protocol for cloning", choices=['git', 'http', 'ssh'], default='ssh')
    parser.add_argument("-s", "--suffix", help="Add suffix to repository directory names", default="")
    parser.add_argument("-u", "--username", help="Backup USER account", metavar="USER")
    parser.add_argument("-p", "--password", help="Authenticate with Github API (give no argument to check ~/.github-backup.conf or prompt for a password)", nargs="?", default=False)
    parser.add_argument("-P", "--prefix", help="Add prefix to repository directory names", default="")
    parser.add_argument("-o", "--organization", help="Backup Organizational repositories", metavar="ORG")
    parser.add_argument("-A", "--account", help="Backup account data", action='store_true')
    parser.add_argument('--all',
                        action='store_true',
                        dest='include_everything',
                        help='include everything in backup (not including [*])')
    parser.add_argument('--starred',
                        action='store_true',
                        dest='include_starred',
                        help='include JSON output of starred repositories in backup')
    parser.add_argument('--watched',
                        action='store_true',
                        dest='include_watched',
                        help='include JSON output of watched repositories in backup')
    parser.add_argument('--followers',
                        action='store_true',
                        dest='include_followers',
                        help='include JSON output of followers in backup')
    parser.add_argument('--following',
                        action='store_true',
                        dest='include_following',
                        help='include JSON output of following users in backup')
    parser.add_argument('--issues',
                        action='store_true',
                        dest='include_issues',
                        help='include issues in backup')
    parser.add_argument('--issue-comments',
                        action='store_true',
                        dest='include_issue_comments',
                        help='include issue comments in backup')
    parser.add_argument('--issue-events',
                        action='store_true',
                        dest='include_issue_events',
                        help='include issue events in backup')
    parser.add_argument('--pulls',
                        action='store_true',
                        dest='include_pulls',
                        help='include pull requests in backup')
    parser.add_argument('--pull-comments',
                        action='store_true',
                        dest='include_pull_comments',
                        help='include pull request review comments in backup')
    parser.add_argument('--pull-commits',
                        action='store_true',
                        dest='include_pull_commits',
                        help='include pull request commits in backup')
    parser.add_argument('--keys',
                        action='store_true',
                        dest='include_keys',
                        help='include ssh keys in backup')
    parser.add_argument('--wikis',
                        action='store_true',
                        dest='include_wiki',
                        help='include wiki clone in backup')
    parser.add_argument('--gists',
                        action='store_true',
                        dest='include_gists',
                        help='include gists in backup [*]')
    parser.add_argument('--starred-gists',
                        action='store_true',
                        dest='include_starred_gists',
                        help='include starred gists in backup [*]')
    parser.add_argument('--releases',
                        action='store_true',
                        dest='include_releases',
                        help='include release information, not including assets or binaries'
                        )
    parser.add_argument('--assets',
                        action='store_true',
                        dest='include_assets',
                        help='include assets alongside release information; only applies if including releases')

    return parser

def fetch_url(url, outfile):
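    """Fetch url and write the raw response body to outfile."""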
    headers = {
        "User-Agent": "PyGithub/Python"
    }
    resp = requests.get(url, headers=headers)
    LOGGER.debug("GET %s %r ==> %d %r", url, headers, resp.status_code, resp.headers)
    # Write the raw bytes so binary release assets survive intact;
    # decoding to text corrupted non-UTF-8 downloads.
    with open(outfile, 'wb') as f:
        f.write(resp.content)

def process_account(gh, account, args):
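    """Back up account-level data: profile, emails, stars, followers, keys, issues and pull requests."""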
    LOGGER.info("Processing account: %s", get_account_login(account))

    dir = os.path.join(args.backupdir, 'account')
    if not os.access(dir, os.F_OK):
        mkdir_p(dir)

    account_file = os.path.join(dir, 'account.json')
    with codecs.open(account_file, 'w', encoding='utf-8') as f:
        json_dump(get_account_raw_data(account), f)

    if IS_AUTHORIZED:
        emails_file = os.path.join(dir, 'emails.json')
        with codecs.open(emails_file, 'w', encoding='utf-8') as f:
            json_dump(list(get_account_emails(account)), f)

    if args.include_starred:
        LOGGER.info("    Getting starred repository list")
        fetch_url(get_account_starred_urls(account), os.path.join(dir, 'starred.json'))

    if args.include_watched:
        LOGGER.info("    Getting watched repository list")
        fetch_url(get_account_subscriptions_url(account), os.path.join(dir, 'watched.json'))

    if args.include_followers:
        LOGGER.info("    Getting followers list")
        fetch_url(get_account_followers_url(account), os.path.join(dir, 'followers.json'))

    if args.include_following:
        LOGGER.info("    Getting following list")
        fetch_url(get_account_following_url(account), os.path.join(dir, 'following.json'))

    if args.include_keys:
        LOGGER.info("    Getting keys")
        for key in get_account_keys(account):
            key_dir = os.path.join(dir, 'keys')
            if not os.access(key_dir, os.F_OK):
                mkdir_p(key_dir)
            key_file = os.path.join(key_dir, key.title+'.json')
            with codecs.open(key_file, 'w', encoding='utf-8') as f:
                json_dump(get_key_raw_data(key), f)

    # Searching by author already returns everything the account created; the
    # old ('assigned', 'created') loop never used its loop variable and simply
    # ran the identical query twice, producing duplicate results.
    if args.include_issues:
        LOGGER.info("    Getting issues for user %s", get_account_login(account))
        issues = get_search_issues(gh, get_account_login(account), 'issue')
        RepositoryBackup._backup_issues(issues, args, dir)

    if args.include_pulls:
        LOGGER.info("    Getting pull requests for user %s", get_account_login(account))
        issues = get_search_issues(gh, get_account_login(account), 'pr')
        RepositoryBackup._backup_pulls(issues, args, dir)


class RepositoryBackup(object):
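    """Clones or updates a single repository (or gist) and saves its metadata."""
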
    def __init__(self, repo, args):
        self.repo = repo
        self.args = args

        self.is_gist = isinstance(repo, github.Gist.Gist)

        if self.is_gist:
            dir = os.path.join(args.backupdir, 'gists', repo.id)
        else:
            dir = os.path.join(args.backupdir, 'repositories', args.prefix + repo.name + args.suffix, 'repository')
        self.dir = dir

        if self.is_gist:
            url = repo.git_pull_url
        elif args.type == 'http' or not IS_AUTHORIZED:
            url = repo.clone_url
        elif args.type == 'ssh':
            url = repo.ssh_url
        elif args.type == 'git':
            url = repo.git_url
        self.url = url

        self.wiki_url = None
        # Gist objects have no has_wiki attribute, so guard on is_gist first.
        if not self.is_gist and args.include_wiki and repo.has_wiki:
            # Wikis live at the ".wiki.git" suffix; replace only the trailing
            # ".git" so repository names containing ".git" still map correctly.
            if self.url.endswith('.git'):
                self.wiki_url = self.url[:-len('.git')] + '.wiki.git'
            else:
                self.wiki_url = self.url + '.wiki.git'
            self.wiki_dir = os.path.join(args.backupdir, 'repositories', args.prefix + repo.name + args.suffix, 'wiki')

    def backup(self):
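        """Clone or update the repo (and wiki), then save gist info, releases, issues and pull requests."""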
        if self.is_gist:
            LOGGER.info("Processing gist: %s", self.repo.id)
        else:
            LOGGER.info("Processing repo: %s", self.repo.full_name)

        config = os.path.join(self.dir, "config" if self.args.mirror else ".git/config")
        if not os.access(os.path.dirname(self.dir), os.F_OK):
            mkdir_p(os.path.dirname(self.dir))
        if not os.access(config, os.F_OK):
            LOGGER.info("Repo doesn't exist, let's clone it")
            self.clone_repo(self.url, self.dir)
        else:
            LOGGER.info("Repo already exists, let's try to update it instead")
            self.update_repo(self.dir)

        if self.wiki_url:
            config = os.path.join(self.wiki_dir, "config" if self.args.mirror else ".git/config")
            if not os.access(os.path.dirname(self.wiki_dir), os.F_OK):
                mkdir_p(os.path.dirname(self.wiki_dir))
            if not os.access(config, os.F_OK):
                LOGGER.info("Wiki repo doesn't exist, let's clone it")
                self.clone_repo(self.wiki_url, self.wiki_dir)
            else:
                LOGGER.info("Wiki repo already exists, let's try to update it instead")
                self.update_repo(self.wiki_dir)

        if self.is_gist:
            # Save extra gist info
            gist_file = os.path.join(os.path.dirname(self.dir), self.repo.id+'.json')
            with codecs.open(gist_file, 'w', encoding='utf-8') as f:
                json_dump(get_repo_raw_data(self.repo), f)
        else:
            if self.args.include_releases:
                self._backup_releases()

            if self.args.include_issues:
                LOGGER.info("    Getting issues for repo %s", self.repo.name)
                #self._backup_issues(self.repo.get_issues(state='all'), self.args, os.path.dirname(self.dir))
                self._backup_issues(get_repo_issues(self.repo, 'all'), self.args, os.path.dirname(self.dir))

            if self.args.include_pulls:
                LOGGER.info("    Getting pull requests for repo %s", self.repo.name)
                #self._backup_pulls(self.repo.get_pulls(state='all'), self.args, os.path.dirname(self.dir))
                self._backup_pulls(get_repo_pulls(self.repo, 'all'), self.args, os.path.dirname(self.dir))

    def clone_repo(self, url, dir):
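        """git-clone url into dir, as a bare mirror when --mirror is given."""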
        git_args = [url, os.path.basename(dir)]
        if self.args.mirror:
            git_args.insert(0, '--mirror')

        git("clone", git_args, self.args.git, os.path.dirname(dir))

    def update_repo(self, dir):
        # GitHub => Local
        # TODO: use subprocess package and fork git into
        #       background (major performance boost expected)
        args, repo = self.args, self.repo
        if args.mirror:
            git("fetch", ["--prune"], args.git, dir)
        else:
            git("pull", gargs=args.git, gdir=dir)

        # Fetch description and owner (useful for gitweb, cgit etc.)
        if repo.description:
            git("config", ["--local", "gitweb.description",
                repo.description], gdir=dir)

        if repo.owner.name and repo.owner.email:
            # Don't .encode() here: "%s" % bytes renders as "b'...'" on Python 3.
            owner = "%s <%s>" % (repo.owner.name, repo.owner.email)
            git("config", ["--local", "gitweb.owner", owner], gdir=dir)

        if self.is_gist:
            git("config", ["--local", "cgit.name", str(repo.id)], gdir=dir)
            git("config", ["--local", "cgit.clone-url", str(repo.git_pull_url)], gdir=dir)
        else:
            git("config", ["--local", "cgit.name", str(repo.name)], gdir=dir)
            git("config", ["--local", "cgit.defbranch", str(repo.default_branch)], gdir=dir)
            git("config", ["--local", "cgit.clone-url", str(repo.clone_url)], gdir=dir)

    @classmethod
    def _backup_issues(cls, issues, args, dir):
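        """Write one JSON file per issue under dir/issues/, optionally with comments and events."""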
        for issue in issues:
            project = os.path.basename(os.path.dirname(os.path.dirname(issue.url)))
            issue_data = get_issue_raw_data(issue).copy()
            LOGGER.info("     * %s[%s]: %s", project, issue.number, issue.title)
            if args.include_issue_comments and issue.comments:
                #for comment in issue.get_comments():
                for comment in get_issue_comments(issue):
                    issue_data.setdefault('comment_data', []).append(get_comment_raw_data(comment))
            if args.include_issue_events:
                for event in get_issue_events(issue):
                    issue_data.setdefault('event_data', []).append(get_event_raw_data(event))

            issue_file = os.path.join(dir, 'issues', "{0}:{1}.json".format(project, issue.number))
            if not os.access(os.path.dirname(issue_file), os.F_OK):
                mkdir_p(os.path.dirname(issue_file))
            with codecs.open(issue_file, 'w', encoding='utf-8') as f:
                json_dump(issue_data, f)

    @classmethod
    def _backup_pulls(cls, issues, args, dir):
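        """Write one JSON file per pull request under dir/pull-requests/, optionally with comments and commits."""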
        for issue in issues:
            project = os.path.basename(os.path.dirname(os.path.dirname(issue.url)))
            if isinstance(issue, github.Issue.Issue):
                issue = get_issue_as_pull_request(issue)
            issue_data = get_issue_raw_data(issue).copy()
            LOGGER.info("     * %s[%s]: %s", project, issue.number, issue.title)
            if args.include_pull_comments and issue.comments:
                for comment in get_issue_comments(issue):
                    issue_data.setdefault('comment_data', []).append(get_comment_raw_data(comment))
            if args.include_pull_commits and issue.commits:
                for commit in get_issue_commits(issue):
                    issue_data.setdefault('commit_data', []).append(get_commit_raw_data(commit))

            issue_file = os.path.join(dir, 'pull-requests', "{0}:{1}.json".format(project, issue.number))
            if not os.access(os.path.dirname(issue_file), os.F_OK):
                mkdir_p(os.path.dirname(issue_file))
            with codecs.open(issue_file, 'w', encoding='utf-8') as f:
                json_dump(issue_data, f)

    def _backup_releases(self):
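        """Save release metadata (and optionally their assets) under releases/."""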
        for release in get_repo_releases(self.repo):
            rel_dir = os.path.join(os.path.dirname(self.dir), 'releases')
            rel_file = os.path.join(rel_dir, release.tag_name+'.json')
            if not os.access(rel_dir, os.F_OK):
                mkdir_p(rel_dir)
            with codecs.open(rel_file, 'w', encoding='utf-8') as f:
                json_dump(get_release_raw_data(release), f)

            if self.args.include_assets:
                for asset in get_release_assets(release):
                    asset_dir = os.path.join(rel_dir, release.tag_name)
                    asset_file = os.path.join(asset_dir, asset.name)
                    if not os.access(asset_dir, os.F_OK):
                        mkdir_p(asset_dir)
                    fetch_url(asset.browser_download_url, asset_file)
                    assert asset.size == os.path.getsize(asset_file)


def git(gcmd, args=None, gargs=None, gdir=""):
    # None defaults instead of mutable list defaults (a classic Python pitfall).
    cmd = ["git"]
    if gdir:
        cmd.extend(["-C", gdir])
    cmd.append(gcmd)
    cmd.extend(gargs or [])
    cmd.extend(args or [])

    LOGGER.debug("Running: %r", cmd)
    subprocess.call(cmd)

def json_dump(data, output_file):
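    """Serialize data to output_file as pretty-printed, key-sorted JSON."""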
    json.dump(data,
              output_file,
              ensure_ascii=False,
              sort_keys=True,
              indent=4,
              separators=(',', ': '))

def mkdir_p(path):
    # os.makedirs already creates intermediate directories, so we only need
    # to tolerate the directory already existing.
    try:
        os.makedirs(path)
    except OSError as exc:
        if not (exc.errno == errno.EEXIST and os.path.isdir(path)):
            raise

if __name__ == "__main__":
    main()