diff --git a/pagure-loadjson/README.rst b/pagure-loadjson/README.rst
new file mode 100644
index 0000000..d2223d5
--- /dev/null
+++ b/pagure-loadjson/README.rst
@@ -0,0 +1,13 @@
+Pagure loadjson
+===============
+
+This service loads into the database the JSON files representing issues or
+pull-requests.
+
+This service is triggered by a git hook sending a notification that a push
+happened. It receives the notification, finds the list of files that
+changed and loads them into the database.
+
+ * Run::
+
+     PAGURE_CONFIG=/path/to/config PYTHONPATH=. python pagure-loadjson/pagure_loadjson_server.py
diff --git a/pagure-loadjson/pagure_loadjson.service b/pagure-loadjson/pagure_loadjson.service
new file mode 100644
index 0000000..2c963a6
--- /dev/null
+++ b/pagure-loadjson/pagure_loadjson.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=Pagure service loading JSON files into the DB
+After=redis.target
+Documentation=https://pagure.io/pagure
+
+[Service]
+ExecStart=/usr/libexec/pagure-loadjson/pagure_loadjson_server.py
+Type=simple
+User=git
+Group=git
+Restart=on-failure
+
+[Install]
+WantedBy=multi-user.target
diff --git a/pagure-loadjson/pagure_loadjson_server.py b/pagure-loadjson/pagure_loadjson_server.py
new file mode 100644
index 0000000..8848b08
--- /dev/null
+++ b/pagure-loadjson/pagure_loadjson_server.py
@@ -0,0 +1,293 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+ (c) 2017 - Copyright Red Hat Inc
+
+ Authors:
+   Pierre-Yves Chibon
+
+
+This server listens to the messages sent to redis by the post-receive hooks,
+finds the list of files modified by the commits listed in each message and
+syncs them into the database.
+
+Using this mechanism, we no longer need to block the git push until all the
+files have been uploaded (which when migrating some large projects over to
+pagure can be really time-consuming).
+
+"""
+
+from __future__ import print_function
+
+import json
+import logging
+import os
+
+import requests
+import trollius
+import trollius_redis
+
+
+_log = logging.getLogger(__name__)
+
+# Point to the default configuration file ahead of the pagure imports below.
+if 'PAGURE_CONFIG' not in os.environ \
+        and os.path.exists('/etc/pagure/pagure.cfg'):
+    print('Using configuration file `/etc/pagure/pagure.cfg`')
+    os.environ['PAGURE_CONFIG'] = '/etc/pagure/pagure.cfg'
+
+
+import pagure
+import pagure.lib
+import pagure.lib.git
+
+
+def get_files_to_load(title, new_commits_list, abspath):
+    ''' Return the list of the files changed by the specified commits.
+
+    :arg title: label prefixing the log messages (``_process_message``
+        passes the full name of the project).
+    :arg new_commits_list: list of commit hashes. It is walked in reverse
+        order and left unmodified.
+    :arg abspath: path of the git repository the commits belong to.
+    :return: the non-empty lines returned by ``git diff-tree`` for each
+        commit; a file changed by several commits appears several times.
+
+    '''
+    _log.info('%s: Retrieve the list of files changed', title)
+    # Work on a reversed copy so that the list of the caller is not mutated.
+    commits = list(reversed(new_commits_list))
+    total = len(commits)
+    file_list = []
+    for idx, commit in enumerate(commits):
+        if (idx % 100) == 0:
+            _log.info(
+                'Loading files change in commits for %s: %s/%s',
+                title, idx, total)
+        cmd = ['diff-tree', '--no-commit-id', '--name-only', '-r']
+        if commit == commits[0]:
+            # --root makes diff-tree list the files of a commit without
+            # parent, which the first commit processed may be.
+            cmd.append('--root')
+        cmd.append(commit)
+        for line in pagure.lib.git.read_git_lines(cmd, abspath):
+            line = line.strip()
+            if line:
+                file_list.append(line)
+
+    return file_list
+
+
+def _load_files(
+        session, data_type, repo, username, namespace, abspath, file_list):
+    ''' Load into the database the JSON files of the specified list.
+
+    Each file is read from ``HEAD`` of the git repository at ``abspath``.
+    Files under ``files/`` and files that are empty or are not valid JSON
+    are skipped. The loading stops at the first file failing to load, after
+    the session has been rolled back.
+
+    '''
+    total = len(file_list)
+    _log.info('%s files to process', total)
+
+    for idx, filename in enumerate(file_list):
+        _log.info('Loading: %s -- %s/%s', filename, idx, total)
+        if filename.startswith('files/'):
+            continue
+
+        content = ''.join(
+            pagure.lib.git.read_git_lines(
+                ['show', 'HEAD:%s' % filename], abspath))
+        if not content:
+            continue
+
+        try:
+            json_data = json.loads(content)
+        except ValueError:
+            _log.info('%s is not valid JSON, skipping it', filename)
+            continue
+        if not json_data:
+            continue
+
+        try:
+            # Only tickets are loaded; 'pull-request' messages are accepted
+            # by the caller but nothing is done with their files here.
+            if data_type == 'ticket':
+                pagure.lib.git.update_ticket_from_git(
+                    session,
+                    reponame=repo,
+                    namespace=namespace,
+                    username=username,
+                    issue_uid=filename,
+                    json_data=json_data
+                )
+        except Exception as err:
+            _log.info('data: %s', json_data)
+            session.rollback()
+            _log.exception(err)
+            break
+
+
+def _process_message(data):
+    ''' Load into the database the files changed by the commits listed in
+    the specified message.
+
+    :arg data: the message received from redis, decoded from JSON (see
+        ``handle_messages`` for its format).
+
+    '''
+    commits = data['commits']
+    abspath = data['abspath']
+    repo = data['project']['name']
+    # NOTE(review): the hook sends project.to_json(public=True); confirm it
+    # exposes the owner of a fork under the 'username' key.
+    username = data['project']['username']['name'] \
+        if data['project']['parent'] else None
+    namespace = data['project']['namespace']
+    data_type = data['data_type']
+
+    if data_type not in ['ticket', 'pull-request']:
+        _log.info('Invalid data_type retrieved: %s', data_type)
+        return
+
+    session = pagure.lib.create_session(pagure.APP.config['DB_URL'])
+    # The session is closed whichever way this function is left.
+    try:
+        _log.info(
+            'Looking for project: %s%s of user: %s',
+            '%s/' % namespace if namespace else '',
+            repo, username)
+        project = pagure.lib.get_project(
+            session, repo, user=username, namespace=namespace)
+
+        if not project:
+            _log.info('No project found')
+            return
+
+        _log.info('Found project: %s', project.fullname)
+
+        _log.info(
+            '%s: Processing %s commits in %s', project.fullname,
+            len(commits), abspath)
+
+        file_list = set(
+            get_files_to_load(project.fullname, commits, abspath))
+        _load_files(
+            session, data_type, repo, username, namespace, abspath,
+            file_list)
+
+        try:
+            session.commit()
+        except Exception:  # pragma: no cover
+            _log.exception('Could not commit the changes')
+            session.rollback()
+    finally:
+        session.close()
+
+
+@trollius.coroutine
+def handle_messages():
+    ''' Handles connecting to redis and acting upon messages received.
+    In this case, it means loading into the DB the files changed by the
+    commits specified in the message for the specified repo.
+
+    The currently accepted message format looks like:
+
+    ::
+
+        {
+            "project": {
+                "name": "foo",
+                "namespace": null,
+                "parent": null,
+                "username": {
+                    "name": "user"
+                }
+            },
+            "abspath": "/srv/git/repositories/pagure.git",
+            "commits": [
+                "b7b4059c44d692d7df3227ce58ce01191e5407bd",
+                "f8d0899bb6654590ffdef66b539fd3b8cf873b35",
+                "9b6fdc48d3edab82d3de28953271ea52b0a96117"
+            ],
+            "data_type": "ticket"
+        }
+
+    ``data_type`` must be either ``ticket`` or ``pull-request``.
+
+    '''
+
+    host = pagure.APP.config.get('REDIS_HOST', '0.0.0.0')
+    port = pagure.APP.config.get('REDIS_PORT', 6379)
+    dbname = pagure.APP.config.get('REDIS_DB', 0)
+    connection = yield trollius.From(trollius_redis.Connection.create(
+        host=host, port=port, db=dbname))
+
+    # Create subscriber.
+    subscriber = yield trollius.From(connection.start_subscribe())
+
+    # Subscribe to channel.
+    yield trollius.From(subscriber.subscribe(['pagure.loadjson']))
+
+    # Inside a while loop, wait for incoming events.
+    while True:
+        reply = yield trollius.From(subscriber.next_published())
+        _log.info(
+            'Received: %s on channel: %s',
+            repr(reply.value), reply.channel)
+        try:
+            _process_message(json.loads(reply.value))
+        except Exception:
+            # A message that cannot be processed must not end the coroutine,
+            # which would stop the service from handling the next ones.
+            _log.exception('Could not process the message, skipping it')
+        _log.info('Ready for another')
+
+
+def main():
+    ''' Start the main async loop. '''
+
+    try:
+        loop = trollius.get_event_loop()
+        tasks = [
+            trollius.async(handle_messages()),
+        ]
+        loop.run_until_complete(trollius.wait(tasks))
+        # NOTE(review): wait() also returns when handle_messages() ended
+        # with an exception; run_forever() then idles -- confirm intended.
+        loop.run_forever()
+    except KeyboardInterrupt:
+        pass
+    except trollius.ConnectionResetError:
+        pass
+
+    _log.info("End Connection")
+    loop.close()
+    _log.info("End")
+
+
+if __name__ == '__main__':
+    formatter = logging.Formatter(
+        "%(asctime)s %(levelname)s [%(module)s:%(lineno)d] %(message)s")
+
+    logging.basicConfig(level=logging.DEBUG)
+
+    # setup console logging
+    _log.setLevel(logging.DEBUG)
+    shellhandler = logging.StreamHandler()
+    shellhandler.setLevel(logging.DEBUG)
+
+    aslog = logging.getLogger("asyncio")
+    aslog.setLevel(logging.DEBUG)
+    aslog = logging.getLogger("trollius")
+    aslog.setLevel(logging.DEBUG)
+
+    # Turn down the logs coming from python-markdown
+    mklog = logging.getLogger("MARKDOWN")
+    mklog.setLevel(logging.WARN)
+
+    shellhandler.setFormatter(formatter)
+    _log.addHandler(shellhandler)
+    main()
diff --git a/pagure/hooks/files/pagure_hook_tickets.py b/pagure/hooks/files/pagure_hook_tickets.py
old mode 100755
new mode 100644
index 18847db..f684ce3
--- a/pagure/hooks/files/pagure_hook_tickets.py
+++ b/pagure/hooks/files/pagure_hook_tickets.py
@@ -4,6 +4,7 @@
 """Pagure specific hook to update tickets stored in the database based on
 the information pushed in the tickets git repository.
 """
+from __future__ import print_function
 
 import json
 import os
@@ -20,81 +21,66 @@ if 'PAGURE_CONFIG' not in os.environ \
 import pagure
 import pagure.lib.git
 
-
-abspath = os.path.abspath(os.environ['GIT_DIR'])
+from pagure.lib import REDIS
 
 
-def get_files_to_load(new_commits_list):
+abspath = os.path.abspath(os.environ['GIT_DIR'])
 
-    print 'Files changed by new commits:\n'
-    file_list = []
-    new_commits_list.reverse()
-    for commit in new_commits_list:
-        if commit == new_commits_list[0]:
-            filenames = pagure.lib.git.read_git_lines(
-                ['diff-tree', '--no-commit-id', '--name-only', '-r', '--root',
-                commit], abspath)
-        else:
-            filenames = pagure.lib.git.read_git_lines(
-                ['diff-tree', '--no-commit-id', '--name-only', '-r', commit],
-                abspath)
-        for line in filenames:
-            if line.strip():
-                file_list.append(line.strip())
 
-    return file_list
+def run_as_post_receive_hook():
+    """Publish to redis the commits pushed to this tickets repository.
+
+    For each ``oldrev newrev refname`` line read from stdin, one message
+    is published on the ``pagure.loadjson`` channel when REDIS is set.
+    """
 
+    repo = pagure.lib.git.get_repo_name(abspath)
+    username = pagure.lib.git.get_username(abspath)
+    namespace = pagure.lib.git.get_repo_namespace(
+        abspath, gitfolder=pagure.APP.config['TICKETS_FOLDER'])
+    if pagure.APP.config.get('HOOK_DEBUG', False):
+        print('repo:', repo)
+        print('user:', username)
+        print('namespace:', namespace)
 
-def run_as_post_receive_hook():
+    project = pagure.lib.get_project(
+        pagure.SESSION, repo, user=username, namespace=namespace)
+
+    if not project:
+        print('No project found')
+        return
 
-    file_list = set()
     for line in sys.stdin:
         if pagure.APP.config.get('HOOK_DEBUG', False):
-            print line
+            print(line)
         (oldrev, newrev, refname) = line.strip().split(' ', 2)
 
         if pagure.APP.config.get('HOOK_DEBUG', False):
-            print ' -- Old rev'
-            print oldrev
-            print ' -- New rev'
-            print newrev
-            print ' -- Ref name'
-            print refname
+            print(' -- Old rev')
+            print(oldrev)
+            print(' -- New rev')
+            print(newrev)
+            print(' -- Ref name')
+            print(refname)
 
         if set(newrev) == set(['0']):
-            print "Deleting a reference/branch, so we won't run the "\
-                "pagure hook"
+            print("Deleting a reference/branch, so we won't run the "
+                  "pagure hook")
             return
 
-        tmp = set(get_files_to_load(
-            pagure.lib.git.get_revs_between(oldrev, newrev, abspath, refname)))
-        file_list = file_list.union(tmp)
-
-    reponame = pagure.lib.git.get_repo_name(abspath)
-    username = pagure.lib.git.get_username(abspath)
-    namespace = pagure.lib.git.get_repo_namespace(
-        abspath, gitfolder=pagure.APP.config['TICKETS_FOLDER'])
-    print 'repo:', reponame, username, namespace
-
-    for filename in file_list:
-        print 'To load: %s' % filename
-        json_data = None
-        data = ''.join(
-            pagure.lib.git.read_git_lines(
-                ['show', 'HEAD:%s' % filename], abspath))
-        if data and 'files' not in filename:
-            try:
-                json_data = json.loads(data)
-            except:
-                pass
-        if json_data:
-            pagure.lib.git.update_ticket_from_git(
-                pagure.SESSION,
-                reponame=reponame,
-                namespace=namespace,
-                username=username,
-                issue_uid=filename,
-                json_data=json_data)
+        commits = pagure.lib.git.get_revs_between(
+            oldrev, newrev, abspath, refname)
+
+        if REDIS:
+            print('Sending to redis to log activity')
+            REDIS.publish('pagure.loadjson',
+                    json.dumps({
+                        'project': project.to_json(public=True),
+                        'abspath': abspath,
+                        'commits': commits,
+                        'data_type': 'ticket',
+                    }
+            ))
 
 
 def main(args):