# -*- coding: utf-8 -*-
"""
(c) 2014-2018 - Copyright Red Hat Inc
Authors:
Patrick Uiterwijk <puiterwijk@redhat.com>
"""
from __future__ import unicode_literals, absolute_import
import logging
import subprocess
import tempfile
import os
import flask
import requests
import werkzeug
import pagure.exceptions
import pagure.lib.git
import pagure.lib.mimetype
import pagure.lib.plugins
import pagure.lib.query
import pagure.lib.tasks
import pagure.forms
import pagure.ui.plugins
from pagure.config import config as pagure_config
from pagure.ui import UI_NS
_log = logging.getLogger(__name__)
def _read_cgi_headers(out):
    """ Read the CGI response head from a binary stream.

    Lines are consumed from ``out`` up to and including the blank line that
    terminates the header section; the stream is left positioned at the
    start of the response body.

    Args:
        out (file): Binary file-like object to read the headers from.

    Returns:
        dict: Lower-cased header names mapped to their values, both as text.

    Raises:
        Exception: If the stream ends before the blank line, if a line has
            no colon, or if no header at all was received.
    """
    headers = {}
    while True:
        line = out.readline()
        if not line:
            raise Exception("End of file while reading headers?")
        # Stripping the line ending leaves an empty string for the blank
        # line that marks end-of-headers.
        line = line.strip()
        if not line:
            break
        # RFC3875 does not require whitespace after the colon, so split on
        # the colon alone and strip the value afterwards.
        name, sep, value = line.partition(b":")
        if not sep:
            raise Exception("Malformed header line received")
        # Decode to text so the "status" lookup and the string handling in
        # the caller work on Python 3 as well. latin-1 maps every byte, so
        # decoding can never fail.
        headers[name.strip().lower().decode("latin-1")] = (
            value.strip().decode("latin-1")
        )
    if not headers:
        raise Exception("No response at all received")
    return headers


def proxy_raw_git():
    """ Proxy a request to Git or gitolite3 via a subprocess.

    This should get called after it is determined the requested project
    is not on repoSpanner.

    The request body is buffered into a temporary file that is handed to the
    subprocess as stdin. The CGI headers the subprocess prints are parsed
    and the remainder of its stdout is passed through as the response body.

    Returns:
        flask.Response: Response carrying the status, headers and body
            produced by the subprocess.

    Raises:
        ValueError: If a required environment value is empty.
        Exception: If the subprocess response head is missing or malformed.
    """
    # We are going to shell out to gitolite-shell. Prepare the env it needs.
    gitenv = {
        # PATH is optional (see below), so do not fail if it is unset
        "PATH": os.environ.get("PATH"),
        # These are the vars git-http-backend needs
        "PATH_INFO": flask.request.path,
        "REMOTE_USER": flask.request.remote_user,
        "REMOTE_ADDR": flask.request.remote_addr,
        "CONTENT_TYPE": flask.request.content_type,
        "QUERY_STRING": flask.request.query_string,
        "REQUEST_METHOD": flask.request.method,
        "GIT_PROJECT_ROOT": pagure_config["GIT_FOLDER"],
        # We perform access checks, so can bypass that of Git
        "GIT_HTTP_EXPORT_ALL": "true",
        # This might be needed by hooks
        "PAGURE_CONFIG": os.environ.get("PAGURE_CONFIG"),
        "PYTHONPATH": os.environ.get("PYTHONPATH"),
        # Some HTTP headers that we want to pass through because they
        # impact the request/response. Only add headers here that are
        # "safe", as in they don't allow for other issues.
        "HTTP_CONTENT_ENCODING": flask.request.content_encoding,
    }

    gitolite = pagure_config["HTTP_REPO_ACCESS_GITOLITE"]
    if gitolite:
        gitenv.update(
            {
                # These are the additional vars gitolite needs
                # Fun fact: REQUEST_URI is not even mentioned in RFC3875
                "REQUEST_URI": flask.request.full_path,
                "GITOLITE_HTTP_HOME": pagure_config["GITOLITE_HOME"],
                "HOME": pagure_config["GITOLITE_HOME"],
            }
        )
    elif flask.request.remote_user:
        gitenv["GL_USER"] = flask.request.remote_user

    # These keys are optional
    for key in (
        "REMOTE_USER",
        "REMOTE_ADDR",
        "CONTENT_TYPE",
        "QUERY_STRING",
        "PYTHONPATH",
        "PATH",
        "HTTP_CONTENT_ENCODING",
    ):
        if not gitenv[key]:
            del gitenv[key]

    # Everything that is left is mandatory
    for key in gitenv:
        if not gitenv[key]:
            raise ValueError("Value for key %s unknown" % key)

    if gitolite:
        cmd = [gitolite]
    else:
        cmd = ["/usr/bin/git", "http-backend"]

    # Note: using a temporary files to buffer the input contents
    # is non-ideal, but it is a way to make sure we don't need to have
    # the full input (which can be very long) in memory.
    # Ideally, we'd directly stream, but that's an RFE for the future,
    # since that needs to happen in other threads so as to not block.
    # (See the warnings in the subprocess module)
    # max_size must be set: with the default of 0 the data stays in memory
    # until fileno() is called, no matter how large the input gets.
    with tempfile.SpooledTemporaryFile(max_size=1024 * 1024) as infile:
        while True:
            block = flask.request.stream.read(4096)
            if not block:
                break
            infile.write(block)
        infile.seek(0)

        proc = subprocess.Popen(
            cmd, stdin=infile, stdout=subprocess.PIPE, stderr=None, env=gitenv
        )
        out = proc.stdout

        # First, gather the response head
        try:
            headers = _read_cgi_headers(out)
        except Exception:
            # We are not going to stream the output to the client, so do
            # not leave the subprocess and its pipe behind.
            proc.kill()
            proc.wait()
            out.close()
            raise

        # If no status provided, assume 200 OK as per RFC3875
        status_line = headers.pop("status", "200 OK")
        # The reason phrase is not needed, and may be absent altogether
        respcode = status_line.split(" ", 1)[0]

        wrapout = werkzeug.wsgi.wrap_file(flask.request.environ, out)
        return flask.Response(
            wrapout,
            status=int(respcode),
            headers=headers,
            direct_passthrough=True,
        )
def proxy_repospanner(project, service):
    """ Forward the current request to repoSpanner and relay its answer.

    Args:
        project (model.Project): The project being accessed
        service (String): The service as indicated by ?Service= in /info/refs

    Returns:
        flask.Response: Response relaying the status code, headers (minus
            Transfer-Encoding) and body returned by repoSpanner.
    """
    operation = os.path.basename(flask.request.path)
    if operation == "refs":
        operation = "info/refs?service=%s" % service

    regionurl, regioninfo = project.repospanner_repo_info("main")
    target = "%s/%s" % (regionurl, operation)

    # Older flask/werkzeug versions don't support both an input and output
    # stream: this results in a blank upload.
    # So, we optimize for the direction the majority of the data will likely
    # flow.
    # NOTE(review): clone_proxy in this file only sets ``service`` for
    # /info/refs requests, so the push branch is not taken for POSTs to
    # /git-receive-pack - confirm whether that is intended.
    if service == "git-receive-pack":
        # This is a Push operation, optimize for data from the client
        payload = flask.request.stream
        stream_response = False
    else:
        # This is a Pull operation, optimize for data from the server
        payload = flask.request.data
        stream_response = True

    push_cert = regioninfo["push_cert"]
    upstream = requests.request(
        flask.request.method,
        target,
        verify=regioninfo["ca"],
        cert=(push_cert["cert"], push_cert["key"]),
        headers={
            "Content-Encoding": flask.request.content_encoding,
            "Content-Type": flask.request.content_type,
            "X-Extra-Username": flask.request.remote_user,
            "X-Extra-Repotype": "main",
            "X-Extra-project_name": project.name,
            "x-Extra-project_user": project.user if project.is_fork else "",
            "X-Extra-project_namespace": project.namespace,
        },
        data=payload,
        stream=stream_response,
    )

    # Strip out any headers that cause problems
    upstream.headers.pop("transfer-encoding", None)

    return flask.Response(
        upstream.iter_content(chunk_size=128),
        status=upstream.status_code,
        headers=dict(upstream.headers),
        direct_passthrough=True,
    )
def clone_proxy(project, username=None, namespace=None):
    """ Proxy the Git smart-HTTP endpoints used for HTTP pull/push.

    Note that for the clone endpoints, it's very explicit that <repo> has been
    renamed to <project>, to avoid the automatic repo searching from flask_app.
    This means that we have a chance to trust REMOTE_USER to verify the users'
    access to the attempted repository.

    Args:
        project (String): Name of the project, taken from the URL
        username (String): Name of the fork owner, if the URL has one
        namespace (String): Namespace of the project, if the URL has one

    Returns:
        The response of proxy_repospanner() or proxy_raw_git(), depending on
        whether the project is on repoSpanner.

    The request is aborted with 403 when HTTP pull/push (or push) is
    disabled or a push is unauthenticated, with 400 when the service
    parameter of /info/refs is missing or unknown, and with 404 when the
    project is not found.
    """
    if not pagure_config["ALLOW_HTTP_PULL_PUSH"]:
        flask.abort(403, description="HTTP pull/push is not allowed")

    path = flask.request.path
    service = None
    if path.endswith("/info/refs"):
        service = flask.request.args.get("service")
        if not service:
            # This is a Git client older than 1.6.6, and it doesn't work with
            # the smart protocol. We do not support the old protocol via HTTP.
            flask.abort(400, description="Please switch to newer Git client")
        if service not in ("git-upload-pack", "git-receive-pack"):
            flask.abort(400, description="Unknown service requested")

    # Decide on the decoded service and the path suffix rather than a
    # substring search of the raw URL: the raw query string may be
    # percent-encoded (hiding the service name from such a search), and a
    # project or namespace name may itself contain "git-receive-pack".
    is_push = service == "git-receive-pack" or path.endswith(
        "/git-receive-pack"
    )
    if is_push:
        if not pagure_config["ALLOW_HTTP_PUSH"]:
            # Pushing (git-receive-pack) over HTTP is not allowed
            flask.abort(403, description="HTTP pushing disabled")
        if not flask.request.remote_user:
            # Anonymous pushing... nope
            flask.abort(403, description="Unauthenticated push not allowed")

    project = pagure.lib.query.get_authorized_project(
        flask.g.session,
        project,
        user=username,
        namespace=namespace,
        asuser=flask.request.remote_user,
    )
    if not project:
        flask.abort(404, description="Project not found")

    if project.is_on_repospanner:
        return proxy_repospanner(project, service)
    return proxy_raw_git()
def add_clone_proxy_cmds():
    """ Register clone_proxy on UI_NS for every supported clone path.

    The routes have the form:
        /(forks/<username>/)(<namespace>/)<project>(.git)/<operation>
    where operation is one of:
        - info/refs (generic, GET only)
        - git-receive-pack (push, POST only)
        - git-upload-pack (pull, POST only)
    """
    project_paths = (
        "<project>",
        "<namespace>/<project>",
        "forks/<username>/<project>",
        "forks/<username>/<namespace>/<project>",
    )
    # Each operation paired with the HTTP methods it accepts
    operations = (
        ("info/refs", ("GET",)),
        ("git-receive-pack", ("POST",)),
        ("git-upload-pack", ("POST",)),
    )
    for project_path in project_paths:
        for extension in ("", ".git"):
            for operation, allowed in operations:
                UI_NS.add_url_rule(
                    "/%s%s/%s" % (project_path, extension, operation),
                    view_func=clone_proxy,
                    methods=allowed,
                )