def guess_type(filename, data):
    '''
    Guess the type of a file based on its filename and data.

    The file name is tried first, using :func:`mimetypes.guess_type`. The
    data is only inspected when the name gives no MIME type, or when the
    type is textual and no encoding is known yet.

    Return value is a tuple (type, encoding) where type or encoding is None
    if it can't be guessed. The encoding is either the one reported by
    ``mimetypes`` for the file name (a compression program such as
    ``gzip``) or the character set guessed from the data.

    :param filename: file name string
    :param data: file data string (byte string or unicode string)
    '''
    mimetype = None
    encoding = None
    if filename:
        mimetype, encoding = mimetypes.guess_type(filename)
    if data:
        if not mimetype:
            # Look for a NUL of the same string type as ``data``: testing
            # a text string against bytes raises TypeError on Python 3.
            if isinstance(data, (bytes, bytearray)):
                nul = b'\0'
            else:
                nul = u'\0'
            if nul in data:
                mimetype = 'application/octet-stream'
            else:
                mimetype = 'text/plain'

        if mimetype.startswith('text/') and not encoding:
            try:
                encoding = pagure.lib.encoding_utils.guess_encoding(
                    ktc.to_bytes(data))
            except pagure.exceptions.PagureException:  # pragma: no cover
                # We cannot decode the file, so bail but warn the admins
                _log.exception('File could not be decoded')

    return mimetype, encoding


def get_type_headers(filename, data):
    '''
    Get the HTTP headers used for downloading or previewing the file.

    If the file is html or javascript, it will return headers which make
    the browser download the file instead of rendering it.

    :param filename: file name string
    :param data: file data string
    :return: a dict of HTTP headers, or None if the type of the file
        can't be guessed
    '''
    mimetype, encoding = guess_type(filename, data)
    if not mimetype:
        return None
    headers = {'X-Content-Type-Options': 'nosniff'}
    if 'html' in mimetype or 'javascript' in mimetype:
        mimetype = 'application/octet-stream'
        headers['Content-Disposition'] = 'attachment'
    # An encoding taken from the file name by ``mimetypes`` names a
    # compression program (gzip, bzip2, ...), not a character set, so it
    # must not be sent as the ``charset`` parameter.
    if encoding and encoding not in mimetypes.encodings_map.values():
        mimetype += '; charset={encoding}'.format(encoding=encoding)
    headers['Content-Type'] = mimetype
    return headers
pagure.exceptions.PagureException: - # We cannot decode the file, so bail but warn the admins - _log.exception('File could not be decoded') - - if encoding: - mimetype += '; charset={encoding}'.format(encoding=encoding) - headers['Content-Type'] = mimetype - - return (data, 200, headers) + return (data, 200, pagure.lib.mimetype.get_type_headers(filename, data)) @APP.route('//issue//comment//edit', diff --git a/pagure/ui/repo.py b/pagure/ui/repo.py index 6c567c9..86ae659 100644 --- a/pagure/ui/repo.py +++ b/pagure/ui/repo.py @@ -38,13 +38,12 @@ from pygments.util import ClassNotFound from pygments.filters import VisibleWhitespaceFilter from sqlalchemy.exc import SQLAlchemyError -import mimetypes - from binaryornot.helpers import is_binary_string import pagure.exceptions import pagure.lib import pagure.lib.git +import pagure.lib.mimetype import pagure.lib.plugins import pagure.lib.tasks import pagure.forms @@ -614,8 +613,6 @@ def view_raw_file( if isinstance(commit, pygit2.Tag): commit = commit.get_object() - mimetype = None - encoding = None if filename: if isinstance(commit, pygit2.Blob): content = commit @@ -625,7 +622,6 @@ def view_raw_file( if not content or isinstance(content, pygit2.Tree): flask.abort(404, 'File not found') - mimetype, encoding = mimetypes.guess_type(filename) data = repo_obj[content.oid].data else: if commit.parents: @@ -644,31 +640,7 @@ def view_raw_file( if not data: flask.abort(404, 'No content found') - if not mimetype and data[:2] == '#!': - mimetype = 'text/plain' - - headers = {} - if not mimetype: - if '\0' in data: - mimetype = 'application/octet-stream' - else: - mimetype = 'text/plain' - elif 'html' in mimetype: - mimetype = 'application/octet-stream' - headers['Content-Disposition'] = 'attachment' - - if mimetype.startswith('text/') and not encoding: - try: - encoding = encoding_utils.guess_encoding(ktc.to_bytes(data)) - except pagure.exceptions.PagureException: - # We cannot decode the file, so bail but warn the admins - 
class TestMIMEType(unittest.TestCase):
    ''' Tests for the helpers of pagure.lib.mimetype. '''

    def test_guess_type(self):
        ''' guess_type returns the expected (type, encoding) tuple. '''
        # Each case is (filename, data, expected type, expected encoding)
        cases = [
            ('hello.html', None, 'text/html', None),
            ('hello.html', '#!', 'text/html', 'ascii'),
            ('hello', '#!', 'text/plain', 'ascii'),
            ('hello.jpg', None, 'image/jpeg', None),
            ('hello.jpg', '#!', 'image/jpeg', None),
            ('hello.jpg', '\0', 'image/jpeg', None),
            (None, '😋', 'text/plain', 'utf-8'),
            ('hello', '\0', 'application/octet-stream', None),
            ('hello', None, None, None)
        ]
        for filename, content, exp_type, exp_encoding in cases:
            guessed = mimetype.guess_type(filename, content)
            self.assertEqual((exp_type, exp_encoding), guessed)

    def test_get_html_file_headers(self):
        ''' An html file gets headers that force a download. '''
        headers = mimetype.get_type_headers('hello.html', None)
        self.assertEqual(
            headers,
            {
                'Content-Type': 'application/octet-stream',
                'Content-Disposition': 'attachment',
                'X-Content-Type-Options': 'nosniff'
            }
        )

    def test_get_normal_headers(self):
        ''' Other files get a Content-Type matching the guessed type. '''
        # Each case is (filename, data, expected Content-Type header)
        cases = [
            ('hello', '#!', 'text/plain; charset=ascii'),
            ('hello.jpg', None, 'image/jpeg'),
            ('hello.jpg', '#!', 'image/jpeg'),
            ('hello.jpg', '\0', 'image/jpeg'),
            (None, '😋', 'text/plain; charset=utf-8'),
            ('hello', '\0', 'application/octet-stream')
        ]
        for filename, content, content_type in cases:
            headers = mimetype.get_type_headers(filename, content)
            self.assertEqual(headers['Content-Type'], content_type)

    def test_get_none_header(self):
        ''' No headers are returned when the type cannot be guessed. '''
        headers = mimetype.get_type_headers('hello', None)
        self.assertIsNone(headers)