Blob Blame Raw
# -*- coding: utf-8 -*-

from __future__ import unicode_literals, absolute_import

import logging
import mimetypes
import kitchen.text.converters as ktc
import six

import pagure.lib.encoding_utils


_log = logging.getLogger(__name__)


def guess_type(filename, data):
    """
    Work out the mimetype and encoding of a file from its name and content.

    The file name is looked up with ``mimetypes.guess_type`` first. When
    content is provided and the name gave no mimetype, the content decides:
    non-text data containing a NUL byte is reported as
    ``application/octet-stream``, anything else as ``text/plain``. For
    ``text/*`` content with no encoding known yet, the encoding is guessed
    from the data itself.

    :param filename: file name string, may be empty or None
    :param data: file data string, may be empty or None
    :return: a tuple (type, encoding) where either element is None when it
        could not be guessed
    """
    if filename:
        mimetype, encoding = mimetypes.guess_type(filename)
    else:
        mimetype, encoding = None, None

    # Without any content, the file name is all we have to go on.
    if not data:
        return mimetype, encoding

    if not mimetype:
        # A NUL byte in non-text data is taken as the sign of a binary file.
        looks_binary = not isinstance(data, six.text_type) and b"\0" in data
        mimetype = "application/octet-stream" if looks_binary else "text/plain"

    # Only text whose encoding is still unknown needs its content inspected.
    if encoding or not mimetype.startswith("text/"):
        return mimetype, encoding

    try:
        raw = ktc.to_bytes(data)
        encoding = pagure.lib.encoding_utils.guess_encoding(raw)
    except pagure.exceptions.PagureException:  # pragma: no cover
        # We cannot decode the file, so bail but warn the admins
        _log.exception("File could not be decoded")

    return mimetype, encoding


def get_type_headers(filename, data):
    """
    Get the HTTP headers used for downloading or previewing the file.

    If the file is html, javascript or svg, it will return headers which
    make the browser start downloading instead of rendering the content.

    :param filename: file name string
    :param data: file data string
    :return: a dict of HTTP headers (``X-Content-Type-Options``,
        ``Content-Type`` and, for forced downloads, ``Content-Disposition``)
        or None if no mimetype could be guessed
    """
    mimetype, encoding = guess_type(filename, data)
    if not mimetype:
        return None

    # Always forbid content sniffing so the declared type is authoritative.
    headers = {str("X-Content-Type-Options"): "nosniff"}

    # Types a browser could execute or render as active content are served
    # as an opaque attachment instead.
    if any(marker in mimetype for marker in ("html", "javascript", "svg")):
        mimetype = "application/octet-stream"
        headers[str("Content-Disposition")] = "attachment"

    # guess_type() may hand back a compression name derived from the file
    # extension (e.g. "gzip" for "foo.tar.gz") rather than a character set;
    # such a value must not be advertised as the charset.
    if encoding and encoding not in mimetypes.encodings_map.values():
        mimetype += "; charset={encoding}".format(encoding=encoding)

    headers[str("Content-Type")] = mimetype
    return headers