Blame pagure/lib/mimetype.py

Shengjing Zhu c28196
# -*- coding: utf-8 -*-
Aurélien Bompard 831553
Pierre-Yves Chibon 67d1cc
from __future__ import unicode_literals, absolute_import
Aurélien Bompard 831553
Shengjing Zhu c28196
import logging
Shengjing Zhu c28196
import mimetypes
Shengjing Zhu c28196
import kitchen.text.converters as ktc
Aurélien Bompard 831553
import six
Aurélien Bompard 831553
Shengjing Zhu c28196
import pagure.lib.encoding_utils
Shengjing Zhu c28196
Shengjing Zhu c28196
Shengjing Zhu c28196
# Module-level logger, named after this module; used by guess_type() to
# report files whose encoding could not be determined.
_log = logging.getLogger(__name__)
Shengjing Zhu c28196
Shengjing Zhu c28196
Shengjing Zhu c28196
def guess_type(filename, data):
    """
    Work out the MIME type and the encoding of a file.

    The file name is consulted first, through the standard ``mimetypes``
    module. When that gives no type and data is available, the content is
    classified as ``application/octet-stream`` if it is a non-text string
    holding a NUL byte, and as ``text/plain`` otherwise. For ``text/*``
    content that still has no encoding,
    ``pagure.lib.encoding_utils.guess_encoding`` is asked for one; if it
    raises a ``PagureException`` the failure is logged and the encoding is
    left unset.

    :param filename: file name string
    :param data: file data string
    :return: a ``(type, encoding)`` tuple, either item being None when it
        could not be guessed
    """
    if filename:
        mimetype, encoding = mimetypes.guess_type(filename)
    else:
        mimetype, encoding = None, None

    # Without content there is nothing more to inspect.
    if not data:
        return mimetype, encoding

    if not mimetype:
        # A NUL byte in a byte string is taken as the sign of binary content.
        looks_binary = not isinstance(data, six.text_type) and b"\0" in data
        mimetype = "application/octet-stream" if looks_binary else "text/plain"

    wants_encoding = not encoding and mimetype.startswith("text/")
    if wants_encoding:
        try:
            encoding = pagure.lib.encoding_utils.guess_encoding(
                ktc.to_bytes(data)
            )
        except pagure.exceptions.PagureException:  # pragma: no cover
            # We cannot decode the file, so bail but warn the admins
            _log.exception("File could not be decoded")

    return mimetype, encoding
Shengjing Zhu c28196
Shengjing Zhu c28196
Shengjing Zhu c28196
def get_type_headers(filename, data):
    """
    Get the HTTP headers used for downloading or previewing the file.

    If the file is html, javascript or svg, it will return headers which
    make the browser start downloading it instead of rendering it: the
    content type is replaced by ``application/octet-stream`` and a
    ``Content-Disposition: attachment`` header is added.

    ``X-Content-Type-Options: nosniff`` is always set.

    :param filename: file name string
    :param data: file data string
    :return: a dict of HTTP headers, or None when no mimetype could be
        guessed for the file
    """
    mimetype, encoding = guess_type(filename, data)
    if not mimetype:
        return None
    headers = {str("X-Content-Type-Options"): "nosniff"}
    if "html" in mimetype or "javascript" in mimetype or "svg" in mimetype:
        mimetype = "application/octet-stream"
        headers[str("Content-Disposition")] = "attachment"
    # The encoding guessed from the file name by the ``mimetypes`` module is
    # a content encoding (gzip, compress, bzip2, ...), not a character set.
    # Advertising it as ``charset=gzip`` would be invalid, so only values
    # that are not such compression names are emitted as charset.
    if encoding and encoding not in mimetypes.encodings_map.values():
        mimetype += "; charset={encoding}".format(encoding=encoding)
    headers[str("Content-Type")] = mimetype
    return headers