From 4bcf65f77c4df7144ca07b7f7622395a354a5430 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Oct 21 2016 17:49:24 +0000 Subject: Handle empty files in detect_encodings If there's no data, chardet returns ``None``. This just returns ASCII with 100% confidence if the file is empty. Signed-off-by: Jeremy Cline --- diff --git a/pagure/lib/encoding_utils.py b/pagure/lib/encoding_utils.py index 7f6d0a8..23eb4ef 100644 --- a/pagure/lib/encoding_utils.py +++ b/pagure/lib/encoding_utils.py @@ -71,6 +71,10 @@ def detect_encodings(data): :return: A dictionary mapping possible encodings to confidence levels :rtype: dict """ + if not data: + # It's an empty string so we can safely say it's ascii + return {'ascii': 1.0} + # We can't use ``chardet.detect`` because we want to dig in the internals # of the detector to bias the utf-8 result. detector = universaldetector.UniversalDetector() diff --git a/tests/test_pagure_lib_encoding_utils.py b/tests/test_pagure_lib_encoding_utils.py index 0060595..8c63d4a 100644 --- a/tests/test_pagure_lib_encoding_utils.py +++ b/tests/test_pagure_lib_encoding_utils.py @@ -29,6 +29,13 @@ class TestGuessEncoding(unittest.TestCase): self.assertEqual(result, 'utf-8') self.assertEqual(chardet_result['encoding'], 'ISO-8859-2') + def test_guess_encoding_no_data(self): + result = encoding_utils.guess_encoding(u''.encode('utf-8')) + self.assertEqual(result, 'ascii') + + +class TestDecode(unittest.TestCase): + def test_decode(self): data = u'Šabata' self.assertEqual(data, encoding_utils.decode(data.encode('utf-8')))