From 9e5ef7ed0f512e953951075010d86d9a5cd3e88a Mon Sep 17 00:00:00 2001
From: Pierre-Yves Chibon
Date: Dec 22 2016 14:07:17 +0000
Subject: Move the regex to validate an URL to the pagure module so it can easily be re-used

---

diff --git a/pagure/__init__.py b/pagure/__init__.py
index 488f1ce..493a4c1 100644
--- a/pagure/__init__.py
+++ b/pagure/__init__.py
@@ -19,6 +19,7 @@ __api_version__ = '0.10'
 import datetime
 import logging
 import os
+import re
 import subprocess
 import urlparse
 from logging.handlers import SMTPHandler
@@ -608,6 +609,52 @@ def get_remote_repo_path(remote_git, branch_from, loop=False):
     return repopath
 
 
+ip_middle_octet = u"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5]))"
+ip_last_octet = u"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
+
+"""
+regex based on https://github.com/kvesteri/validators/blob/master/validators/url.py
+"""
+urlregex = re.compile(
+    u"^"
+    # protocol identifier
+    u"(?:(?:https?|ftp)://)"
+    # user:pass authentication
+    u"(?:\S+(?::\S*)?@)?"
+    u"(?:"
+    u"(?P<private_ip>"
+    # IP address exclusion
+    # private & local networks
+    u"(?:(?:10|127)" + ip_middle_octet + u"{2}" + ip_last_octet + u")|"
+    u"(?:(?:169\.254|192\.168)" + ip_middle_octet + ip_last_octet + u")|"
+    u"(?:172\.(?:1[6-9]|2\d|3[0-1])" + ip_middle_octet + ip_last_octet + u"))"
+    u"|"
+    # IP address dotted notation octets
+    # excludes loopback network 0.0.0.0
+    # excludes reserved space >= 224.0.0.0
+    # excludes network & broadcast addresses
+    # (first & last IP address of each class)
+    u"(?P<public_ip>"
+    u"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
+    u"" + ip_middle_octet + u"{2}"
+    u"" + ip_last_octet + u")"
+    u"|"
+    # host name
+    u"(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)"
+    # domain name
+    u"(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)*"
+    # TLD identifier
+    u"(?:\.(?:[a-z\u00a1-\uffff]{2,}))"
+    u")"
+    # port number
+    u"(?::\d{2,5})?"
+    # resource path
+    u"(?:/\S*)?"
+    u"$",
+    re.UNICODE | re.IGNORECASE
+)
+urlpattern = re.compile(urlregex)
+
 # Import the application
 import pagure.ui.app
 import pagure.ui.admin
diff --git a/pagure/ui/issues.py b/pagure/ui/issues.py
index 9f9fb23..0aad641 100644
--- a/pagure/ui/issues.py
+++ b/pagure/ui/issues.py
@@ -26,7 +26,6 @@ from binaryornot.helpers import is_binary_string
 
 import kitchen.text.converters as ktc
 import mimetypes
-import re
 
 import pagure.doc_utils
 import pagure.exceptions
@@ -34,54 +33,7 @@ import pagure.lib
 import pagure.lib.encoding_utils
 import pagure.forms
 from pagure import (APP, SESSION, LOG, __get_file_in_tree,
-                    login_required, authenticated)
-
-
-ip_middle_octet = u"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5]))"
-ip_last_octet = u"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
-
-"""
-regex based on https://github.com/kvesteri/validators/blob/master/validators/url.py
-"""
-urlregex = re.compile(
-    u"^"
-    # protocol identifier
-    u"(?:(?:https?|ftp)://)"
-    # user:pass authentication
-    u"(?:\S+(?::\S*)?@)?"
-    u"(?:"
-    u"(?P<private_ip>"
-    # IP address exclusion
-    # private & local networks
-    u"(?:(?:10|127)" + ip_middle_octet + u"{2}" + ip_last_octet + u")|"
-    u"(?:(?:169\.254|192\.168)" + ip_middle_octet + ip_last_octet + u")|"
-    u"(?:172\.(?:1[6-9]|2\d|3[0-1])" + ip_middle_octet + ip_last_octet + u"))"
-    u"|"
-    # IP address dotted notation octets
-    # excludes loopback network 0.0.0.0
-    # excludes reserved space >= 224.0.0.0
-    # excludes network & broadcast addresses
-    # (first & last IP address of each class)
-    u"(?P<public_ip>"
-    u"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
-    u"" + ip_middle_octet + u"{2}"
-    u"" + ip_last_octet + u")"
-    u"|"
-    # host name
-    u"(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)"
-    # domain name
-    u"(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)*"
-    # TLD identifier
-    u"(?:\.(?:[a-z\u00a1-\uffff]{2,}))"
-    u")"
-    # port number
-    u"(?::\d{2,5})?"
-    # resource path
-    u"(?:/\S*)?"
-    u"$",
-    re.UNICODE | re.IGNORECASE
-)
-urlpattern = re.compile(urlregex)
+                    login_required, authenticated, urlpattern)
 
 
 # URLs