diff --git a/pagure/utils.py b/pagure/utils.py index 142a994..d2c3852 100644 --- a/pagure/utils.py +++ b/pagure/utils.py @@ -319,13 +319,18 @@ urlregex = re.compile( # protocol identifier "(?:(?:https?|ftp|git)://)" # user:pass authentication - "(?:\S+(?::\S*)?@)?" "(?:" "(?P" + "(?:[-a-z\u00a1-\uffff0-9._~%!$&'()*+,;=:]+" + "(?::[-a-z0-9._~%!$&'()*+,;=:]*)?@)?" + "(?:" + "(?P" # IP address exclusion # private & local networks "(?:(?:10|127)" + ip_middle_octet + "{2}" + ip_last_octet + ")|" "(?:(?:169\.254|192\.168)" + ip_middle_octet + ip_last_octet + ")|" "(?:172\.(?:1[6-9]|2\d|3[0-1])" + ip_middle_octet + ip_last_octet + "))" "|" + # private & local hosts + "(?P" "(?:localhost))" "|" # IP address dotted notation octets # excludes loopback network 0.0.0.0 # excludes reserved space >= 224.0.0.0 @@ -336,6 +341,39 @@ urlregex = re.compile( "" + ip_middle_octet + "{2}" "" + ip_last_octet + ")" "|" + # IPv6 RegEx from https://stackoverflow.com/a/17871737 + "\[(" + # 1:2:3:4:5:6:7:8 + "([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|" + # 1:: 1:2:3:4:5:6:7:: + "([0-9a-fA-F]{1,4}:){1,7}:|" + # 1::8 1:2:3:4:5:6::8 1:2:3:4:5:6::8 + "([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|" + # 1::7:8 1:2:3:4:5::7:8 1:2:3:4:5::8 + "([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|" + # 1::6:7:8 1:2:3:4::6:7:8 1:2:3:4::8 + "([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|" + # 1::5:6:7:8 1:2:3::5:6:7:8 1:2:3::8 + "([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|" + # 1::4:5:6:7:8 1:2::4:5:6:7:8 1:2::8 + "([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|" + # 1::3:4:5:6:7:8 1::3:4:5:6:7:8 1::8 + "[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|" + # ::2:3:4:5:6:7:8 ::2:3:4:5:6:7:8 ::8 :: + ":((:[0-9a-fA-F]{1,4}){1,7}|:)|" + # fe80::7:8%eth0 fe80::7:8%1 + # (link-local IPv6 addresses with zone index) + "fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|" + "::(ffff(:0{1,4}){0,1}:){0,1}" + "((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}" + # ::255.255.255.255 ::ffff:255.255.255.255 
::ffff:0:255.255.255.255 + # (IPv4-mapped IPv6 addresses and IPv4-translated addresses) + "(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|" + "([0-9a-fA-F]{1,4}:){1,4}:" + "((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}" + # 2001:db8:3:4::192.0.2.33 64:ff9b::192.0.2.33 + # (IPv4-Embedded IPv6 Address) + "(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])" ")\]|" # host name "(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)" # domain name @@ -345,7 +383,11 @@ urlregex = re.compile( # port number "(?::\d{2,5})?" # resource path - "(?:/\S*)?" "$", + "(?:/[-a-z\u00a1-\uffff0-9._~%!$&'()*+,;=:@/]*)?" + # query string + "(?:\?\S*)?" + # fragment + "(?:#\S*)?" "$", re.UNICODE | re.IGNORECASE, ) urlpattern = re.compile(urlregex) @@ -354,15 +396,20 @@ urlpattern = re.compile(urlregex) ssh_urlregex = re.compile( "^" # protocol identifier - "(?:(?:(git\+)?ssh)://)" - # user:pass authentication - "(?:\S+(?::\S*)?@)?" "(?:" "(?P" + "(?:(?:ssh|git\+ssh)://)?" + # user@ authentication + "[-a-z\u00a1-\uffff0-9._~%!$&'()*+,;=:]+@" + # Opening section about host + "(?:" # IP address exclusion + "(?P" # private & local networks "(?:(?:10|127)" + ip_middle_octet + "{2}" + ip_last_octet + ")|" "(?:(?:169\.254|192\.168)" + ip_middle_octet + ip_last_octet + ")|" "(?:172\.(?:1[6-9]|2\d|3[0-1])" + ip_middle_octet + ip_last_octet + "))" "|" + # private & local hosts + "(?P" "(?:localhost))" "|" # IP address dotted notation octets # excludes loopback network 0.0.0.0 # excludes reserved space >= 224.0.0.0 @@ -373,16 +420,55 @@ ssh_urlregex = re.compile( "" + ip_middle_octet + "{2}" "" + ip_last_octet + ")" "|" + # IPv6 RegEx from https://stackoverflow.com/a/17871737 + "\[(" + # 1:2:3:4:5:6:7:8 + "([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|" + # 1:: 1:2:3:4:5:6:7:: + "([0-9a-fA-F]{1,4}:){1,7}:|" + # 1::8 1:2:3:4:5:6::8 1:2:3:4:5:6::8 + "([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|" + # 1::7:8 1:2:3:4:5::7:8 1:2:3:4:5::8 + "([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|" + # 1::6:7:8 1:2:3:4::6:7:8 
1:2:3:4::8 + "([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|" + # 1::5:6:7:8 1:2:3::5:6:7:8 1:2:3::8 + "([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|" + # 1::4:5:6:7:8 1:2::4:5:6:7:8 1:2::8 + "([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|" + # 1::3:4:5:6:7:8 1::3:4:5:6:7:8 1::8 + "[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|" + # ::2:3:4:5:6:7:8 ::2:3:4:5:6:7:8 ::8 :: + ":((:[0-9a-fA-F]{1,4}){1,7}|:)|" + # fe80::7:8%eth0 fe80::7:8%1 + # (link-local IPv6 addresses with zone index) + "fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|" + "::(ffff(:0{1,4}){0,1}:){0,1}" + "((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}" + # ::255.255.255.255 ::ffff:255.255.255.255 ::ffff:0:255.255.255.255 + # (IPv4-mapped IPv6 addresses and IPv4-translated addresses) + "(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|" + "([0-9a-fA-F]{1,4}:){1,4}:" + "((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}" + # 2001:db8:3:4::192.0.2.33 64:ff9b::192.0.2.33 + # (IPv4-Embedded IPv6 Address) + "(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])" ")\]|" # host name "(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)" # domain name "(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)*" # TLD identifier - "(?:\.(?:[a-z\u00a1-\uffff]{2,}))" ")" + "(?:\.(?:[a-z\u00a1-\uffff]{2,}))" + # Closing the entire section about host + ")" # port number "(?::\d{2,5})?" # resource path - "(?:/\S*)?" "$", + "(?:[:/][-a-z\u00a1-\uffff0-9._~%!$&'()*+,;=:@/]*)?" + # query string + "(?:\?\S*)?" + # fragment + "(?:#\S*)?" 
"$", re.UNICODE | re.IGNORECASE, ) ssh_urlpattern = re.compile(ssh_urlregex) diff --git a/tests/test_pagure_flask_ui_plugins_mirror.py b/tests/test_pagure_flask_ui_plugins_mirror.py index 137a803..bcc7538 100644 --- a/tests/test_pagure_flask_ui_plugins_mirror.py +++ b/tests/test_pagure_flask_ui_plugins_mirror.py @@ -22,6 +22,7 @@ sys.path.insert(0, os.path.join(os.path.dirname( os.path.abspath(__file__)), '..')) import pagure.lib +import pagure.utils import tests @@ -35,6 +36,22 @@ class PagureFlaskPluginMirrortests(tests.Modeltests): tests.create_projects(self.session) tests.create_projects_git(os.path.join(self.path, 'repos')) + def test_valid_ssh_url_pattern(self): + """ Check a number of valid ssh targets that the pattern should let + through. + """ + entries = [ + 'ssh://user@host.lcl:/path/to/repo.git', + 'git@github.com:user/project.git', + 'ssh://user@host.org/target', + 'git+ssh://user@host.org/target', + 'git+ssh://user@host.lcl:/path/to/repo.git', + ] + for el in entries: + print(el) + self.assertIsNotNone(pagure.utils.ssh_urlpattern.match(el)) + + def test_plugin_mirror_no_csrf(self): """ Test setting up the mirror plugin with no csrf. """