1 | # -*- coding: utf-8 -*-
|
---|
2 | # Copyright (c) 2014 Rackspace
|
---|
3 | # Licensed under the Apache License, Version 2.0 (the "License");
|
---|
4 | # you may not use this file except in compliance with the License.
|
---|
5 | # You may obtain a copy of the License at
|
---|
6 | #
|
---|
7 | # http://www.apache.org/licenses/LICENSE-2.0
|
---|
8 | #
|
---|
9 | # Unless required by applicable law or agreed to in writing, software
|
---|
10 | # distributed under the License is distributed on an "AS IS" BASIS,
|
---|
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
---|
12 | # implied.
|
---|
13 | # See the License for the specific language governing permissions and
|
---|
14 | # limitations under the License.
|
---|
15 | """
|
---|
16 | Module containing compiled regular expressions and constants.
|
---|
17 |
|
---|
18 | This module contains important constants, patterns, and compiled regular
|
---|
19 | expressions for parsing and validating URIs and their components.
|
---|
20 | """
|
---|
21 |
|
---|
22 | import re
|
---|
23 |
|
---|
24 | from . import abnf_regexp
|
---|
25 |
|
---|
# Component names enumerated for the named tuple that serves as the
# superclass of URIReference.
URI_COMPONENTS = [
    "scheme",
    "authority",
    "path",
    "query",
    "fragment",
]
|
---|
29 |
|
---|
# Lookup table of the character classes defined in abnf_regexp.  The
# "re_"-prefixed entries are the variants meant for use inside regular
# expressions.
important_characters = dict(
    generic_delimiters=abnf_regexp.GENERIC_DELIMITERS,
    sub_delimiters=abnf_regexp.SUB_DELIMITERS,
    # Variant in which the '*' needs to be escaped
    re_sub_delimiters=abnf_regexp.SUB_DELIMITERS_RE,
    unreserved_chars=abnf_regexp.UNRESERVED_CHARS,
    # Variant in which the '-' needs to be escaped
    re_unreserved=abnf_regexp.UNRESERVED_RE,
)
|
---|
39 |
|
---|
# Delimiters and reserved characters, see:
# http://tools.ietf.org/html/rfc3986#section-2.2
GENERIC_DELIMITERS, SUB_DELIMITERS, RESERVED_CHARS = (
    abnf_regexp.GENERIC_DELIMITERS_SET,
    abnf_regexp.SUB_DELIMITERS_SET,
    abnf_regexp.RESERVED_CHARS_SET,
)
# Unreserved characters, see:
# http://tools.ietf.org/html/rfc3986#section-2.3
UNRESERVED_CHARS, NON_PCT_ENCODED = (
    abnf_regexp.UNRESERVED_CHARS_SET,
    abnf_regexp.NON_PCT_ENCODED_SET,
)
|
---|
49 |
|
---|
# Compiled form of the URL parsing expression from abnf_regexp.
URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE)

# Splits an authority string into its named groups: an optional
# "userinfo@" prefix, the host, and an optional port.
SUBAUTHORITY_MATCHER = re.compile(
    "^(?:(?P<userinfo>" + abnf_regexp.USERINFO_RE + ")@)?"  # userinfo
    + "(?P<host>" + abnf_regexp.HOST_PATTERN + ")"  # host
    + ":?(?P<port>" + abnf_regexp.PORT_RE + ")?$"  # port
)
|
---|
61 |
|
---|
62 |
|
---|
# Whole-string matchers: each pattern is wrapped in "^...$" here, and the
# IPv6 variants additionally require the surrounding square brackets.
HOST_MATCHER = re.compile("^{0}$".format(abnf_regexp.HOST_RE))
IPv4_MATCHER = re.compile("^{0}$".format(abnf_regexp.IPv4_RE))
IPv6_MATCHER = re.compile(
    r"^\[{0}\]$".format(abnf_regexp.IPv6_ADDRZ_RFC4007_RE)
)

# Used by host validator
IPv6_NO_RFC4007_MATCHER = re.compile(
    r"^\[" + abnf_regexp.IPv6_ADDRZ_RE + r"\]$"
)
|
---|
71 |
|
---|
# Matcher used to validate path components
PATH_MATCHER = re.compile(abnf_regexp.PATH_RE)


# ##################################
# Query and Fragment Matcher Section
# ##################################

# A single compiled pattern is shared by the query and fragment matchers.
QUERY_MATCHER = FRAGMENT_MATCHER = re.compile(abnf_regexp.QUERY_RE)

# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1
SCHEME_MATCHER = re.compile("^" + abnf_regexp.SCHEME_RE + "$")
|
---|
86 |
|
---|
# Relative reference validation, see:
# http://tools.ietf.org/html/rfc3986#section-4.2
# NOTE(review): QUERY_RE / FRAGMENT_RE are presumed to carry their own
# leading "^" and trailing "$" (ABSOLUTE_URI_MATCHER strips them the same
# way with [1:-1]).  Embedded verbatim after "\?" or "#", those anchors
# could never match, so they are sliced off here -- confirm against
# abnf_regexp.
RELATIVE_REF_MATCHER = re.compile(
    r"^%s(\?%s)?(#%s)?$"
    % (
        abnf_regexp.RELATIVE_PART_RE,
        abnf_regexp.QUERY_RE[1:-1],
        abnf_regexp.FRAGMENT_RE[1:-1],
    )
)
|
---|
95 |
|
---|
# Absolute URI (scheme, hier-part and optional query; no fragment part), see
# http://tools.ietf.org/html/rfc3986#section-4.3
ABSOLUTE_URI_MATCHER = re.compile(
    r"^{scheme}:{hier_part}(\?{query})?$".format(
        scheme=abnf_regexp.COMPONENT_PATTERN_DICT["scheme"],
        hier_part=abnf_regexp.HIER_PART_RE,
        # The first and last characters of QUERY_RE are dropped before it
        # is embedded in the larger expression.
        query=abnf_regexp.QUERY_RE[1:-1],
    )
)
|
---|
105 |
|
---|
106 | # ###############
|
---|
107 | # IRIs / RFC 3987
|
---|
108 | # ###############
|
---|
109 |
|
---|
# Same parsing expression as URI_MATCHER, compiled with re.UNICODE.
IRI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE, flags=re.UNICODE)

# IRI counterpart of the sub-authority matcher: optional "iuserinfo@"
# prefix, the ihost, and an optional port.
ISUBAUTHORITY_MATCHER = re.compile(
    (
        u"^(?:(?P<userinfo>%s)@)?"  # iuserinfo
        u"(?P<host>%s)"  # ihost
        u":?(?P<port>%s)?$"  # port
    )
    % (
        abnf_regexp.IUSERINFO_RE,
        abnf_regexp.IHOST_RE,
        abnf_regexp.PORT_RE,
    ),
    flags=re.UNICODE,
)
|
---|
122 |
|
---|
123 |
|
---|
124 | # Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3
|
---|
def merge_paths(base_uri, relative_path):
    """Merge a base URI's path with a relative URI's path.

    Follows the merge routine of RFC 3986, section 5.2.3:

    * if the base has an authority and an empty path, the result is
      ``"/"`` followed by the relative path;
    * otherwise the relative path is appended to the base path with
      everything after its right-most ``"/"`` removed. A base path that
      contains no ``"/"`` at all is dropped entirely.

    :param base_uri: Object exposing ``path`` and ``authority``
        attributes; either may be ``None``.
    :param relative_path: Path of the relative reference (a string).
    :returns: The merged path string.
    """
    base_path = base_uri.path or ""
    if not base_path and base_uri.authority is not None:
        return "/" + relative_path
    # rfind() returns -1 when there is no "/", which makes the slice empty
    # and discards the whole base path; otherwise the slice keeps the
    # right-most "/" and everything before it.
    return base_path[: base_path.rfind("/") + 1] + relative_path
|
---|
133 |
|
---|
134 |
|
---|
# A unique plain ``object`` instance, suitable only for identity (``is``)
# comparison.
# NOTE(review): presumably a marker meaning "keep the existing value" --
# confirm against the callers.
UseExisting = object()
|
---|