1 | # -*- coding: utf-8 -*-
|
---|
2 | # Copyright (c) 2014 Rackspace
|
---|
3 | # Licensed under the Apache License, Version 2.0 (the "License");
|
---|
4 | # you may not use this file except in compliance with the License.
|
---|
5 | # You may obtain a copy of the License at
|
---|
6 | #
|
---|
7 | # http://www.apache.org/licenses/LICENSE-2.0
|
---|
8 | #
|
---|
9 | # Unless required by applicable law or agreed to in writing, software
|
---|
10 | # distributed under the License is distributed on an "AS IS" BASIS,
|
---|
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
---|
12 | # implied.
|
---|
13 | # See the License for the specific language governing permissions and
|
---|
14 | # limitations under the License.
|
---|
15 | """Module with functions to normalize components."""
|
---|
16 | import re
|
---|
17 |
|
---|
18 | from . import compat
|
---|
19 | from . import misc
|
---|
20 |
|
---|
21 |
|
---|
def normalize_scheme(scheme):
    """Return the scheme component converted to lower case."""
    lowered = scheme.lower()
    return lowered
|
---|
25 |
|
---|
26 |
|
---|
def normalize_authority(authority):
    """Build a normalized authority string from its three parts.

    ``authority`` is unpacked as ``(userinfo, host, port)``. Each part is
    only emitted when it is truthy: the userinfo has its percent-escapes
    normalized and is followed by ``"@"``, the host goes through
    :func:`normalize_host`, and the port is appended after a ``":"``.
    """
    userinfo, host, port = authority
    pieces = []
    if userinfo:
        pieces.append(normalize_percent_characters(userinfo) + "@")
    if host:
        pieces.append(normalize_host(host))
    if port:
        pieces.append(":" + port)
    return "".join(pieces)
|
---|
38 |
|
---|
39 |
|
---|
def normalize_username(username):
    """Quote a username so it can be placed in the userinfo component.

    The value is passed through ``compat.urlquote`` and the result is
    returned unchanged.
    """
    quoted = compat.urlquote(username)
    return quoted
|
---|
43 |
|
---|
44 |
|
---|
def normalize_password(password):
    """Quote a password so it can be placed in the userinfo component.

    The value is passed through ``compat.urlquote`` and the result is
    returned unchanged.
    """
    quoted = compat.urlquote(password)
    return quoted
|
---|
48 |
|
---|
49 |
|
---|
def normalize_host(host):
    """Lower-case a host, taking care of IPv6 Zone IDs.

    Hosts that do not match ``misc.IPv6_MATCHER``, or that contain no
    ``"%"``, are simply lower-cased. Otherwise everything before the first
    ``"%"`` is lower-cased and the remainder (the Zone ID) keeps its
    original casing.
    """
    if not misc.IPv6_MATCHER.match(host):
        return host.lower()

    zone_at = host.find("%")
    if zone_at == -1:
        return host.lower()

    escaped_at = host.find("%25")

    # The RFC 4007 Zone ID delimiter '%' is rewritten to the RFC 6874 form
    # '%25'. A host shaped like '[<IPv6 addr>%25]' (the '%25' is followed by
    # exactly one more character) is taken to be RFC 4007 with a Zone ID of
    # '25', so it becomes '[<IPv6 addr>%2525]'.
    delimiter_is_unescaped = (
        escaped_at == -1
        or zone_at < escaped_at
        or (zone_at == escaped_at and escaped_at == len(host) - 4)
    )
    if delimiter_is_unescaped:
        host = host.replace("%", "%25", 1)

    # The casing of the Zone ID must be left untouched.
    address = host[:zone_at].lower()
    zone = host[zone_at:]
    return address + zone
|
---|
71 |
|
---|
72 |
|
---|
def normalize_path(path):
    """Normalize a path component.

    A falsy path (``None`` or ``""``) is handed back as-is. Any other path
    first has its percent-escapes normalized and then has its dot segments
    removed.
    """
    if path:
        return remove_dot_segments(normalize_percent_characters(path))
    return path
|
---|
80 |
|
---|
81 |
|
---|
def normalize_query(query):
    """Normalize a query component.

    A falsy query (``None`` or ``""``) is handed back as-is; otherwise its
    percent-escapes are normalized.
    """
    return normalize_percent_characters(query) if query else query
|
---|
87 |
|
---|
88 |
|
---|
def normalize_fragment(fragment):
    """Normalize a fragment component.

    A falsy fragment (``None`` or ``""``) is handed back as-is; otherwise
    its percent-escapes are normalized.
    """
    if fragment:
        return normalize_percent_characters(fragment)
    return fragment
|
---|
94 |
|
---|
95 |
|
---|
# Matches one percent-encoded triplet: '%' followed by two hex digits of
# either case.
PERCENT_MATCHER = re.compile(r"%[A-Fa-f0-9]{2}")
|
---|
97 |
|
---|
98 |
|
---|
def normalize_percent_characters(s):
    """Upper-case every percent-encoded triplet found in ``s``.

    For example, ``"%3afoo%DF%ab"`` should be turned into ``"%3Afoo%DF%AB"``.
    Text that is not part of a ``%XX`` triplet is left untouched.
    """
    # Triplets always start with '%', which is not a hex digit, so two
    # triplets can never overlap; one substitution pass covers them all.
    return PERCENT_MATCHER.sub(lambda found: found.group(0).upper(), s)
|
---|
109 |
|
---|
110 |
|
---|
def remove_dot_segments(s):
    """Remove dot segments from the string.

    See also Section 5.2.4 of :rfc:`3986`.

    :param str s: The path to process.
    :returns: The path with every ``.`` segment dropped and every ``..``
        segment resolved against the segment preceding it. A path that
        starts with ``/`` always yields a result that starts with ``/``.
    :rtype: str
    """
    # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
    is_absolute = s.startswith("/")
    segments = s.split("/")  # Turn the path into a list of segments
    if is_absolute:
        # The leading '/' produces an empty first segment that stands for
        # the root. Keep it out of the working list so that a '..' can never
        # pop it; it is put back once all segments have been processed.
        del segments[0]

    output = []  # Segments that survive normalization, in order

    for segment in segments:
        # '.' is the current directory, so ignore it, it is superfluous
        if segment == ".":
            continue
        # Anything other than '..' should be appended to the output
        if segment != "..":
            output.append(segment)
        # In this case segment == '..': drop the previous segment if there
        # is one. Climbing above the top of the path is silently ignored.
        elif output:
            output.pop()

    # Restore the root of an absolute path
    if is_absolute:
        output.insert(0, "")

    # If the path ends with '/.' or '/..' ensure we add one more empty
    # string so that the result keeps a trailing '/'
    if s.endswith(("/.", "/..")):
        output.append("")

    return "/".join(output)
|
---|
143 |
|
---|
144 |
|
---|
def encode_component(uri_component, encoding):
    """Percent-encode a single URI component using ``encoding``.

    ``None`` is returned unchanged. Otherwise the component is converted to
    bytes in ``encoding`` and every byte that is not an ASCII character
    listed in ``misc.NON_PCT_ENCODED`` is replaced by an upper-case ``%XX``
    escape. The result is decoded with ``encoding`` before being returned.
    """
    if uri_component is None:
        return uri_component

    # Count the '%XX' triplets already present. When every '%' in the
    # component belongs to such a triplet, the component is treated as
    # already percent-encoded and its '%' characters are copied verbatim
    # instead of being escaped a second time.
    text = compat.to_str(uri_component, encoding)
    triplet_count = len(PERCENT_MATCHER.findall(text))

    raw = compat.to_bytes(uri_component, encoding)
    keep_percent = triplet_count == raw.count(b"%")

    out = bytearray()
    for index in range(len(raw)):
        # Slicing yields a one-character bytestring on both Python 2 & 3
        char = raw[index : index + 1]
        code = ord(char)
        is_kept_percent = keep_percent and char == b"%"
        if is_kept_percent or (
            code < 128 and char.decode() in misc.NON_PCT_ENCODED
        ):
            out.extend(char)
        else:
            out.extend("%{0:02x}".format(code).encode().upper())

    return out.decode(encoding)
|
---|