[230] | 1 | # -*- coding: utf-8 -*-
|
---|
| 2 | # Copyright (c) 2014 Rackspace
|
---|
| 3 | # Licensed under the Apache License, Version 2.0 (the "License");
|
---|
| 4 | # you may not use this file except in compliance with the License.
|
---|
| 5 | # You may obtain a copy of the License at
|
---|
| 6 | #
|
---|
| 7 | # http://www.apache.org/licenses/LICENSE-2.0
|
---|
| 8 | #
|
---|
| 9 | # Unless required by applicable law or agreed to in writing, software
|
---|
| 10 | # distributed under the License is distributed on an "AS IS" BASIS,
|
---|
| 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
---|
| 12 | # implied.
|
---|
| 13 | # See the License for the specific language governing permissions and
|
---|
| 14 | # limitations under the License.
|
---|
| 15 | """Module with functions to normalize components."""
|
---|
| 16 | import re
|
---|
| 17 |
|
---|
| 18 | from . import compat
|
---|
| 19 | from . import misc
|
---|
| 20 |
|
---|
| 21 |
|
---|
def normalize_scheme(scheme):
    """Return the scheme component converted to lower case.

    :param scheme: The scheme string to normalize.
    :returns: ``scheme`` with every cased character lower-cased.
    """
    lowered = scheme.lower()
    return lowered
|
---|
| 25 |
|
---|
| 26 |
|
---|
def normalize_authority(authority):
    """Build a normalized authority string from its three parts.

    :param authority: A 3-item sequence ``(userinfo, host, port)``. Falsy
        items are left out of the result.
    :returns: The concatenation of the normalized userinfo followed by
        ``"@"``, the normalized host, and ``":"`` followed by the port,
        including only the parts that are present.
    """
    userinfo, host, port = authority
    pieces = []
    if userinfo:
        # Only the percent-escapes of the userinfo are touched.
        pieces.append(normalize_percent_characters(userinfo) + "@")
    if host:
        pieces.append(normalize_host(host))
    if port:
        # The port is concatenated as-is, so it must already be a string.
        pieces.append(":" + port)
    return "".join(pieces)
|
---|
| 38 |
|
---|
| 39 |
|
---|
def normalize_username(username):
    """Quote a username so it can be placed in the userinfo component.

    :param username: The username to quote.
    :returns: Whatever ``compat.urlquote`` returns for ``username``.
    """
    quoted = compat.urlquote(username)
    return quoted
|
---|
| 43 |
|
---|
| 44 |
|
---|
def normalize_password(password):
    """Quote a password so it can be placed in the userinfo component.

    :param password: The password to quote.
    :returns: Whatever ``compat.urlquote`` returns for ``password``.
    """
    quoted = compat.urlquote(password)
    return quoted
|
---|
| 48 |
|
---|
| 49 |
|
---|
def normalize_host(host):
    """Lower-case a host, treating an IPv6 Zone ID specially.

    Hosts that do not match ``misc.IPv6_MATCHER``, or that contain no
    ``'%'``, are simply lower-cased. Otherwise everything before the first
    ``'%'`` is lower-cased and the remainder (the Zone ID) keeps its casing.

    :param host: The host string to normalize.
    :returns: The normalized host string.
    """
    if not misc.IPv6_MATCHER.match(host):
        return host.lower()

    zone_start = host.find("%")
    if zone_start == -1:
        return host.lower()

    escaped_start = host.find("%25")

    # The RFC 4007 Zone ID delimiter '%' is rewritten to the RFC 6874 form
    # '%25'. A host of the shape '[<IPv6 addr>%25]' is assumed to be using
    # the RFC 4007 delimiter and therefore becomes '[<IPv6 addr>%2525]'.
    delimiter_is_bare = (
        escaped_start == -1
        or zone_start < escaped_start
        or (zone_start == escaped_start and escaped_start == len(host) - 4)
    )
    if delimiter_is_bare:
        host = host.replace("%", "%25", 1)

    # The Zone ID itself must keep its original casing.
    address_part = host[:zone_start].lower()
    zone_part = host[zone_start:]
    return address_part + zone_part
|
---|
| 71 |
|
---|
| 72 |
|
---|
def normalize_path(path):
    """Normalize a path component.

    A falsy path (``None`` or the empty string) is returned untouched.
    Otherwise the percent-escapes are upper-cased first and the dot
    segments are removed afterwards.

    :param path: The path string to normalize.
    :returns: The normalized path, or ``path`` itself when it is falsy.
    """
    if path:
        upper_escaped = normalize_percent_characters(path)
        return remove_dot_segments(upper_escaped)
    return path
|
---|
| 80 |
|
---|
| 81 |
|
---|
def normalize_query(query):
    """Normalize a query component.

    :param query: The query string to normalize.
    :returns: ``query`` with its percent-escapes upper-cased, or ``query``
        itself when it is falsy (``None`` or empty).
    """
    return normalize_percent_characters(query) if query else query
|
---|
| 87 |
|
---|
| 88 |
|
---|
def normalize_fragment(fragment):
    """Normalize a fragment component.

    :param fragment: The fragment string to normalize.
    :returns: ``fragment`` with its percent-escapes upper-cased, or
        ``fragment`` itself when it is falsy (``None`` or empty).
    """
    if fragment:
        return normalize_percent_characters(fragment)
    return fragment
|
---|
| 94 |
|
---|
| 95 |
|
---|
# Matches one percent-escape: '%' followed by exactly two hexadecimal digits
# of either case.
PERCENT_MATCHER = re.compile("%[A-Fa-f0-9]{2}")


def _upper_case_escape(match):
    """Return the text of a ``PERCENT_MATCHER`` match in upper case."""
    return match.group(0).upper()


def normalize_percent_characters(s):
    """Upper-case the hexadecimal digits of every percent-escape.

    For example, ``"%3afoo%DF%ab"`` is turned into ``"%3Afoo%DF%AB"``.
    Text that is not a ``'%'`` followed by two hexadecimal digits is left
    untouched.

    :param s: The string whose percent-escapes should be normalized.
    :returns: ``s`` with each percent-escape upper-cased.
    """
    # A single substitution pass rewrites each escape where it is found,
    # instead of re-scanning the whole string once per distinct escape.
    return PERCENT_MATCHER.sub(_upper_case_escape, s)
|
---|
| 109 |
|
---|
| 110 |
|
---|
def remove_dot_segments(s):
    """Collapse ``.`` and ``..`` segments of a path.

    See also Section 5.2.4 of :rfc:`3986`.

    :param s: The path string to process.
    :returns: The path with ``.`` segments dropped and each ``..`` segment
        cancelling the segment kept before it, when there is one.
    """
    # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
    kept = []

    for part in s.split("/"):
        if part == "..":
            # Step back one level, unless there is nothing left to drop.
            if kept:
                kept.pop()
        elif part != ".":
            # '.' refers to the current directory and is simply skipped;
            # every other segment is carried over unchanged.
            kept.append(part)

    # A path that starts with '/' must keep its leading slash: make sure the
    # first kept segment is the empty string.
    if s.startswith("/") and not (kept and not kept[0]):
        kept.insert(0, "")

    # A path that ends with '/.' or '/..' gets one extra empty segment so
    # that the joined result ends with '/'.
    if s.endswith(("/.", "/..")):
        kept.append("")

    return "/".join(kept)
|
---|
| 143 |
|
---|
| 144 |
|
---|
def encode_component(uri_component, encoding):
    """Percent-encode a URI component using the given encoding.

    :param uri_component: The component to encode, or ``None``.
    :param encoding: The encoding name handed to ``compat.to_str``,
        ``compat.to_bytes`` and the final ``decode`` call.
    :returns: ``None`` when ``uri_component`` is ``None``; otherwise the
        component with every byte that is not allowed to appear literally
        replaced by an upper-case ``%XX`` escape.
    """
    if uri_component is None:
        return uri_component

    # The component counts as already percent-encoded when every '%' byte in
    # it starts a well-formed escape. In that case '%' is passed through
    # rather than being escaped a second time.
    well_formed_escapes = len(
        PERCENT_MATCHER.findall(compat.to_str(uri_component, encoding))
    )
    raw = compat.to_bytes(uri_component, encoding)
    keep_percent = raw.count(b"%") == well_formed_escapes

    result = bytearray()
    for index in range(len(raw)):
        # Slicing yields a one-byte bytestring on both Python 2 and 3.
        octet = raw[index : index + 1]
        code = ord(octet)
        passthrough = (keep_percent and octet == b"%") or (
            code < 128 and octet.decode() in misc.NON_PCT_ENCODED
        )
        if passthrough:
            result.extend(octet)
        else:
            result.extend("%{0:02x}".format(code).encode().upper())

    return result.decode(encoding)
|
---|