1 | # -*- coding: utf-8 -*-
|
---|
2 | # Copyright (c) 2015 Ian Stapleton Cordasco
|
---|
3 | # Licensed under the Apache License, Version 2.0 (the "License");
|
---|
4 | # you may not use this file except in compliance with the License.
|
---|
5 | # You may obtain a copy of the License at
|
---|
6 | #
|
---|
7 | # http://www.apache.org/licenses/LICENSE-2.0
|
---|
8 | #
|
---|
9 | # Unless required by applicable law or agreed to in writing, software
|
---|
10 | # distributed under the License is distributed on an "AS IS" BASIS,
|
---|
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
---|
12 | # implied.
|
---|
13 | # See the License for the specific language governing permissions and
|
---|
14 | # limitations under the License.
|
---|
15 | """Module containing the urlparse compatibility logic."""
|
---|
16 | from collections import namedtuple
|
---|
17 |
|
---|
18 | from . import compat
|
---|
19 | from . import exceptions
|
---|
20 | from . import misc
|
---|
21 | from . import normalizers
|
---|
22 | from . import uri
|
---|
23 |
|
---|
24 | __all__ = ("ParseResult", "ParseResultBytes")
|
---|
25 |
|
---|
# Field names, in positional order, used to build the ``ParseResult`` and
# ``ParseResultBytes`` namedtuples below and to pair names with values in
# their ``copy_with``/``encode`` methods.
PARSED_COMPONENTS = (
    "scheme", "userinfo", "host", "port",
    "path", "query", "fragment",
)
35 |
|
---|
36 |
|
---|
class ParseResultMixin(object):
    """Behaviour shared by the parse-result classes in this module.

    The methods below read ``userinfo``, ``host``, ``port``, ``query``,
    ``authority`` and ``encoding`` from ``self`` and call
    ``self.unsplit()``; the concrete class is expected to provide them.
    """

    def _generate_authority(self, attributes):
        """Return the authority to use for a copy built from ``attributes``.

        :param dict attributes: Mapping containing at least the
            ``"userinfo"``, ``"host"`` and ``"port"`` keys.
        :returns: The result of ``normalizers.normalize_authority`` when
            any of the three values differs from the one stored on this
            instance; otherwise the current authority, decoded to text if
            it is ``bytes``.
        """
        # I swear I did not align the comparisons below. That's just how they
        # happened to align based on pep8 and attribute lengths.
        userinfo, host, port = (
            attributes[p] for p in ("userinfo", "host", "port")
        )
        if (
            self.userinfo != userinfo
            or self.host != host
            or self.port != port
        ):
            if port:
                # The port may be an int; the normalizer is given text.
                port = "{0}".format(port)
            return normalizers.normalize_authority(
                (
                    compat.to_str(userinfo, self.encoding),
                    compat.to_str(host, self.encoding),
                    port,
                )
            )
        authority = self.authority
        if isinstance(authority, bytes):
            # Decode with the instance's own encoding rather than a
            # hard-coded "utf-8": a bytes authority was produced with
            # ``self.encoding`` and must be read back the same way.
            return authority.decode(self.encoding)
        return authority

    def geturl(self):
        """Shim to match the standard library method."""
        return self.unsplit()

    @property
    def hostname(self):
        """Shim to match the standard library; returns ``self.host``."""
        return self.host

    @property
    def netloc(self):
        """Shim to match the standard library; returns ``self.authority``."""
        return self.authority

    @property
    def params(self):
        """Shim to match the standard library; returns ``self.query``."""
        return self.query
80 |
|
---|
81 |
|
---|
class ParseResult(
    namedtuple("ParseResult", PARSED_COMPONENTS), ParseResultMixin
):
    """Implementation of urlparse compatibility class.

    This uses the URIReference logic to handle compatibility with the
    urlparse.ParseResult class.
    """

    # NOTE(review): this is a plain class attribute named ``slots``, not
    # ``__slots__``. ``__new__`` below attaches ``encoding`` and
    # ``reference`` to each instance, which requires a per-instance
    # ``__dict__``.
    slots = ()

    def __new__(
        cls,
        scheme,
        userinfo,
        host,
        port,
        path,
        query,
        fragment,
        uri_ref,
        encoding="utf-8",
    ):
        """Create a new ParseResult.

        Falsy ``scheme``, ``userinfo``, ``port`` and ``path`` values are
        stored as ``None``; ``host``, ``query`` and ``fragment`` are stored
        unchanged.

        :param uri_ref: The reference this result was derived from; kept
            on the instance as ``reference``.
        :param str encoding: Kept on the instance as ``encoding``.
        """
        parse_result = super(ParseResult, cls).__new__(
            cls,
            scheme or None,
            userinfo or None,
            host,
            port or None,
            path or None,
            query,
            fragment,
        )
        parse_result.encoding = encoding
        parse_result.reference = uri_ref
        return parse_result

    @classmethod
    def from_parts(
        cls,
        scheme=None,
        userinfo=None,
        host=None,
        port=None,
        path=None,
        query=None,
        fragment=None,
        encoding="utf-8",
    ):
        """Create a ParseResult instance from its parts.

        The authority string is assembled as ``userinfo@host:port`` from
        whichever of the three parts are not ``None``; a normalized
        ``URIReference`` is then built and its components are used for the
        result.

        :returns: A new instance of ``cls``.
        """
        authority = ""
        if userinfo is not None:
            authority += userinfo + "@"
        if host is not None:
            authority += host
        if port is not None:
            authority += ":{0}".format(port)
        uri_ref = uri.URIReference(
            scheme=scheme,
            authority=authority,
            path=path,
            query=query,
            fragment=fragment,
            encoding=encoding,
        ).normalize()
        # Re-derive the authority parts from the normalized reference.
        userinfo, host, port = authority_from(uri_ref, strict=True)
        return cls(
            scheme=uri_ref.scheme,
            userinfo=userinfo,
            host=host,
            port=port,
            path=uri_ref.path,
            query=uri_ref.query,
            fragment=uri_ref.fragment,
            uri_ref=uri_ref,
            encoding=encoding,
        )

    @classmethod
    def from_string(
        cls, uri_string, encoding="utf-8", strict=True, lazy_normalize=True
    ):
        """Parse a URI from the given unicode URI string.

        :param str uri_string: Unicode URI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :param bool strict: Parse strictly according to :rfc:`3986` if True.
            If False, parse similarly to the standard library's urlparse
            function.
        :param bool lazy_normalize: If False, the parsed reference is
            normalized before its components are extracted.
        :returns: :class:`ParseResult` or subclass thereof
        """
        reference = uri.URIReference.from_string(uri_string, encoding)
        if not lazy_normalize:
            reference = reference.normalize()
        userinfo, host, port = authority_from(reference, strict)

        return cls(
            scheme=reference.scheme,
            userinfo=userinfo,
            host=host,
            port=port,
            path=reference.path,
            query=reference.query,
            fragment=reference.fragment,
            uri_ref=reference,
            encoding=encoding,
        )

    @property
    def authority(self):
        """Return the authority of the underlying reference."""
        return self.reference.authority

    def copy_with(
        self,
        scheme=misc.UseExisting,
        userinfo=misc.UseExisting,
        host=misc.UseExisting,
        port=misc.UseExisting,
        path=misc.UseExisting,
        query=misc.UseExisting,
        fragment=misc.UseExisting,
    ):
        """Create a copy of this instance replacing with specified parts.

        Any argument left as ``misc.UseExisting`` keeps the value currently
        stored on this instance.

        :returns: A new :class:`ParseResult` that shares this instance's
            encoding.
        """
        attributes = zip(
            PARSED_COMPONENTS,
            (scheme, userinfo, host, port, path, query, fragment),
        )
        attrs_dict = {}
        for name, value in attributes:
            if value is misc.UseExisting:
                value = getattr(self, name)
            attrs_dict[name] = value
        authority = self._generate_authority(attrs_dict)
        ref = self.reference.copy_with(
            scheme=attrs_dict["scheme"],
            authority=authority,
            path=attrs_dict["path"],
            query=attrs_dict["query"],
            fragment=attrs_dict["fragment"],
        )
        return ParseResult(uri_ref=ref, encoding=self.encoding, **attrs_dict)

    def encode(self, encoding=None):
        """Convert to an instance of ParseResultBytes.

        :param str encoding: Encoding to use; falls back to
            ``self.encoding`` when falsy.
        :returns: A :class:`ParseResultBytes` sharing this instance's
            reference.
        """
        encoding = encoding or self.encoding
        attrs = dict(
            zip(
                PARSED_COMPONENTS,
                (
                    # Values without an ``encode`` method (e.g. ``None`` or
                    # an integer port) are passed through untouched.
                    attr.encode(encoding) if hasattr(attr, "encode") else attr
                    for attr in self
                ),
            )
        )
        return ParseResultBytes(
            uri_ref=self.reference, encoding=encoding, **attrs
        )

    def unsplit(self, use_idna=False):
        """Create a URI string from the components.

        :param bool use_idna: If True and a host is present, the host is
            replaced by its IDNA-encoded form before the URI is rebuilt.
        :returns: The parsed URI reconstituted as a string.
        :rtype: str
        """
        parse_result = self
        if use_idna and self.host:
            hostbytes = self.host.encode("idna")
            # The "idna" codec always emits ASCII, so decode it as ASCII
            # instead of with ``self.encoding``; the latter breaks for
            # encodings that are not ASCII-compatible (e.g. UTF-16).
            host = hostbytes.decode("ascii")
            parse_result = self.copy_with(host=host)
        return parse_result.reference.unsplit()
254 |
|
---|
255 |
|
---|
class ParseResultBytes(
    namedtuple("ParseResultBytes", PARSED_COMPONENTS), ParseResultMixin
):
    """Compatibility shim for the urlparse.ParseResultBytes object."""

    def __new__(
        cls,
        scheme,
        userinfo,
        host,
        port,
        path,
        query,
        fragment,
        uri_ref,
        encoding="utf-8",
        lazy_normalize=True,
    ):
        """Create a new ParseResultBytes instance.

        Every falsy component except ``host`` is stored as ``None``;
        ``host`` is stored unchanged.

        :param uri_ref: The reference this result was derived from; kept
            on the instance as ``reference``.
        :param str encoding: Kept on the instance as ``encoding``.
        :param bool lazy_normalize: Kept on the instance as
            ``lazy_normalize``; consulted by :meth:`unsplit`.
        """
        parse_result = super(ParseResultBytes, cls).__new__(
            cls,
            scheme or None,
            userinfo or None,
            host,
            port or None,
            path or None,
            query or None,
            fragment or None,
        )
        parse_result.encoding = encoding
        parse_result.reference = uri_ref
        parse_result.lazy_normalize = lazy_normalize
        return parse_result

    @classmethod
    def from_parts(
        cls,
        scheme=None,
        userinfo=None,
        host=None,
        port=None,
        path=None,
        query=None,
        fragment=None,
        encoding="utf-8",
        lazy_normalize=True,
    ):
        """Create a ParseResultBytes instance from its parts.

        The authority string is assembled as ``userinfo@host:port`` from
        whichever of the three parts are not ``None`` (the port is passed
        through ``int()`` first) and used to build a ``URIReference``.

        :param bool lazy_normalize: If False, the reference is normalized
            before the authority parts are extracted from it.
        :returns: A new instance of ``cls``.
        """
        authority = ""
        if userinfo is not None:
            authority += userinfo + "@"
        if host is not None:
            authority += host
        if port is not None:
            authority += ":{0}".format(int(port))
        uri_ref = uri.URIReference(
            scheme=scheme,
            authority=authority,
            path=path,
            query=query,
            fragment=fragment,
            encoding=encoding,
        )
        if not lazy_normalize:
            uri_ref = uri_ref.normalize()
        to_bytes = compat.to_bytes
        userinfo, host, port = authority_from(uri_ref, strict=True)
        # Only the authority parts are taken from the reference; scheme,
        # path, query and fragment are the values passed by the caller.
        return cls(
            scheme=to_bytes(scheme, encoding),
            userinfo=to_bytes(userinfo, encoding),
            host=to_bytes(host, encoding),
            port=port,
            path=to_bytes(path, encoding),
            query=to_bytes(query, encoding),
            fragment=to_bytes(fragment, encoding),
            uri_ref=uri_ref,
            encoding=encoding,
            lazy_normalize=lazy_normalize,
        )

    @classmethod
    def from_string(
        cls, uri_string, encoding="utf-8", strict=True, lazy_normalize=True
    ):
        """Parse a URI from the given unicode URI string.

        :param str uri_string: Unicode URI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :param bool strict: Parse strictly according to :rfc:`3986` if True.
            If False, parse similarly to the standard library's urlparse
            function.
        :param bool lazy_normalize: If False, the parsed reference is
            normalized before its components are extracted.
        :returns: :class:`ParseResultBytes` or subclass thereof
        """
        reference = uri.URIReference.from_string(uri_string, encoding)
        if not lazy_normalize:
            reference = reference.normalize()
        userinfo, host, port = authority_from(reference, strict)

        to_bytes = compat.to_bytes
        return cls(
            scheme=to_bytes(reference.scheme, encoding),
            userinfo=to_bytes(userinfo, encoding),
            host=to_bytes(host, encoding),
            port=port,
            path=to_bytes(reference.path, encoding),
            query=to_bytes(reference.query, encoding),
            fragment=to_bytes(reference.fragment, encoding),
            uri_ref=reference,
            encoding=encoding,
            lazy_normalize=lazy_normalize,
        )

    @property
    def authority(self):
        """Return the reference's authority encoded with ``self.encoding``.

        ``None`` is returned when the reference has no authority.
        """
        authority = self.reference.authority
        # NOTE(review): assumes ``URIReference.authority`` may be ``None``
        # for references without an authority -- confirm. Without this
        # guard the ``.encode`` call below raises ``AttributeError``, which
        # also breaks ``copy_with`` (it reads this property through
        # ``_generate_authority``).
        if authority is None:
            return None
        return authority.encode(self.encoding)

    def copy_with(
        self,
        scheme=misc.UseExisting,
        userinfo=misc.UseExisting,
        host=misc.UseExisting,
        port=misc.UseExisting,
        path=misc.UseExisting,
        query=misc.UseExisting,
        fragment=misc.UseExisting,
        lazy_normalize=True,
    ):
        """Create a copy of this instance replacing with specified parts.

        Any argument left as ``misc.UseExisting`` keeps the value currently
        stored on this instance. Text values are encoded with
        ``self.encoding`` before being stored on the copy.

        :param bool lazy_normalize: If False, the copied reference is
            normalized before the new instance is created.
        :returns: A new :class:`ParseResultBytes` that shares this
            instance's encoding.
        """
        attributes = zip(
            PARSED_COMPONENTS,
            (scheme, userinfo, host, port, path, query, fragment),
        )
        attrs_dict = {}
        for name, value in attributes:
            if value is misc.UseExisting:
                value = getattr(self, name)
            if not isinstance(value, bytes) and hasattr(value, "encode"):
                value = value.encode(self.encoding)
            attrs_dict[name] = value
        authority = self._generate_authority(attrs_dict)
        # The underlying reference is given text, so convert back.
        to_str = compat.to_str
        ref = self.reference.copy_with(
            scheme=to_str(attrs_dict["scheme"], self.encoding),
            authority=to_str(authority, self.encoding),
            path=to_str(attrs_dict["path"], self.encoding),
            query=to_str(attrs_dict["query"], self.encoding),
            fragment=to_str(attrs_dict["fragment"], self.encoding),
        )
        if not lazy_normalize:
            ref = ref.normalize()
        return ParseResultBytes(
            uri_ref=ref,
            encoding=self.encoding,
            lazy_normalize=lazy_normalize,
            **attrs_dict
        )

    def unsplit(self, use_idna=False):
        """Create a URI bytes object from the components.

        :param bool use_idna: If True and a host is present, the host is
            replaced by its IDNA-encoded form before the URI is rebuilt.
        :returns: The parsed URI reconstituted as a string.
        :rtype: bytes
        """
        parse_result = self
        if use_idna and self.host:
            # self.host is bytes, to encode to idna, we need to decode it
            # first
            host = self.host.decode(self.encoding)
            hostbytes = host.encode("idna")
            parse_result = self.copy_with(host=hostbytes)
        if self.lazy_normalize:
            # Normalization was deferred at construction time; apply it now
            # by copying with ``lazy_normalize=False``.
            parse_result = parse_result.copy_with(lazy_normalize=False)
        # Named ``uri_string`` so the local does not shadow the imported
        # ``uri`` module.
        uri_string = parse_result.reference.unsplit()
        return uri_string.encode(self.encoding)
431 |
|
---|
432 |
|
---|
def split_authority(authority):
    """Leniently split an authority string into its three parts.

    :param str authority: Text of the form ``[userinfo@]host[:port]``.
    :returns: A ``(userinfo, host, port)`` tuple. A part that is absent is
        ``None``; the port is returned as text, not converted to ``int``.
    :raises ValueError: If the host part starts with ``[`` but contains no
        closing ``]``.
    """
    # Everything before the last "@" is userinfo.
    userinfo = None
    remainder = authority
    if "@" in authority:
        userinfo, _, remainder = authority.rpartition("@")

    # A leading "[" marks a bracketed (IPv6) host; keep the brackets.
    host = None
    if remainder.startswith("["):
        bracketed, remainder = remainder.split("]", 1)
        host = bracketed + "]"

    port = None
    if ":" in remainder:
        candidate, _, port = remainder.partition(":")
        # Text before the colon is the host unless a bracketed host was
        # already found.
        if candidate and not host:
            host = candidate
    elif remainder and not host:
        host = remainder

    return userinfo, host, port
458 |
|
---|
459 |
|
---|
def authority_from(reference, strict):
    """Extract ``(userinfo, host, port)`` from a URI reference.

    :param reference: Object providing ``authority_info()`` and an
        ``authority`` attribute.
    :param bool strict: If True, ``exceptions.InvalidAuthority`` raised by
        ``reference.authority_info()`` is propagated. If False, the
        authority is split with :func:`split_authority` instead.
    :returns: A ``(userinfo, host, port)`` tuple; a truthy port is
        converted with ``int()``.
    :raises exceptions.InvalidPort: If the port cannot be converted with
        ``int()``.
    """
    try:
        info = reference.authority_info()
    except exceptions.InvalidAuthority:
        if strict:
            raise
        userinfo, host, port = split_authority(reference.authority)
    else:
        # Thanks to Richard Barrell for this idea:
        # https://twitter.com/0x2ba22e11/status/617338811975139328
        userinfo, host, port = map(info.get, ("userinfo", "host", "port"))

    if not port:
        # Nothing to convert; hand the falsy port back unchanged.
        return userinfo, host, port

    try:
        port_number = int(port)
    except ValueError:
        raise exceptions.InvalidPort(port)
    return userinfo, host, port_number