source: uri/rfc3986/parseresult.py@ 1351

Last change on this file since 1351 was 230, checked in by wouter, 3 years ago

#91 clone https://pypi.org/project/rfc3986/

File size: 14.4 KB
Line 
1# -*- coding: utf-8 -*-
2# Copyright (c) 2015 Ian Stapleton Cordasco
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12# implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Module containing the urlparse compatibility logic."""
16from collections import namedtuple
17
18from . import compat
19from . import exceptions
20from . import misc
21from . import normalizers
22from . import uri
23
# Names exported by a star-import of this module.
__all__ = ("ParseResult", "ParseResultBytes")

# Field names, in order, of the namedtuples underlying ParseResult and
# ParseResultBytes. The same ordering is relied on when values are zipped
# with these names in ``copy_with`` and ``encode``.
PARSED_COMPONENTS = (
    "scheme",
    "userinfo",
    "host",
    "port",
    "path",
    "query",
    "fragment",
)
35
36
class ParseResultMixin(object):
    """Behaviour shared by :class:`ParseResult` and :class:`ParseResultBytes`.

    The class this is mixed into must provide ``userinfo``, ``host``,
    ``port``, ``query``, ``authority``, ``encoding`` and ``unsplit()``.
    """

    def _generate_authority(self, attributes):
        """Return the authority, as text, for a copy built from *attributes*.

        :param dict attributes: Mapping holding at least the keys
            ``"userinfo"``, ``"host"`` and ``"port"``.
        :returns: A freshly normalized authority when any of the three
            parts differs from this instance's value; otherwise this
            instance's current authority (decoded first when it is bytes).
        """
        # I swear I did not align the comparisons below. That's just how they
        # happened to align based on pep8 and attribute lengths.
        userinfo, host, port = (
            attributes[p] for p in ("userinfo", "host", "port")
        )
        if (
            self.userinfo != userinfo
            or self.host != host
            or self.port != port
        ):
            if port:
                # The normalizer receives the port as text, not as an int.
                port = "{0}".format(port)
            return normalizers.normalize_authority(
                (
                    compat.to_str(userinfo, self.encoding),
                    compat.to_str(host, self.encoding),
                    port,
                )
            )
        authority = self.authority
        if isinstance(authority, bytes):
            # A bytes authority was produced with this instance's encoding,
            # so it must be decoded with that same encoding rather than a
            # hard-coded "utf-8".
            return authority.decode(self.encoding)
        return authority

    def geturl(self):
        """Shim to match the standard library method.

        :returns: Whatever ``self.unsplit()`` returns.
        """
        return self.unsplit()

    @property
    def hostname(self):
        """Shim to match the standard library; alias of ``host``."""
        return self.host

    @property
    def netloc(self):
        """Shim to match the standard library; alias of ``authority``."""
        return self.authority

    @property
    def params(self):
        """Shim to match the standard library; returns ``query``."""
        return self.query
80
81
class ParseResult(
    namedtuple("ParseResult", PARSED_COMPONENTS), ParseResultMixin
):
    """Implementation of urlparse compatibility class.

    This uses the URIReference logic to handle compatibility with the
    urlparse.ParseResult class.

    Besides the tuple fields, each instance carries ``encoding`` and
    ``reference`` (the URI reference the components came from).
    """

    # NOTE(review): this is a plain class attribute named ``slots``; only
    # ``__slots__`` has special meaning to Python. Confirm the intent before
    # renaming it -- ``__new__`` below assigns instance attributes.
    slots = ()

    def __new__(
        cls,
        scheme,
        userinfo,
        host,
        port,
        path,
        query,
        fragment,
        uri_ref,
        encoding="utf-8",
    ):
        """Create a new ParseResult.

        Falsy ``scheme``, ``userinfo``, ``port`` and ``path`` values are
        stored as ``None``; the other fields are stored as given.

        :param uri_ref: Reference object stored as ``reference``.
        :param str encoding: Stored as ``encoding``.
        """
        parse_result = super(ParseResult, cls).__new__(
            cls,
            scheme or None,
            userinfo or None,
            host,
            port or None,
            path or None,
            query,
            fragment,
        )
        parse_result.encoding = encoding
        parse_result.reference = uri_ref
        return parse_result

    @classmethod
    def from_parts(
        cls,
        scheme=None,
        userinfo=None,
        host=None,
        port=None,
        path=None,
        query=None,
        fragment=None,
        encoding="utf-8",
    ):
        """Create a ParseResult instance from its parts.

        The authority is assembled as ``userinfo@host:port`` from whichever
        of the three parts are not ``None``. The resulting reference is
        normalized, and its authority is parsed strictly.

        :returns: A new instance of ``cls``.
        """
        authority = ""
        if userinfo is not None:
            authority += userinfo + "@"
        if host is not None:
            authority += host
        if port is not None:
            authority += ":{0}".format(port)
        uri_ref = uri.URIReference(
            scheme=scheme,
            authority=authority,
            path=path,
            query=query,
            fragment=fragment,
            encoding=encoding,
        ).normalize()
        # Re-read the authority parts from the normalized reference.
        userinfo, host, port = authority_from(uri_ref, strict=True)
        return cls(
            scheme=uri_ref.scheme,
            userinfo=userinfo,
            host=host,
            port=port,
            path=uri_ref.path,
            query=uri_ref.query,
            fragment=uri_ref.fragment,
            uri_ref=uri_ref,
            encoding=encoding,
        )

    @classmethod
    def from_string(
        cls, uri_string, encoding="utf-8", strict=True, lazy_normalize=True
    ):
        """Parse a URI from the given unicode URI string.

        :param str uri_string: Unicode URI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :param bool strict: Parse strictly according to :rfc:`3986` if True.
            If False, parse similarly to the standard library's urlparse
            function.
        :param bool lazy_normalize: If False, the parsed reference is
            normalized before its components are extracted.
        :returns: :class:`ParseResult` or subclass thereof
        """
        reference = uri.URIReference.from_string(uri_string, encoding)
        if not lazy_normalize:
            reference = reference.normalize()
        userinfo, host, port = authority_from(reference, strict)

        return cls(
            scheme=reference.scheme,
            userinfo=userinfo,
            host=host,
            port=port,
            path=reference.path,
            query=reference.query,
            fragment=reference.fragment,
            uri_ref=reference,
            encoding=encoding,
        )

    @property
    def authority(self):
        """Return the authority of the underlying reference."""
        return self.reference.authority

    def copy_with(
        self,
        scheme=misc.UseExisting,
        userinfo=misc.UseExisting,
        host=misc.UseExisting,
        port=misc.UseExisting,
        path=misc.UseExisting,
        query=misc.UseExisting,
        fragment=misc.UseExisting,
    ):
        """Create a copy of this instance replacing with specified parts.

        Any argument left as ``misc.UseExisting`` keeps this instance's
        current value for that component.

        :returns: A new :class:`ParseResult`.
        """
        attributes = zip(
            PARSED_COMPONENTS,
            (scheme, userinfo, host, port, path, query, fragment),
        )
        attrs_dict = {}
        for name, value in attributes:
            if value is misc.UseExisting:
                value = getattr(self, name)
            attrs_dict[name] = value
        authority = self._generate_authority(attrs_dict)
        ref = self.reference.copy_with(
            scheme=attrs_dict["scheme"],
            authority=authority,
            path=attrs_dict["path"],
            query=attrs_dict["query"],
            fragment=attrs_dict["fragment"],
        )
        return ParseResult(uri_ref=ref, encoding=self.encoding, **attrs_dict)

    def encode(self, encoding=None):
        """Convert to an instance of ParseResultBytes.

        :param str encoding: Encoding to use; defaults to this instance's
            ``encoding`` when falsy.
        :returns: A :class:`ParseResultBytes` sharing this instance's
            reference.
        """
        encoding = encoding or self.encoding
        attrs = dict(
            zip(
                PARSED_COMPONENTS,
                (
                    # Fields without an ``encode`` method (e.g. ``None`` or
                    # an integer port) are passed through unchanged.
                    attr.encode(encoding) if hasattr(attr, "encode") else attr
                    for attr in self
                ),
            )
        )
        return ParseResultBytes(
            uri_ref=self.reference, encoding=encoding, **attrs
        )

    def unsplit(self, use_idna=False):
        """Create a URI string from the components.

        :param bool use_idna: If True and a host is present, the host is
            IDNA-encoded before the URI is reassembled.
        :returns: The parsed URI reconstituted as a string.
        :rtype: str
        """
        parse_result = self
        if use_idna and self.host:
            hostbytes = self.host.encode("idna")
            # The idna codec always emits ASCII, so decode it as ASCII
            # instead of with ``self.encoding`` (which need not be
            # ASCII-compatible, e.g. utf-16).
            host = hostbytes.decode("ascii")
            parse_result = self.copy_with(host=host)
        return parse_result.reference.unsplit()
254
255
class ParseResultBytes(
    namedtuple("ParseResultBytes", PARSED_COMPONENTS), ParseResultMixin
):
    """Compatibility shim for the urlparse.ParseResultBytes object.

    Besides the tuple fields, each instance carries ``encoding``,
    ``reference`` and ``lazy_normalize``.
    """

    def __new__(
        cls,
        scheme,
        userinfo,
        host,
        port,
        path,
        query,
        fragment,
        uri_ref,
        encoding="utf-8",
        lazy_normalize=True,
    ):
        """Create a new ParseResultBytes instance.

        Every falsy field except ``host`` is stored as ``None``.

        :param uri_ref: Reference object stored as ``reference``.
        :param str encoding: Stored as ``encoding``.
        :param bool lazy_normalize: Stored as ``lazy_normalize``.
        """
        parse_result = super(ParseResultBytes, cls).__new__(
            cls,
            scheme or None,
            userinfo or None,
            host,
            port or None,
            path or None,
            query or None,
            fragment or None,
        )
        parse_result.encoding = encoding
        parse_result.reference = uri_ref
        parse_result.lazy_normalize = lazy_normalize
        return parse_result

    @classmethod
    def from_parts(
        cls,
        scheme=None,
        userinfo=None,
        host=None,
        port=None,
        path=None,
        query=None,
        fragment=None,
        encoding="utf-8",
        lazy_normalize=True,
    ):
        """Create a ParseResultBytes instance from its parts.

        The authority is assembled as ``userinfo@host:port`` from whichever
        of the three parts are not ``None``; ``port`` is passed through
        ``int()`` first. The reference is normalized only when
        ``lazy_normalize`` is False, and its authority is parsed strictly.

        :returns: A new instance of ``cls``.
        """
        authority = ""
        if userinfo is not None:
            authority += userinfo + "@"
        if host is not None:
            authority += host
        if port is not None:
            authority += ":{0}".format(int(port))
        uri_ref = uri.URIReference(
            scheme=scheme,
            authority=authority,
            path=path,
            query=query,
            fragment=fragment,
            encoding=encoding,
        )
        if not lazy_normalize:
            uri_ref = uri_ref.normalize()
        to_bytes = compat.to_bytes
        userinfo, host, port = authority_from(uri_ref, strict=True)
        # NOTE(review): scheme/path/query/fragment below are the arguments
        # as given, not the values on ``uri_ref``, so they do not reflect
        # normalization when ``lazy_normalize`` is False -- confirm intended.
        return cls(
            scheme=to_bytes(scheme, encoding),
            userinfo=to_bytes(userinfo, encoding),
            host=to_bytes(host, encoding),
            port=port,
            path=to_bytes(path, encoding),
            query=to_bytes(query, encoding),
            fragment=to_bytes(fragment, encoding),
            uri_ref=uri_ref,
            encoding=encoding,
            lazy_normalize=lazy_normalize,
        )

    @classmethod
    def from_string(
        cls, uri_string, encoding="utf-8", strict=True, lazy_normalize=True
    ):
        """Parse a URI from the given unicode URI string.

        :param str uri_string: Unicode URI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :param bool strict: Parse strictly according to :rfc:`3986` if True.
            If False, parse similarly to the standard library's urlparse
            function.
        :param bool lazy_normalize: If False, the parsed reference is
            normalized before its components are extracted.
        :returns: :class:`ParseResultBytes` or subclass thereof
        """
        reference = uri.URIReference.from_string(uri_string, encoding)
        if not lazy_normalize:
            reference = reference.normalize()
        userinfo, host, port = authority_from(reference, strict)

        to_bytes = compat.to_bytes
        return cls(
            scheme=to_bytes(reference.scheme, encoding),
            userinfo=to_bytes(userinfo, encoding),
            host=to_bytes(host, encoding),
            port=port,
            path=to_bytes(reference.path, encoding),
            query=to_bytes(reference.query, encoding),
            fragment=to_bytes(reference.fragment, encoding),
            uri_ref=reference,
            encoding=encoding,
            lazy_normalize=lazy_normalize,
        )

    @property
    def authority(self):
        """Return the reference's authority encoded with ``self.encoding``.

        :returns: The authority as bytes, or ``None`` when the underlying
            reference has no authority.
        """
        authority = self.reference.authority
        if authority is None:
            # Without this guard, ``netloc``, ``copy_with`` and ``unsplit``
            # all fail with AttributeError for references that carry no
            # authority component.
            return None
        return authority.encode(self.encoding)

    def copy_with(
        self,
        scheme=misc.UseExisting,
        userinfo=misc.UseExisting,
        host=misc.UseExisting,
        port=misc.UseExisting,
        path=misc.UseExisting,
        query=misc.UseExisting,
        fragment=misc.UseExisting,
        lazy_normalize=True,
    ):
        """Create a copy of this instance replacing with specified parts.

        Any component left as ``misc.UseExisting`` keeps this instance's
        current value. Replacement values that are not bytes but have an
        ``encode`` method are encoded with ``self.encoding``.

        :param bool lazy_normalize: If False, the copied reference is
            normalized before being attached to the new instance.
        :returns: A new :class:`ParseResultBytes`.
        """
        attributes = zip(
            PARSED_COMPONENTS,
            (scheme, userinfo, host, port, path, query, fragment),
        )
        attrs_dict = {}
        for name, value in attributes:
            if value is misc.UseExisting:
                value = getattr(self, name)
            if not isinstance(value, bytes) and hasattr(value, "encode"):
                value = value.encode(self.encoding)
            attrs_dict[name] = value
        authority = self._generate_authority(attrs_dict)
        # The reference works on text, so convert the bytes fields back.
        to_str = compat.to_str
        ref = self.reference.copy_with(
            scheme=to_str(attrs_dict["scheme"], self.encoding),
            authority=to_str(authority, self.encoding),
            path=to_str(attrs_dict["path"], self.encoding),
            query=to_str(attrs_dict["query"], self.encoding),
            fragment=to_str(attrs_dict["fragment"], self.encoding),
        )
        if not lazy_normalize:
            ref = ref.normalize()
        return ParseResultBytes(
            uri_ref=ref,
            encoding=self.encoding,
            lazy_normalize=lazy_normalize,
            **attrs_dict
        )

    def unsplit(self, use_idna=False):
        """Create a URI bytes object from the components.

        :param bool use_idna: If True and a host is present, the host is
            IDNA-encoded before the URI is reassembled.
        :returns: The parsed URI reconstituted and encoded with
            ``self.encoding``.
        :rtype: bytes
        """
        parse_result = self
        if use_idna and self.host:
            # self.host is bytes, to encode to idna, we need to decode it
            # first
            host = self.host.decode(self.encoding)
            hostbytes = host.encode("idna")
            parse_result = self.copy_with(host=hostbytes)
        if self.lazy_normalize:
            # Normalization was deferred at construction; apply it now.
            parse_result = parse_result.copy_with(lazy_normalize=False)
        # Named ``uri_text`` to avoid shadowing the imported ``uri`` module.
        uri_text = parse_result.reference.unsplit()
        return uri_text.encode(self.encoding)
431
432
def split_authority(authority):
    """Leniently split an authority string into its three parts.

    :param str authority: Text of the form ``[userinfo@]host[:port]``;
        the host may be a bracketed literal such as ``[::1]``.
    :returns: ``(userinfo, host, port)``; a part that is absent is
        ``None``. The port is returned as text, not converted to int.
    :raises ValueError: If the host starts with ``[`` but has no
        closing ``]``.
    """
    # Initialize our expected return values
    userinfo = host = port = None
    # Initialize an extra var we may need to use
    extra_host = None
    # Set-up rest in case there is no userinfo portion
    rest = authority

    if "@" in authority:
        # Split on the last "@" so userinfo may itself contain "@".
        userinfo, rest = authority.rsplit("@", 1)

    # Handle IPv6 host addresses
    if rest.startswith("["):
        host, closing, rest = rest.partition("]")
        if not closing:
            # Fail with a clear message instead of an accidental
            # tuple-unpacking error.
            raise ValueError(
                "Invalid authority {0!r}: missing closing bracket".format(
                    authority
                )
            )
        host += "]"

    if ":" in rest:
        extra_host, port = rest.split(":", 1)
    elif not host and rest:
        host = rest

    # Text before the port separator is the host unless a bracketed
    # host was already found.
    if extra_host and not host:
        host = extra_host

    return userinfo, host, port
458
459
def authority_from(reference, strict):
    """Extract ``(userinfo, host, port)`` from a URI reference.

    :param reference: Object providing ``authority_info()`` and
        ``authority``.
    :param bool strict: If True, an invalid authority is re-raised; if
        False, the raw authority is split leniently instead.
    :returns: ``(userinfo, host, port)``; a truthy port is converted to
        ``int``.
    :raises exceptions.InvalidAuthority: In strict mode, when the
        reference reports its authority as invalid.
    :raises exceptions.InvalidPort: If the port cannot be converted to
        an integer.
    """
    try:
        parsed = reference.authority_info()
    except exceptions.InvalidAuthority:
        if strict:
            raise
        parts = split_authority(reference.authority)
    else:
        # Thanks to Richard Barrell for this idea:
        # https://twitter.com/0x2ba22e11/status/617338811975139328
        parts = tuple(
            parsed.get(key) for key in ("userinfo", "host", "port")
        )

    userinfo, host, port = parts
    if not port:
        # Nothing to convert; hand the falsy port back untouched.
        return userinfo, host, port

    try:
        return userinfo, host, int(port)
    except ValueError:
        raise exceptions.InvalidPort(port)
Note: See TracBrowser for help on using the repository browser.