source: uri/rfc3986/iri.py@ 364

Last change on this file since 364 was 230, checked in by wouter, 4 years ago

#91 clone https://pypi.org/project/rfc3986/

File size: 5.4 KB
Line 
1"""Module containing the implementation of the IRIReference class."""
2# -*- coding: utf-8 -*-
3# Copyright (c) 2014 Rackspace
4# Copyright (c) 2015 Ian Stapleton Cordasco
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14# implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17from collections import namedtuple
18
19from . import compat
20from . import exceptions
21from . import misc
22from . import normalizers
23from . import uri
24
25
26try:
27 import idna
28except ImportError: # pragma: no cover
29 idna = None
30
31
class IRIReference(
    namedtuple("IRIReference", misc.URI_COMPONENTS), uri.URIMixin
):
    """Immutable object representing a parsed IRI Reference.

    Can be encoded into an URIReference object via the procedure
    specified in RFC 3987 Section 3.1

    .. note::
        The IRI submodule is a new interface and may possibly change in
        the future. Check for changes to the interface when upgrading.
    """

    # NOTE(review): this is a plain class attribute called ``slots``, not
    # ``__slots__``. Instances therefore keep a ``__dict__``, which
    # ``__new__`` relies on to store ``encoding``. Kept as-is on purpose.
    slots = ()

    def __new__(
        cls, scheme, authority, path, query, fragment, encoding="utf-8"
    ):
        """Create a new IRIReference.

        Falsy ``scheme``, ``authority`` and ``path`` values (e.g. ``""``)
        are stored as ``None``; ``query`` and ``fragment`` are stored
        exactly as given.

        :param scheme: Scheme component, or a falsy value if absent.
        :param authority: Authority component, or a falsy value if absent.
        :param path: Path component, or a falsy value if absent.
        :param query: Query component, stored unchanged.
        :param fragment: Fragment component, stored unchanged.
        :param str encoding: Encoding name kept on the instance as the
            ``encoding`` attribute (it is not one of the tuple fields).
        :returns: The new reference.
        """
        ref = super(IRIReference, cls).__new__(
            cls,
            scheme or None,
            authority or None,
            path or None,
            query,
            fragment,
        )
        ref.encoding = encoding
        return ref

    def __eq__(self, other):
        """Compare this reference to another.

        ``other`` may be an :class:`IRIReference`, a tuple of components
        (rebuilt through this class's constructor), or any other value
        accepted by :meth:`from_string`.

        Only the tuple fields are compared; the ``encoding`` attribute
        does not take part in the comparison.

        :raises TypeError: If parsing ``other`` with :meth:`from_string`
            raises :class:`TypeError`.
        """
        other_ref = other
        if isinstance(other, tuple):
            other_ref = self.__class__(*other)
        elif not isinstance(other, IRIReference):
            try:
                other_ref = self.__class__.from_string(other)
            except TypeError:
                raise TypeError(
                    "Unable to compare {0}() to {1}()".format(
                        type(self).__name__, type(other).__name__
                    )
                )

        # See http://tools.ietf.org/html/rfc3986#section-6.2
        return tuple(self) == tuple(other_ref)

    def _match_subauthority(self):
        """Match ``self.authority`` against ``misc.ISUBAUTHORITY_MATCHER``."""
        return misc.ISUBAUTHORITY_MATCHER.match(self.authority)

    @classmethod
    def from_string(cls, iri_string, encoding="utf-8"):
        """Parse a IRI reference from the given unicode IRI string.

        The scheme and authority are taken as matched; the path, query
        and fragment are passed through
        ``normalizers.encode_component`` before being stored.

        :param str iri_string: Unicode IRI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :returns: :class:`IRIReference` or subclass thereof
        """
        iri_string = compat.to_str(iri_string, encoding)

        split_iri = misc.IRI_MATCHER.match(iri_string).groupdict()
        return cls(
            split_iri["scheme"],
            split_iri["authority"],
            normalizers.encode_component(split_iri["path"], encoding),
            normalizers.encode_component(split_iri["query"], encoding),
            normalizers.encode_component(split_iri["fragment"], encoding),
            encoding,
        )

    def encode(self, idna_encoder=None):  # noqa: C901
        """Encode an IRIReference into a URIReference instance.

        If the ``idna`` module is installed or the ``rfc3986[idna]``
        extra is used then unicode characters in the IRI host
        component will be encoded with IDNA2008.

        The ``host``, ``userinfo`` and ``port`` attributes read here are
        not defined in this class (presumably they are supplied by
        ``uri.URIMixin``).

        :param idna_encoder:
            Function that encodes each dot-separated part of the host
            component. If not given, a default encoder built on the
            ``idna`` module is used; that default leaves pure-ASCII
            parts untouched.
        :rtype: uri.URIReference
        :returns: A URI reference
        :raises exceptions.MissingDependencyError:
            If the IRI has an authority, no ``idna_encoder`` was given
            and the ``idna`` module could not be imported.
        :raises exceptions.InvalidAuthority:
            If the default encoder is used and ``idna`` rejects a host
            part.
        """
        authority = self.authority
        if authority:
            if idna_encoder is None:
                if idna is None:  # pragma: no cover
                    raise exceptions.MissingDependencyError(
                        "Could not import the 'idna' module "
                        "and the IRI hostname requires encoding"
                    )

                def idna_encoder(name):
                    # ASCII ends at code point 127, so anything above it
                    # (including U+0080) must go through IDNA encoding.
                    if any(ord(c) > 127 for c in name):
                        try:
                            return idna.encode(
                                name.lower(), strict=True, std3_rules=True
                            )
                        except idna.IDNAError:
                            raise exceptions.InvalidAuthority(self.authority)
                    return name

            # Rebuild the authority piece by piece: host first, then the
            # optional userinfo prefix and port suffix.
            authority = ""
            if self.host:
                authority = ".".join(
                    [
                        compat.to_str(idna_encoder(part))
                        for part in self.host.split(".")
                    ]
                )

            if self.userinfo is not None:
                authority = (
                    normalizers.encode_component(self.userinfo, self.encoding)
                    + "@"
                    + authority
                )

            if self.port is not None:
                authority += ":" + str(self.port)

        return uri.URIReference(
            self.scheme,
            authority,
            path=self.path,
            query=self.query,
            fragment=self.fragment,
            encoding=self.encoding,
        )
Note: See TracBrowser for help on using the repository browser.