1 | """Module containing the implementation of the IRIReference class."""
|
---|
2 | # -*- coding: utf-8 -*-
|
---|
3 | # Copyright (c) 2014 Rackspace
|
---|
4 | # Copyright (c) 2015 Ian Stapleton Cordasco
|
---|
5 | # Licensed under the Apache License, Version 2.0 (the "License");
|
---|
6 | # you may not use this file except in compliance with the License.
|
---|
7 | # You may obtain a copy of the License at
|
---|
8 | #
|
---|
9 | # http://www.apache.org/licenses/LICENSE-2.0
|
---|
10 | #
|
---|
11 | # Unless required by applicable law or agreed to in writing, software
|
---|
12 | # distributed under the License is distributed on an "AS IS" BASIS,
|
---|
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
---|
14 | # implied.
|
---|
15 | # See the License for the specific language governing permissions and
|
---|
16 | # limitations under the License.
|
---|
17 | from collections import namedtuple
|
---|
18 |
|
---|
19 | from . import compat
|
---|
20 | from . import exceptions
|
---|
21 | from . import misc
|
---|
22 | from . import normalizers
|
---|
23 | from . import uri
|
---|
24 |
|
---|
25 |
|
---|
26 | try:
|
---|
27 | import idna
|
---|
28 | except ImportError: # pragma: no cover
|
---|
29 | idna = None
|
---|
30 |
|
---|
31 |
|
---|
class IRIReference(
    namedtuple("IRIReference", misc.URI_COMPONENTS), uri.URIMixin
):
    """Immutable object representing a parsed IRI Reference.

    Can be encoded into an URIReference object via the procedure
    specified in RFC 3987 Section 3.1

    .. note::
        The IRI submodule is a new interface and may possibly change in
        the future. Check for changes to the interface when upgrading.
    """

    # NOTE: this is a plain class attribute named ``slots`` (not
    # ``__slots__``), so instances keep a ``__dict__``; ``__new__`` relies
    # on that to attach the ``encoding`` attribute.
    slots = ()

    def __new__(
        cls, scheme, authority, path, query, fragment, encoding="utf-8"
    ):
        """Create a new IRIReference.

        Falsy ``scheme``, ``authority`` and ``path`` values (for example
        the empty string) are stored as ``None``; ``query`` and
        ``fragment`` are stored exactly as given.

        :param scheme: The scheme component, if any.
        :param authority: The authority component, if any.
        :param path: The path component, if any.
        :param query: The query component, if any.
        :param fragment: The fragment component, if any.
        :param str encoding: Encoding remembered on the instance as
            ``encoding``; it is not one of the tuple fields.
        :returns: The new reference.
        """
        ref = super(IRIReference, cls).__new__(
            cls,
            scheme or None,
            authority or None,
            path or None,
            query,
            fragment,
        )
        # Stored as an instance attribute rather than a tuple field.
        ref.encoding = encoding
        return ref

    def __eq__(self, other):
        """Compare this reference to another.

        ``other`` may be a tuple (which includes other references, since
        they are tuples) or anything ``from_string`` accepts. Only the
        five tuple components take part in the comparison; ``encoding``
        does not.

        :raises TypeError: If ``from_string`` raises ``TypeError`` while
            converting ``other``.
        """
        other_ref = other
        if isinstance(other, tuple):
            # Rebuild so the same ``or None`` normalization done in
            # ``__new__`` is applied to the other side too.
            other_ref = self.__class__(*other)
        elif not isinstance(other, IRIReference):
            try:
                other_ref = self.__class__.from_string(other)
            except TypeError:
                raise TypeError(
                    "Unable to compare {0}() to {1}()".format(
                        type(self).__name__, type(other).__name__
                    )
                )

        # See http://tools.ietf.org/html/rfc3986#section-6.2
        return tuple(self) == tuple(other_ref)

    def _match_subauthority(self):
        """Match this reference's authority against the IRI sub-authority pattern."""
        return misc.ISUBAUTHORITY_MATCHER.match(self.authority)

    @classmethod
    def from_string(cls, iri_string, encoding="utf-8"):
        """Parse an IRI reference from the given unicode IRI string.

        The scheme and authority are taken from the match unchanged,
        while path, query and fragment are passed through
        ``normalizers.encode_component`` first.

        :param str iri_string: Unicode IRI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :returns: :class:`IRIReference` or subclass thereof
        """
        iri_string = compat.to_str(iri_string, encoding)

        split_iri = misc.IRI_MATCHER.match(iri_string).groupdict()
        return cls(
            split_iri["scheme"],
            split_iri["authority"],
            normalizers.encode_component(split_iri["path"], encoding),
            normalizers.encode_component(split_iri["query"], encoding),
            normalizers.encode_component(split_iri["fragment"], encoding),
            encoding,
        )

    def encode(self, idna_encoder=None):  # noqa: C901
        """Encode an IRIReference into a URIReference instance.

        If the ``idna`` module is installed or the ``rfc3986[idna]``
        extra is used then unicode characters in the IRI host
        component will be encoded with IDNA2008.

        :param idna_encoder:
            Function that encodes each dot-separated part of the host
            component. If not given, a default encoder built on the
            ``idna`` module is used.
        :rtype: uri.URIReference
        :returns: A URI reference
        :raises exceptions.MissingDependencyError:
            If the IRI has an authority, no ``idna_encoder`` was given
            and the ``idna`` module could not be imported.
        :raises exceptions.InvalidAuthority:
            If the default encoder is used and ``idna`` rejects a host
            label.
        """
        authority = self.authority
        if authority:
            if idna_encoder is None:
                if idna is None:  # pragma: no cover
                    raise exceptions.MissingDependencyError(
                        "Could not import the 'idna' module "
                        "and the IRI hostname requires encoding"
                    )

                def idna_encoder(name):
                    # ASCII covers code points 0-127, so anything above
                    # 127 (including U+0080) needs IDNA encoding.
                    if any(ord(c) > 127 for c in name):
                        try:
                            return idna.encode(
                                name.lower(), strict=True, std3_rules=True
                            )
                        except idna.IDNAError:
                            raise exceptions.InvalidAuthority(self.authority)
                    # Pure-ASCII labels are passed through untouched.
                    return name

            # Rebuild the authority from its parts: host, userinfo, port.
            authority = ""
            if self.host:
                authority = ".".join(
                    [
                        compat.to_str(idna_encoder(part))
                        for part in self.host.split(".")
                    ]
                )

            if self.userinfo is not None:
                authority = (
                    normalizers.encode_component(self.userinfo, self.encoding)
                    + "@"
                    + authority
                )

            if self.port is not None:
                authority += ":" + str(self.port)

        return uri.URIReference(
            self.scheme,
            authority,
            path=self.path,
            query=self.query,
            fragment=self.fragment,
            encoding=self.encoding,
        )
|
---|