[230] | 1 | """Module containing the implementation of the IRIReference class."""
|
---|
| 2 | # -*- coding: utf-8 -*-
|
---|
| 3 | # Copyright (c) 2014 Rackspace
|
---|
| 4 | # Copyright (c) 2015 Ian Stapleton Cordasco
|
---|
| 5 | # Licensed under the Apache License, Version 2.0 (the "License");
|
---|
| 6 | # you may not use this file except in compliance with the License.
|
---|
| 7 | # You may obtain a copy of the License at
|
---|
| 8 | #
|
---|
| 9 | # http://www.apache.org/licenses/LICENSE-2.0
|
---|
| 10 | #
|
---|
| 11 | # Unless required by applicable law or agreed to in writing, software
|
---|
| 12 | # distributed under the License is distributed on an "AS IS" BASIS,
|
---|
| 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
---|
| 14 | # implied.
|
---|
| 15 | # See the License for the specific language governing permissions and
|
---|
| 16 | # limitations under the License.
|
---|
| 17 | from collections import namedtuple
|
---|
| 18 |
|
---|
| 19 | from . import compat
|
---|
| 20 | from . import exceptions
|
---|
| 21 | from . import misc
|
---|
| 22 | from . import normalizers
|
---|
| 23 | from . import uri
|
---|
| 24 |
|
---|
| 25 |
|
---|
| 26 | try:
|
---|
| 27 | import idna
|
---|
| 28 | except ImportError: # pragma: no cover
|
---|
| 29 | idna = None
|
---|
| 30 |
|
---|
| 31 |
|
---|
class IRIReference(
    namedtuple("IRIReference", misc.URI_COMPONENTS), uri.URIMixin
):
    """Immutable object representing a parsed IRI Reference.

    Can be encoded into an URIReference object via the procedure
    specified in RFC 3987 Section 3.1

    .. note::
        The IRI submodule is a new interface and may possibly change in
        the future. Check for changes to the interface when upgrading.
    """

    # NOTE(review): this is a plain class attribute named ``slots`` rather
    # than ``__slots__``. ``__new__`` below assigns ``ref.encoding`` on the
    # instance, so confirm that instance attributes stay assignable before
    # renaming it.
    slots = ()

    def __new__(
        cls, scheme, authority, path, query, fragment, encoding="utf-8"
    ):
        """Create a new IRIReference.

        Falsy ``scheme``, ``authority`` and ``path`` values (e.g. empty
        strings) are stored as ``None``; ``query`` and ``fragment`` are
        stored exactly as given.

        :param scheme: Scheme component of the IRI.
        :param authority: Authority component of the IRI.
        :param path: Path component of the IRI.
        :param query: Query component of the IRI.
        :param fragment: Fragment component of the IRI.
        :param str encoding: Encoding remembered on the instance as
            ``encoding``; it is not one of the tuple fields.
        """
        ref = super(IRIReference, cls).__new__(
            cls,
            scheme or None,
            authority or None,
            path or None,
            query,
            fragment,
        )
        ref.encoding = encoding
        return ref

    def __eq__(self, other):
        """Compare this reference to another.

        Tuples (which includes other ``IRIReference`` instances) are
        re-wrapped with this class; any other object is parsed with
        :meth:`from_string`. The comparison is then made on the component
        tuples only, so ``encoding`` does not take part in it.

        :raises TypeError: If ``other`` cannot be parsed into a reference.
        """
        other_ref = other
        if isinstance(other, tuple):
            other_ref = self.__class__(*other)
        elif not isinstance(other, IRIReference):
            try:
                other_ref = self.__class__.from_string(other)
            except TypeError:
                raise TypeError(
                    "Unable to compare {0}() to {1}()".format(
                        type(self).__name__, type(other).__name__
                    )
                )

        # See http://tools.ietf.org/html/rfc3986#section-6.2
        return tuple(self) == tuple(other_ref)

    def _match_subauthority(self):
        """Match ``self.authority`` against the IRI sub-authority pattern.

        :returns: The result of ``misc.ISUBAUTHORITY_MATCHER.match``.
        """
        # Presumably consumed by uri.URIMixin when splitting the authority
        # into userinfo/host/port -- verify against the mixin.
        return misc.ISUBAUTHORITY_MATCHER.match(self.authority)

    @classmethod
    def from_string(cls, iri_string, encoding="utf-8"):
        """Parse a IRI reference from the given unicode IRI string.

        The path, query and fragment are passed through
        ``normalizers.encode_component``; the scheme and authority are
        used as matched.

        :param str iri_string: Unicode IRI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :returns: :class:`IRIReference` or subclass thereof
        """
        iri_string = compat.to_str(iri_string, encoding)

        split_iri = misc.IRI_MATCHER.match(iri_string).groupdict()
        return cls(
            split_iri["scheme"],
            split_iri["authority"],
            normalizers.encode_component(split_iri["path"], encoding),
            normalizers.encode_component(split_iri["query"], encoding),
            normalizers.encode_component(split_iri["fragment"], encoding),
            encoding,
        )

    def encode(self, idna_encoder=None):  # noqa: C901
        """Encode an IRIReference into a URIReference instance.

        If the ``idna`` module is installed or the ``rfc3986[idna]``
        extra is used then unicode characters in the IRI host
        component will be encoded with IDNA2008.

        :param idna_encoder:
            Function that encodes each dot-separated part of the host
            component. If not given, a default encoder built on the
            ``idna`` module is used.
        :rtype: uri.URIReference
        :returns: A URI reference
        :raises exceptions.MissingDependencyError:
            If no ``idna_encoder`` is given, the IRI has an authority and
            the ``idna`` module could not be imported.
        :raises exceptions.InvalidAuthority:
            If the default encoder fails to IDNA-encode a host label.
        """
        authority = self.authority
        if authority:
            if idna_encoder is None:
                if idna is None:  # pragma: no cover
                    raise exceptions.MissingDependencyError(
                        "Could not import the 'idna' module "
                        "and the IRI hostname requires encoding"
                    )

                def idna_encoder(name):
                    # ASCII ends at U+007F, so every code point above 127
                    # (including U+0080) needs IDNA handling.
                    if any(ord(c) > 127 for c in name):
                        try:
                            return idna.encode(
                                name.lower(), strict=True, std3_rules=True
                            )
                        except idna.IDNAError:
                            raise exceptions.InvalidAuthority(self.authority)
                    return name

            # Rebuild the authority from its parts so that each piece can be
            # encoded independently: userinfo "@" host ":" port.
            authority = ""
            if self.host:
                authority = ".".join(
                    [
                        compat.to_str(idna_encoder(part))
                        for part in self.host.split(".")
                    ]
                )

            if self.userinfo is not None:
                authority = (
                    normalizers.encode_component(self.userinfo, self.encoding)
                    + "@"
                    + authority
                )

            if self.port is not None:
                authority += ":" + str(self.port)

        return uri.URIReference(
            self.scheme,
            authority,
            path=self.path,
            query=self.query,
            fragment=self.fragment,
            encoding=self.encoding,
        )
|
---|