source: uri/rfc3986/misc.py@ 1473

Last change on this file since 1473 was 230, checked in by wouter, 4 years ago

#91 clone https://pypi.org/project/rfc3986/

File size: 4.1 KB
Line 
1# -*- coding: utf-8 -*-
2# Copyright (c) 2014 Rackspace
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12# implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""
16Module containing compiled regular expressions and constants.
17
18This module contains important constants, patterns, and compiled regular
19expressions for parsing and validating URIs and their components.
20"""
21
22import re
23
24from . import abnf_regexp
25
# Component names enumerated for the named tuple that URIReference uses as
# its superclass.
URI_COMPONENTS = ["scheme", "authority", "path", "query", "fragment"]

# Character tables re-exported from abnf_regexp under descriptive keys.
important_characters = {
    "generic_delimiters": abnf_regexp.GENERIC_DELIMITERS,
    "sub_delimiters": abnf_regexp.SUB_DELIMITERS,
    # Variant for use inside a regular expression: the '*' must be escaped.
    "re_sub_delimiters": abnf_regexp.SUB_DELIMITERS_RE,
    "unreserved_chars": abnf_regexp.UNRESERVED_CHARS,
    # Variant for use inside a regular expression: the '-' must be escaped.
    "re_unreserved": abnf_regexp.UNRESERVED_RE,
}

# Delimiters and reserved characters:
# http://tools.ietf.org/html/rfc3986#section-2.2
GENERIC_DELIMITERS = abnf_regexp.GENERIC_DELIMITERS_SET
SUB_DELIMITERS = abnf_regexp.SUB_DELIMITERS_SET
RESERVED_CHARS = abnf_regexp.RESERVED_CHARS_SET

# Unreserved characters:
# http://tools.ietf.org/html/rfc3986#section-2.3
UNRESERVED_CHARS = abnf_regexp.UNRESERVED_CHARS_SET
NON_PCT_ENCODED = abnf_regexp.NON_PCT_ENCODED_SET

# Compiled form of the URL parsing expression defined in abnf_regexp.
URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE)
51
# Splits an authority string into named groups: userinfo, host and port.
SUBAUTHORITY_MATCHER = re.compile(
    (
        "^(?:(?P<userinfo>{userinfo})@)?"  # optional userinfo followed by "@"
        "(?P<host>{host})"  # host
        ":?(?P<port>{port})?$"  # optional ":" and optional port
    ).format(
        userinfo=abnf_regexp.USERINFO_RE,
        host=abnf_regexp.HOST_PATTERN,
        port=abnf_regexp.PORT_RE,
    )
)


# Whole-string matchers: each pattern is wrapped in "^...$" here.
HOST_MATCHER = re.compile("^{0}$".format(abnf_regexp.HOST_RE))
IPv4_MATCHER = re.compile("^{0}$".format(abnf_regexp.IPv4_RE))
# IPv6 literals are matched together with their enclosing square brackets.
IPv6_MATCHER = re.compile(
    r"^\[" + abnf_regexp.IPv6_ADDRZ_RFC4007_RE + r"\]$"
)

# Used by host validator
IPv6_NO_RFC4007_MATCHER = re.compile(
    r"^\[" + abnf_regexp.IPv6_ADDRZ_RE + r"\]$"
)

# Matcher used to validate path components; PATH_RE is compiled as-is.
PATH_MATCHER = re.compile(abnf_regexp.PATH_RE)
74
75
# ##################################
# Query and Fragment Matcher Section
# ##################################

# QUERY_RE is compiled without adding any anchors here.
QUERY_MATCHER = re.compile(abnf_regexp.QUERY_RE)

# Fragments are validated with the very same compiled pattern as queries.
FRAGMENT_MATCHER = QUERY_MATCHER

# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1
SCHEME_MATCHER = re.compile("^" + abnf_regexp.SCHEME_RE + "$")
86
# Relative references, see: http://tools.ietf.org/html/rfc3986#section-4.2
#
# QUERY_RE is compiled on its own by QUERY_MATCHER and is sliced with [1:-1]
# by ABSOLUTE_URI_MATCHER, i.e. it is treated as carrying its own leading and
# trailing anchor.  Embedding it unsliced in the middle of a larger pattern
# would put "^"/"$" after the "?" / "#" separators, so a reference with a
# query or fragment could never match.  Strip the first and last character
# here as well, the same way ABSOLUTE_URI_MATCHER does.
# NOTE(review): assumes FRAGMENT_RE is anchored exactly like QUERY_RE
# (FRAGMENT_MATCHER is an alias of QUERY_MATCHER) -- confirm in abnf_regexp.
RELATIVE_REF_MATCHER = re.compile(
    r"^%s(\?%s)?(#%s)?$"
    % (
        abnf_regexp.RELATIVE_PART_RE,
        abnf_regexp.QUERY_RE[1:-1],
        abnf_regexp.FRAGMENT_RE[1:-1],
    )
)
95
# Absolute URIs (scheme, hierarchical part, optional query, no fragment).
# See http://tools.ietf.org/html/rfc3986#section-4.3
ABSOLUTE_URI_MATCHER = re.compile(
    r"^{0}:{1}(\?{2})?$".format(
        abnf_regexp.COMPONENT_PATTERN_DICT["scheme"],
        abnf_regexp.HIER_PART_RE,
        # Drop the first and last character of QUERY_RE (presumably its own
        # "^" and "$" anchors) so it can be embedded inside this pattern.
        abnf_regexp.QUERY_RE[1:-1],
    )
)
105
# ###############
# IRIs / RFC 3987
# ###############

# Same parsing expression as URI_MATCHER, compiled with the UNICODE flag.
IRI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE, re.UNICODE)

# IRI counterpart of SUBAUTHORITY_MATCHER: same named groups, built from the
# IUSERINFO_RE / IHOST_RE patterns and compiled with the UNICODE flag.
ISUBAUTHORITY_MATCHER = re.compile(
    (
        u"^(?:(?P<userinfo>{iuserinfo})@)?"  # optional iuserinfo and "@"
        u"(?P<host>{ihost})"  # ihost
        u":?(?P<port>{port})?$"  # optional ":" and optional port
    ).format(
        iuserinfo=abnf_regexp.IUSERINFO_RE,
        ihost=abnf_regexp.IHOST_RE,
        port=abnf_regexp.PORT_RE,
    ),
    re.UNICODE,
)
122
123
124# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3
def merge_paths(base_uri, relative_path):
    """Merge a base URI's path with a relative reference's path.

    Implements the "merge" routine of RFC 3986, section 5.2.3.

    :param base_uri: object exposing ``path`` and ``authority`` attributes;
        either attribute may be ``None``.
    :param str relative_path: path of the relative reference.
    :returns: the merged path.
    :rtype: str
    """
    base_path = base_uri.path or ""

    # A base with an authority and an empty path merges to "/" + reference.
    if not base_path and base_uri.authority is not None:
        return "/" + relative_path

    # Keep the base path up to and including its right-most "/".  When there
    # is no "/" at all, rfind() yields -1, the slice is empty and the whole
    # base path is discarded, as the RFC requires (previously the -1 index
    # merely chopped the last character off the base path).
    last_slash = base_path.rfind("/")
    return base_path[: last_slash + 1] + relative_path
133
134
# Unique marker object that can only be recognised by identity (``is``).
# NOTE(review): presumably a "keep the current value" sentinel for callers
# that must tell "argument omitted" apart from None -- confirm at call sites.
UseExisting = object()
Note: See TracBrowser for help on using the repository browser.