# -*- coding: utf-8 -*- # Copyright (c) 2014 Rackspace # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # See the License for the specific language governing permissions and # limitations under the License. """ Module containing compiled regular expressions and constants. This module contains important constants, patterns, and compiled regular expressions for parsing and validating URIs and their components. """ import re from . import abnf_regexp # These are enumerated for the named tuple used as a superclass of # URIReference URI_COMPONENTS = ["scheme", "authority", "path", "query", "fragment"] important_characters = { "generic_delimiters": abnf_regexp.GENERIC_DELIMITERS, "sub_delimiters": abnf_regexp.SUB_DELIMITERS, # We need to escape the '*' in this case "re_sub_delimiters": abnf_regexp.SUB_DELIMITERS_RE, "unreserved_chars": abnf_regexp.UNRESERVED_CHARS, # We need to escape the '-' in this case: "re_unreserved": abnf_regexp.UNRESERVED_RE, } # For details about delimiters and reserved characters, see: # http://tools.ietf.org/html/rfc3986#section-2.2 GENERIC_DELIMITERS = abnf_regexp.GENERIC_DELIMITERS_SET SUB_DELIMITERS = abnf_regexp.SUB_DELIMITERS_SET RESERVED_CHARS = abnf_regexp.RESERVED_CHARS_SET # For details about unreserved characters, see: # http://tools.ietf.org/html/rfc3986#section-2.3 UNRESERVED_CHARS = abnf_regexp.UNRESERVED_CHARS_SET NON_PCT_ENCODED = abnf_regexp.NON_PCT_ENCODED_SET URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE) SUBAUTHORITY_MATCHER = re.compile( ( "^(?:(?P{0})@)?" # userinfo "(?P{1})" # host ":?(?P{2})?$" # port ).format( abnf_regexp.USERINFO_RE, abnf_regexp.HOST_PATTERN, abnf_regexp.PORT_RE ) ) HOST_MATCHER = re.compile("^" + abnf_regexp.HOST_RE + "$") IPv4_MATCHER = re.compile("^" + abnf_regexp.IPv4_RE + "$") IPv6_MATCHER = re.compile(r"^\[" + abnf_regexp.IPv6_ADDRZ_RFC4007_RE + r"\]$") # Used by host validator IPv6_NO_RFC4007_MATCHER = re.compile( r"^\[%s\]$" % (abnf_regexp.IPv6_ADDRZ_RE) ) # Matcher used to validate path components PATH_MATCHER = re.compile(abnf_regexp.PATH_RE) # ################################## # Query and Fragment Matcher Section # ################################## QUERY_MATCHER = re.compile(abnf_regexp.QUERY_RE) FRAGMENT_MATCHER = QUERY_MATCHER # Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1 SCHEME_MATCHER = re.compile("^{0}$".format(abnf_regexp.SCHEME_RE)) RELATIVE_REF_MATCHER = re.compile( r"^%s(\?%s)?(#%s)?$" % ( abnf_regexp.RELATIVE_PART_RE, abnf_regexp.QUERY_RE, abnf_regexp.FRAGMENT_RE, ) ) # See http://tools.ietf.org/html/rfc3986#section-4.3 ABSOLUTE_URI_MATCHER = re.compile( r"^%s:%s(\?%s)?$" % ( abnf_regexp.COMPONENT_PATTERN_DICT["scheme"], abnf_regexp.HIER_PART_RE, abnf_regexp.QUERY_RE[1:-1], ) ) # ############### # IRIs / RFC 3987 # ############### IRI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE, re.UNICODE) ISUBAUTHORITY_MATCHER = re.compile( ( u"^(?:(?P{0})@)?" # iuserinfo u"(?P{1})" # ihost u":?(?P{2})?$" # port ).format( abnf_regexp.IUSERINFO_RE, abnf_regexp.IHOST_RE, abnf_regexp.PORT_RE ), re.UNICODE, ) # Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3 def merge_paths(base_uri, relative_path): """Merge a base URI's path with a relative URI's path.""" if base_uri.path is None and base_uri.authority is not None: return "/" + relative_path else: path = base_uri.path or "" index = path.rfind("/") return path[:index] + "/" + relative_path UseExisting = object()