1 | /*
|
---|
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
|
---|
3 | * contributor license agreements. See the NOTICE file distributed with
|
---|
4 | * this work for additional information regarding copyright ownership.
|
---|
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
|
---|
6 | * (the "License"); you may not use this file except in compliance with
|
---|
7 | * the License. You may obtain a copy of the License at
|
---|
8 | *
|
---|
9 | * http://www.apache.org/licenses/LICENSE-2.0
|
---|
10 | *
|
---|
11 | * Unless required by applicable law or agreed to in writing, software
|
---|
12 | * distributed under the License is distributed on an "AS IS" BASIS,
|
---|
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
---|
14 | * See the License for the specific language governing permissions and
|
---|
15 | * limitations under the License.
|
---|
16 | */
|
---|
17 | package agents.org.apache.commons.lang;
|
---|
18 |
|
---|
19 | import java.io.Serializable;
|
---|
20 | import java.util.Collections;
|
---|
21 | import java.util.HashMap;
|
---|
22 | import java.util.HashSet;
|
---|
23 | import java.util.Iterator;
|
---|
24 | import java.util.Map;
|
---|
25 | import java.util.Set;
|
---|
26 |
|
---|
27 | /**
|
---|
28 | * <p>A set of characters.</p>
|
---|
29 | *
|
---|
30 | * <p>Instances are immutable, but instances of subclasses may not be.</p>
|
---|
31 | *
|
---|
32 | * <p>#ThreadSafe#</p>
|
---|
33 | * @author Apache Software Foundation
|
---|
34 | * @author Phil Steitz
|
---|
35 | * @author Pete Gieser
|
---|
36 | * @author Gary Gregory
|
---|
37 | * @since 1.0
|
---|
38 | * @version $Id: CharSet.java 1056988 2011-01-09 17:58:53Z niallp $
|
---|
39 | */
|
---|
40 | public class CharSet implements Serializable {
|
---|
41 |
|
---|
42 | /**
|
---|
43 | * Required for serialization support. Lang version 2.0.
|
---|
44 | *
|
---|
45 | * @see java.io.Serializable
|
---|
46 | */
|
---|
47 | private static final long serialVersionUID = 5947847346149275958L;
|
---|
48 |
|
---|
49 | /**
|
---|
50 | * A CharSet defining no characters.
|
---|
51 | * @since 2.0
|
---|
52 | */
|
---|
53 | public static final CharSet EMPTY = new CharSet((String) null);
|
---|
54 |
|
---|
55 | /**
|
---|
56 | * A CharSet defining ASCII alphabetic characters "a-zA-Z".
|
---|
57 | * @since 2.0
|
---|
58 | */
|
---|
59 | public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z");
|
---|
60 |
|
---|
61 | /**
|
---|
62 | * A CharSet defining ASCII alphabetic characters "a-z".
|
---|
63 | * @since 2.0
|
---|
64 | */
|
---|
65 | public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z");
|
---|
66 |
|
---|
67 | /**
|
---|
68 | * A CharSet defining ASCII alphabetic characters "A-Z".
|
---|
69 | * @since 2.0
|
---|
70 | */
|
---|
71 | public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z");
|
---|
72 |
|
---|
73 | /**
|
---|
74 | * A CharSet defining ASCII alphabetic characters "0-9".
|
---|
75 | * @since 2.0
|
---|
76 | */
|
---|
77 | public static final CharSet ASCII_NUMERIC = new CharSet("0-9");
|
---|
78 |
|
---|
79 | /**
|
---|
80 | * A Map of the common cases used in the factory.
|
---|
81 | * Subclasses can add more common patterns if desired
|
---|
82 | * @since 2.0
|
---|
83 | */
|
---|
84 | protected static final Map COMMON = Collections.synchronizedMap(new HashMap());
|
---|
85 |
|
---|
86 | static {
|
---|
87 | COMMON.put(null, EMPTY);
|
---|
88 | COMMON.put("", EMPTY);
|
---|
89 | COMMON.put("a-zA-Z", ASCII_ALPHA);
|
---|
90 | COMMON.put("A-Za-z", ASCII_ALPHA);
|
---|
91 | COMMON.put("a-z", ASCII_ALPHA_LOWER);
|
---|
92 | COMMON.put("A-Z", ASCII_ALPHA_UPPER);
|
---|
93 | COMMON.put("0-9", ASCII_NUMERIC);
|
---|
94 | }
|
---|
95 |
|
---|
96 | /** The set of CharRange objects. */
|
---|
97 | private final Set set = Collections.synchronizedSet(new HashSet());
|
---|
98 |
|
---|
99 | //-----------------------------------------------------------------------
|
---|
100 | /**
|
---|
101 | * <p>Factory method to create a new CharSet using a special syntax.</p>
|
---|
102 | *
|
---|
103 | * <ul>
|
---|
104 | * <li><code>null</code> or empty string ("")
|
---|
105 | * - set containing no characters</li>
|
---|
106 | * <li>Single character, such as "a"
|
---|
107 | * - set containing just that character</li>
|
---|
108 | * <li>Multi character, such as "a-e"
|
---|
109 | * - set containing characters from one character to the other</li>
|
---|
110 | * <li>Negated, such as "^a" or "^a-e"
|
---|
111 | * - set containing all characters except those defined</li>
|
---|
112 | * <li>Combinations, such as "abe-g"
|
---|
113 | * - set containing all the characters from the individual sets</li>
|
---|
114 | * </ul>
|
---|
115 | *
|
---|
116 | * <p>The matching order is:</p>
|
---|
117 | * <ol>
|
---|
118 | * <li>Negated multi character range, such as "^a-e"
|
---|
119 | * <li>Ordinary multi character range, such as "a-e"
|
---|
120 | * <li>Negated single character, such as "^a"
|
---|
121 | * <li>Ordinary single character, such as "a"
|
---|
122 | * </ol>
|
---|
123 | * <p>Matching works left to right. Once a match is found the
|
---|
124 | * search starts again from the next character.</p>
|
---|
125 | *
|
---|
126 | * <p>If the same range is defined twice using the same syntax, only
|
---|
127 | * one range will be kept.
|
---|
128 | * Thus, "a-ca-c" creates only one range of "a-c".</p>
|
---|
129 | *
|
---|
130 | * <p>If the start and end of a range are in the wrong order,
|
---|
131 | * they are reversed. Thus "a-e" is the same as "e-a".
|
---|
132 | * As a result, "a-ee-a" would create only one range,
|
---|
133 | * as the "a-e" and "e-a" are the same.</p>
|
---|
134 | *
|
---|
135 | * <p>The set of characters represented is the union of the specified ranges.</p>
|
---|
136 | *
|
---|
137 | * <p>All CharSet objects returned by this method will be immutable.</p>
|
---|
138 | *
|
---|
139 | * @param setStr the String describing the set, may be null
|
---|
140 | * @return a CharSet instance
|
---|
141 | * @since 2.0
|
---|
142 | */
|
---|
143 | public static CharSet getInstance(String setStr) {
|
---|
144 | Object set = COMMON.get(setStr);
|
---|
145 | if (set != null) {
|
---|
146 | return (CharSet) set;
|
---|
147 | }
|
---|
148 | return new CharSet(setStr);
|
---|
149 | }
|
---|
150 |
|
---|
151 | /**
|
---|
152 | * <p>Constructs a new CharSet using the set syntax.
|
---|
153 | * Each string is merged in with the set.</p>
|
---|
154 | *
|
---|
155 | * @param setStrs Strings to merge into the initial set, may be null
|
---|
156 | * @return a CharSet instance
|
---|
157 | * @since 2.4
|
---|
158 | */
|
---|
159 | public static CharSet getInstance(String[] setStrs) {
|
---|
160 | if (setStrs == null) {
|
---|
161 | return null;
|
---|
162 | }
|
---|
163 | return new CharSet(setStrs);
|
---|
164 | }
|
---|
165 |
|
---|
166 | //-----------------------------------------------------------------------
|
---|
167 | /**
|
---|
168 | * <p>Constructs a new CharSet using the set syntax.</p>
|
---|
169 | *
|
---|
170 | * @param setStr the String describing the set, may be null
|
---|
171 | * @since 2.0
|
---|
172 | */
|
---|
173 | protected CharSet(String setStr) {
|
---|
174 | super();
|
---|
175 | add(setStr);
|
---|
176 | }
|
---|
177 |
|
---|
178 | /**
|
---|
179 | * <p>Constructs a new CharSet using the set syntax.
|
---|
180 | * Each string is merged in with the set.</p>
|
---|
181 | *
|
---|
182 | * @param set Strings to merge into the initial set
|
---|
183 | * @throws NullPointerException if set is <code>null</code>
|
---|
184 | */
|
---|
185 | protected CharSet(String[] set) {
|
---|
186 | super();
|
---|
187 | int sz = set.length;
|
---|
188 | for (int i = 0; i < sz; i++) {
|
---|
189 | add(set[i]);
|
---|
190 | }
|
---|
191 | }
|
---|
192 |
|
---|
193 | //-----------------------------------------------------------------------
|
---|
194 | /**
|
---|
195 | * <p>Add a set definition string to the <code>CharSet</code>.</p>
|
---|
196 | *
|
---|
197 | * @param str set definition string
|
---|
198 | */
|
---|
199 | protected void add(String str) {
|
---|
200 | if (str == null) {
|
---|
201 | return;
|
---|
202 | }
|
---|
203 |
|
---|
204 | int len = str.length();
|
---|
205 | int pos = 0;
|
---|
206 | while (pos < len) {
|
---|
207 | int remainder = (len - pos);
|
---|
208 | if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') {
|
---|
209 | // negated range
|
---|
210 | set.add(CharRange.isNotIn(str.charAt(pos + 1), str.charAt(pos + 3)));
|
---|
211 | pos += 4;
|
---|
212 | } else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
|
---|
213 | // range
|
---|
214 | set.add(CharRange.isIn(str.charAt(pos), str.charAt(pos + 2)));
|
---|
215 | pos += 3;
|
---|
216 | } else if (remainder >= 2 && str.charAt(pos) == '^') {
|
---|
217 | // negated char
|
---|
218 | set.add(CharRange.isNot(str.charAt(pos + 1)));
|
---|
219 | pos += 2;
|
---|
220 | } else {
|
---|
221 | // char
|
---|
222 | set.add(CharRange.is(str.charAt(pos)));
|
---|
223 | pos += 1;
|
---|
224 | }
|
---|
225 | }
|
---|
226 | }
|
---|
227 |
|
---|
228 | //-----------------------------------------------------------------------
|
---|
229 | /**
|
---|
230 | * <p>Gets the internal set as an array of CharRange objects.</p>
|
---|
231 | *
|
---|
232 | * @return an array of immutable CharRange objects
|
---|
233 | * @since 2.0
|
---|
234 | */
|
---|
235 | public CharRange[] getCharRanges() {
|
---|
236 | return (CharRange[]) set.toArray(new CharRange[set.size()]);
|
---|
237 | }
|
---|
238 |
|
---|
239 | //-----------------------------------------------------------------------
|
---|
240 | /**
|
---|
241 | * <p>Does the <code>CharSet</code> contain the specified
|
---|
242 | * character <code>ch</code>.</p>
|
---|
243 | *
|
---|
244 | * @param ch the character to check for
|
---|
245 | * @return <code>true</code> if the set contains the characters
|
---|
246 | */
|
---|
247 | public boolean contains(char ch) {
|
---|
248 | for (Iterator it = set.iterator(); it.hasNext();) {
|
---|
249 | CharRange range = (CharRange) it.next();
|
---|
250 | if (range.contains(ch)) {
|
---|
251 | return true;
|
---|
252 | }
|
---|
253 | }
|
---|
254 | return false;
|
---|
255 | }
|
---|
256 |
|
---|
257 | // Basics
|
---|
258 | //-----------------------------------------------------------------------
|
---|
259 | /**
|
---|
260 | * <p>Compares two CharSet objects, returning true if they represent
|
---|
261 | * exactly the same set of characters defined in the same way.</p>
|
---|
262 | *
|
---|
263 | * <p>The two sets <code>abc</code> and <code>a-c</code> are <i>not</i>
|
---|
264 | * equal according to this method.</p>
|
---|
265 | *
|
---|
266 | * @param obj the object to compare to
|
---|
267 | * @return true if equal
|
---|
268 | * @since 2.0
|
---|
269 | */
|
---|
270 | public boolean equals(Object obj) {
|
---|
271 | if (obj == this) {
|
---|
272 | return true;
|
---|
273 | }
|
---|
274 | if (obj instanceof CharSet == false) {
|
---|
275 | return false;
|
---|
276 | }
|
---|
277 | CharSet other = (CharSet) obj;
|
---|
278 | return set.equals(other.set);
|
---|
279 | }
|
---|
280 |
|
---|
281 | /**
|
---|
282 | * <p>Gets a hashCode compatible with the equals method.</p>
|
---|
283 | *
|
---|
284 | * @return a suitable hashCode
|
---|
285 | * @since 2.0
|
---|
286 | */
|
---|
287 | public int hashCode() {
|
---|
288 | return 89 + set.hashCode();
|
---|
289 | }
|
---|
290 |
|
---|
291 | /**
|
---|
292 | * <p>Gets a string representation of the set.</p>
|
---|
293 | *
|
---|
294 | * @return string representation of the set
|
---|
295 | */
|
---|
296 | public String toString() {
|
---|
297 | return set.toString();
|
---|
298 | }
|
---|
299 |
|
---|
300 | }
|
---|