source: src/main/java/agents/org/apache/commons/lang/CharSet.java

Last change on this file was 127, checked in by Wouter Pasman, 6 years ago

#41 ROLL BACK of rev.126 . So this version is equal to rev. 125

File size: 9.7 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17package agents.org.apache.commons.lang;
18
19import java.io.Serializable;
20import java.util.Collections;
21import java.util.HashMap;
22import java.util.HashSet;
23import java.util.Iterator;
24import java.util.Map;
25import java.util.Set;
26
27/**
28 * <p>A set of characters.</p>
29 *
30 * <p>Instances are immutable, but instances of subclasses may not be.</p>
31 *
32 * <p>#ThreadSafe#</p>
33 * @author Apache Software Foundation
34 * @author Phil Steitz
35 * @author Pete Gieser
36 * @author Gary Gregory
37 * @since 1.0
38 * @version $Id: CharSet.java 1056988 2011-01-09 17:58:53Z niallp $
39 */
40public class CharSet implements Serializable {
41
42 /**
43 * Required for serialization support. Lang version 2.0.
44 *
45 * @see java.io.Serializable
46 */
47 private static final long serialVersionUID = 5947847346149275958L;
48
49 /**
50 * A CharSet defining no characters.
51 * @since 2.0
52 */
53 public static final CharSet EMPTY = new CharSet((String) null);
54
55 /**
56 * A CharSet defining ASCII alphabetic characters "a-zA-Z".
57 * @since 2.0
58 */
59 public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z");
60
61 /**
62 * A CharSet defining ASCII alphabetic characters "a-z".
63 * @since 2.0
64 */
65 public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z");
66
67 /**
68 * A CharSet defining ASCII alphabetic characters "A-Z".
69 * @since 2.0
70 */
71 public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z");
72
73 /**
74 * A CharSet defining ASCII alphabetic characters "0-9".
75 * @since 2.0
76 */
77 public static final CharSet ASCII_NUMERIC = new CharSet("0-9");
78
79 /**
80 * A Map of the common cases used in the factory.
81 * Subclasses can add more common patterns if desired
82 * @since 2.0
83 */
84 protected static final Map COMMON = Collections.synchronizedMap(new HashMap());
85
86 static {
87 COMMON.put(null, EMPTY);
88 COMMON.put("", EMPTY);
89 COMMON.put("a-zA-Z", ASCII_ALPHA);
90 COMMON.put("A-Za-z", ASCII_ALPHA);
91 COMMON.put("a-z", ASCII_ALPHA_LOWER);
92 COMMON.put("A-Z", ASCII_ALPHA_UPPER);
93 COMMON.put("0-9", ASCII_NUMERIC);
94 }
95
96 /** The set of CharRange objects. */
97 private final Set set = Collections.synchronizedSet(new HashSet());
98
99 //-----------------------------------------------------------------------
100 /**
101 * <p>Factory method to create a new CharSet using a special syntax.</p>
102 *
103 * <ul>
104 * <li><code>null</code> or empty string ("")
105 * - set containing no characters</li>
106 * <li>Single character, such as "a"
107 * - set containing just that character</li>
108 * <li>Multi character, such as "a-e"
109 * - set containing characters from one character to the other</li>
110 * <li>Negated, such as "^a" or "^a-e"
111 * - set containing all characters except those defined</li>
112 * <li>Combinations, such as "abe-g"
113 * - set containing all the characters from the individual sets</li>
114 * </ul>
115 *
116 * <p>The matching order is:</p>
117 * <ol>
118 * <li>Negated multi character range, such as "^a-e"
119 * <li>Ordinary multi character range, such as "a-e"
120 * <li>Negated single character, such as "^a"
121 * <li>Ordinary single character, such as "a"
122 * </ol>
123 * <p>Matching works left to right. Once a match is found the
124 * search starts again from the next character.</p>
125 *
126 * <p>If the same range is defined twice using the same syntax, only
127 * one range will be kept.
128 * Thus, "a-ca-c" creates only one range of "a-c".</p>
129 *
130 * <p>If the start and end of a range are in the wrong order,
131 * they are reversed. Thus "a-e" is the same as "e-a".
132 * As a result, "a-ee-a" would create only one range,
133 * as the "a-e" and "e-a" are the same.</p>
134 *
135 * <p>The set of characters represented is the union of the specified ranges.</p>
136 *
137 * <p>All CharSet objects returned by this method will be immutable.</p>
138 *
139 * @param setStr the String describing the set, may be null
140 * @return a CharSet instance
141 * @since 2.0
142 */
143 public static CharSet getInstance(String setStr) {
144 Object set = COMMON.get(setStr);
145 if (set != null) {
146 return (CharSet) set;
147 }
148 return new CharSet(setStr);
149 }
150
151 /**
152 * <p>Constructs a new CharSet using the set syntax.
153 * Each string is merged in with the set.</p>
154 *
155 * @param setStrs Strings to merge into the initial set, may be null
156 * @return a CharSet instance
157 * @since 2.4
158 */
159 public static CharSet getInstance(String[] setStrs) {
160 if (setStrs == null) {
161 return null;
162 }
163 return new CharSet(setStrs);
164 }
165
166 //-----------------------------------------------------------------------
167 /**
168 * <p>Constructs a new CharSet using the set syntax.</p>
169 *
170 * @param setStr the String describing the set, may be null
171 * @since 2.0
172 */
173 protected CharSet(String setStr) {
174 super();
175 add(setStr);
176 }
177
178 /**
179 * <p>Constructs a new CharSet using the set syntax.
180 * Each string is merged in with the set.</p>
181 *
182 * @param set Strings to merge into the initial set
183 * @throws NullPointerException if set is <code>null</code>
184 */
185 protected CharSet(String[] set) {
186 super();
187 int sz = set.length;
188 for (int i = 0; i < sz; i++) {
189 add(set[i]);
190 }
191 }
192
193 //-----------------------------------------------------------------------
194 /**
195 * <p>Add a set definition string to the <code>CharSet</code>.</p>
196 *
197 * @param str set definition string
198 */
199 protected void add(String str) {
200 if (str == null) {
201 return;
202 }
203
204 int len = str.length();
205 int pos = 0;
206 while (pos < len) {
207 int remainder = (len - pos);
208 if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') {
209 // negated range
210 set.add(CharRange.isNotIn(str.charAt(pos + 1), str.charAt(pos + 3)));
211 pos += 4;
212 } else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
213 // range
214 set.add(CharRange.isIn(str.charAt(pos), str.charAt(pos + 2)));
215 pos += 3;
216 } else if (remainder >= 2 && str.charAt(pos) == '^') {
217 // negated char
218 set.add(CharRange.isNot(str.charAt(pos + 1)));
219 pos += 2;
220 } else {
221 // char
222 set.add(CharRange.is(str.charAt(pos)));
223 pos += 1;
224 }
225 }
226 }
227
228 //-----------------------------------------------------------------------
229 /**
230 * <p>Gets the internal set as an array of CharRange objects.</p>
231 *
232 * @return an array of immutable CharRange objects
233 * @since 2.0
234 */
235 public CharRange[] getCharRanges() {
236 return (CharRange[]) set.toArray(new CharRange[set.size()]);
237 }
238
239 //-----------------------------------------------------------------------
240 /**
241 * <p>Does the <code>CharSet</code> contain the specified
242 * character <code>ch</code>.</p>
243 *
244 * @param ch the character to check for
245 * @return <code>true</code> if the set contains the characters
246 */
247 public boolean contains(char ch) {
248 for (Iterator it = set.iterator(); it.hasNext();) {
249 CharRange range = (CharRange) it.next();
250 if (range.contains(ch)) {
251 return true;
252 }
253 }
254 return false;
255 }
256
257 // Basics
258 //-----------------------------------------------------------------------
259 /**
260 * <p>Compares two CharSet objects, returning true if they represent
261 * exactly the same set of characters defined in the same way.</p>
262 *
263 * <p>The two sets <code>abc</code> and <code>a-c</code> are <i>not</i>
264 * equal according to this method.</p>
265 *
266 * @param obj the object to compare to
267 * @return true if equal
268 * @since 2.0
269 */
270 public boolean equals(Object obj) {
271 if (obj == this) {
272 return true;
273 }
274 if (obj instanceof CharSet == false) {
275 return false;
276 }
277 CharSet other = (CharSet) obj;
278 return set.equals(other.set);
279 }
280
281 /**
282 * <p>Gets a hashCode compatible with the equals method.</p>
283 *
284 * @return a suitable hashCode
285 * @since 2.0
286 */
287 public int hashCode() {
288 return 89 + set.hashCode();
289 }
290
291 /**
292 * <p>Gets a string representation of the set.</p>
293 *
294 * @return string representation of the set
295 */
296 public String toString() {
297 return set.toString();
298 }
299
300}
Note: See TracBrowser for help on using the repository browser.