/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 | package agents.org.apache.commons.lang.text;
|
---|
18 |
|
---|
19 | import java.util.Arrays;
|
---|
20 |
|
---|
/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 *
 * @author Apache Software Foundation
 * @since 2.2
 * @version $Id: StrMatcher.java 905636 2010-02-02 14:03:32Z niallp $
 */
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Constructor that creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Constructor that creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher; the array is
     * copied, so later changes to it do not affect the returned matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(char[] chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Constructor that creates a matcher from a string representing a set of characters.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new Matcher for the given characters
     */
    public static StrMatcher charSetMatcher(String chars) {
        if (chars == null || chars.length() == 0) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Constructor that creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a new Matcher for the given String
     */
    public static StrMatcher stringMatcher(String str) {
        if (str == null || str.length() == 0) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
        super();
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter <code>pos</code> represents the current position to be
     * checked in the string <code>buffer</code> (a character array which must
     * not be changed).
     * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding <code>pos</code> as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter <code>pos</code> represents the current position to be
     * checked in the string <code>buffer</code> (a character array which must
     * not be changed).
     * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding <code>pos</code> as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * <p>
     * This convenience form treats the whole array as the active area: it
     * delegates to the four-argument method with a start of <code>0</code>
     * and an end of <code>buffer.length</code>.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     * @since 2.4
     */
    public int isMatch(char[] buffer, int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(char chars[]) {
            super();
            // Work on a private copy: the caller's array is neither retained nor reordered.
            this.chars = (char[]) chars.clone();
            // Sorting is required by Arrays.binarySearch in isMatch.
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character at <code>pos</code> is in the set, zero otherwise
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(char ch) {
            super();
            this.ch = ch;
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character at <code>pos</code> equals the stored character, zero otherwise
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a string (an exact sequence of characters) for matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(String str) {
            super();
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the given text matches the stored string.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the length of the stored string if it occurs at <code>pos</code>
         *  and ends at or before <code>bufferEnd</code>, zero otherwise
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            int len = chars.length;
            // Compare against the remaining room instead of computing pos + len:
            // the sum can overflow int and wrongly pass the bounds check.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++, pos++) {
                if (chars[i] != buffer[pos]) {
                    return 0;
                }
            }
            return len;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>NoMatcher</code>.
         */
        NoMatcher() {
            super();
        }

        /**
         * Always returns zero, i.e. never matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return always zero
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>TrimMatcher</code>.
         */
        TrimMatcher() {
            super();
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character at <code>pos</code> has a code of 32 (space) or less, zero otherwise
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

}