1 | /*
|
---|
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
|
---|
3 | * contributor license agreements. See the NOTICE file distributed with
|
---|
4 | * this work for additional information regarding copyright ownership.
|
---|
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
|
---|
6 | * (the "License"); you may not use this file except in compliance with
|
---|
7 | * the License. You may obtain a copy of the License at
|
---|
8 | *
|
---|
9 | * http://www.apache.org/licenses/LICENSE-2.0
|
---|
10 | *
|
---|
11 | * Unless required by applicable law or agreed to in writing, software
|
---|
12 | * distributed under the License is distributed on an "AS IS" BASIS,
|
---|
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
---|
14 | * See the License for the specific language governing permissions and
|
---|
15 | * limitations under the License.
|
---|
16 | */
|
---|
17 | package agents.org.apache.commons.lang.text;
|
---|
18 |
|
---|
19 | import java.util.ArrayList;
|
---|
20 | import java.util.Collections;
|
---|
21 | import java.util.List;
|
---|
22 | import java.util.ListIterator;
|
---|
23 | import java.util.NoSuchElementException;
|
---|
24 |
|
---|
25 | /**
|
---|
26 | * Tokenizes a string based on delimiters (separators)
|
---|
27 | * and supporting quoting and ignored character concepts.
|
---|
28 | * <p>
|
---|
29 | * This class can split a String into many smaller strings. It aims
|
---|
30 | * to do a similar job to {@link java.util.StringTokenizer StringTokenizer},
|
---|
31 | * however it offers much more control and flexibility including implementing
|
---|
32 | * the <code>ListIterator</code> interface. By default, it is set up
|
---|
33 | * like <code>StringTokenizer</code>.
|
---|
34 | * <p>
|
---|
35 | * The input String is split into a number of <i>tokens</i>.
|
---|
36 | * Each token is separated from the next String by a <i>delimiter</i>.
|
---|
37 | * One or more delimiter characters must be specified.
|
---|
38 | * <p>
|
---|
39 | * Each token may be surrounded by quotes.
|
---|
40 | * The <i>quote</i> matcher specifies the quote character(s).
|
---|
41 | * A quote may be escaped within a quoted section by duplicating itself.
|
---|
42 | * <p>
|
---|
43 | * Between each token and the delimiter are potentially characters that need trimming.
|
---|
44 | * The <i>trimmer</i> matcher specifies these characters.
|
---|
45 | * One usage might be to trim whitespace characters.
|
---|
46 | * <p>
|
---|
47 | * At any point outside the quotes there might potentially be invalid characters.
|
---|
48 | * The <i>ignored</i> matcher specifies these characters to be removed.
|
---|
49 | * One usage might be to remove new line characters.
|
---|
50 | * <p>
|
---|
51 | * Empty tokens may be removed or returned as null.
|
---|
52 | * <pre>
|
---|
53 | * "a,b,c" - Three tokens "a","b","c" (comma delimiter)
|
---|
54 | * " a, b , c " - Three tokens "a","b","c" (default CSV processing trims whitespace)
|
---|
55 | * "a, ", b ,", c" - Three tokens "a, " , " b ", ", c" (quoted text untouched)
|
---|
56 | * </pre>
|
---|
57 | * <p>
|
---|
58 | *
|
---|
59 | * This tokenizer has the following properties and options:
|
---|
60 | *
|
---|
61 | * <table>
|
---|
62 | * <tr>
|
---|
63 | * <th>Property</th><th>Type</th><th>Default</th>
|
---|
64 | * </tr>
|
---|
65 | * <tr>
|
---|
66 | * <td>delim</td><td>CharSetMatcher</td><td>{ \t\n\r\f}</td>
|
---|
67 | * </tr>
|
---|
68 | * <tr>
|
---|
69 | * <td>quote</td><td>NoneMatcher</td><td>{}</td>
|
---|
70 | * </tr>
|
---|
71 | * <tr>
|
---|
72 | * <td>ignore</td><td>NoneMatcher</td><td>{}</td>
|
---|
73 | * </tr>
|
---|
74 | * <tr>
|
---|
75 | * <td>emptyTokenAsNull</td><td>boolean</td><td>false</td>
|
---|
76 | * </tr>
|
---|
77 | * <tr>
|
---|
78 | * <td>ignoreEmptyTokens</td><td>boolean</td><td>true</td>
|
---|
79 | * </tr>
|
---|
80 | * </table>
|
---|
81 | *
|
---|
82 | * @author Apache Software Foundation
|
---|
83 | * @author Matthew Inger
|
---|
84 | * @author Gary D. Gregory
|
---|
85 | * @since 2.2
|
---|
86 | * @version $Id: StrTokenizer.java 907631 2010-02-08 12:22:48Z sebb $
|
---|
87 | */
|
---|
88 | public class StrTokenizer implements ListIterator, Cloneable {
|
---|
89 |
|
---|
/** Prototype tokenizer configured for Comma Separated Value text. */
private static final StrTokenizer CSV_TOKENIZER_PROTOTYPE;
/** Prototype tokenizer configured for Tab Separated Value text. */
private static final StrTokenizer TSV_TOKENIZER_PROTOTYPE;
static {
    // Both prototypes share every setting except the delimiter matcher:
    // double-quote quoting, nothing ignored, trimming enabled, and empty
    // tokens kept as empty strings (neither dropped nor turned into null).
    final StrTokenizer csv = new StrTokenizer();
    csv.setDelimiterMatcher(StrMatcher.commaMatcher());
    csv.setQuoteMatcher(StrMatcher.doubleQuoteMatcher());
    csv.setIgnoredMatcher(StrMatcher.noneMatcher());
    csv.setTrimmerMatcher(StrMatcher.trimMatcher());
    csv.setEmptyTokenAsNull(false);
    csv.setIgnoreEmptyTokens(false);
    CSV_TOKENIZER_PROTOTYPE = csv;

    final StrTokenizer tsv = new StrTokenizer();
    tsv.setDelimiterMatcher(StrMatcher.tabMatcher());
    tsv.setQuoteMatcher(StrMatcher.doubleQuoteMatcher());
    tsv.setIgnoredMatcher(StrMatcher.noneMatcher());
    tsv.setTrimmerMatcher(StrMatcher.trimMatcher());
    tsv.setEmptyTokenAsNull(false);
    tsv.setIgnoreEmptyTokens(false);
    TSV_TOKENIZER_PROTOTYPE = tsv;
}
109 |
|
---|
110 | /** The text to work on. */
|
---|
111 | private char chars[];
|
---|
112 | /** The parsed tokens */
|
---|
113 | private String tokens[];
|
---|
114 | /** The current iteration position */
|
---|
115 | private int tokenPos;
|
---|
116 |
|
---|
117 | /** The delimiter matcher */
|
---|
118 | private StrMatcher delimMatcher = StrMatcher.splitMatcher();
|
---|
119 | /** The quote matcher */
|
---|
120 | private StrMatcher quoteMatcher = StrMatcher.noneMatcher();
|
---|
121 | /** The ignored matcher */
|
---|
122 | private StrMatcher ignoredMatcher = StrMatcher.noneMatcher();
|
---|
123 | /** The trimmer matcher */
|
---|
124 | private StrMatcher trimmerMatcher = StrMatcher.noneMatcher();
|
---|
125 |
|
---|
126 | /** Whether to return empty tokens as null */
|
---|
127 | private boolean emptyAsNull = false;
|
---|
128 | /** Whether to ignore empty tokens */
|
---|
129 | private boolean ignoreEmptyTokens = true;
|
---|
130 |
|
---|
131 | //-----------------------------------------------------------------------
|
---|
132 |
|
---|
133 | /**
|
---|
134 | * Returns a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>.
|
---|
135 | *
|
---|
136 | * @return a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>.
|
---|
137 | */
|
---|
138 | private static StrTokenizer getCSVClone() {
|
---|
139 | return (StrTokenizer) CSV_TOKENIZER_PROTOTYPE.clone();
|
---|
140 | }
|
---|
141 |
|
---|
142 | /**
|
---|
143 | * Gets a new tokenizer instance which parses Comma Separated Value strings
|
---|
144 | * initializing it with the given input. The default for CSV processing
|
---|
145 | * will be trim whitespace from both ends (which can be overridden with
|
---|
146 | * the setTrimmer method).
|
---|
147 | * <p>
|
---|
148 | * You must call a "reset" method to set the string which you want to parse.
|
---|
149 | * @return a new tokenizer instance which parses Comma Separated Value strings
|
---|
150 | */
|
---|
151 | public static StrTokenizer getCSVInstance() {
|
---|
152 | return getCSVClone();
|
---|
153 | }
|
---|
154 |
|
---|
155 | /**
|
---|
156 | * Gets a new tokenizer instance which parses Comma Separated Value strings
|
---|
157 | * initializing it with the given input. The default for CSV processing
|
---|
158 | * will be trim whitespace from both ends (which can be overridden with
|
---|
159 | * the setTrimmer method).
|
---|
160 | *
|
---|
161 | * @param input the text to parse
|
---|
162 | * @return a new tokenizer instance which parses Comma Separated Value strings
|
---|
163 | */
|
---|
164 | public static StrTokenizer getCSVInstance(String input) {
|
---|
165 | StrTokenizer tok = getCSVClone();
|
---|
166 | tok.reset(input);
|
---|
167 | return tok;
|
---|
168 | }
|
---|
169 |
|
---|
170 | /**
|
---|
171 | * Gets a new tokenizer instance which parses Comma Separated Value strings
|
---|
172 | * initializing it with the given input. The default for CSV processing
|
---|
173 | * will be trim whitespace from both ends (which can be overridden with
|
---|
174 | * the setTrimmer method).
|
---|
175 | *
|
---|
176 | * @param input the text to parse
|
---|
177 | * @return a new tokenizer instance which parses Comma Separated Value strings
|
---|
178 | */
|
---|
179 | public static StrTokenizer getCSVInstance(char[] input) {
|
---|
180 | StrTokenizer tok = getCSVClone();
|
---|
181 | tok.reset(input);
|
---|
182 | return tok;
|
---|
183 | }
|
---|
184 |
|
---|
185 | /**
|
---|
186 | * Returns a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>.
|
---|
187 | *
|
---|
188 | * @return a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>.
|
---|
189 | */
|
---|
190 | private static StrTokenizer getTSVClone() {
|
---|
191 | return (StrTokenizer) TSV_TOKENIZER_PROTOTYPE.clone();
|
---|
192 | }
|
---|
193 |
|
---|
194 |
|
---|
195 | /**
|
---|
196 | * Gets a new tokenizer instance which parses Tab Separated Value strings.
|
---|
197 | * The default for CSV processing will be trim whitespace from both ends
|
---|
198 | * (which can be overridden with the setTrimmer method).
|
---|
199 | * <p>
|
---|
200 | * You must call a "reset" method to set the string which you want to parse.
|
---|
201 | * @return a new tokenizer instance which parses Tab Separated Value strings.
|
---|
202 | */
|
---|
203 | public static StrTokenizer getTSVInstance() {
|
---|
204 | return getTSVClone();
|
---|
205 | }
|
---|
206 |
|
---|
207 | /**
|
---|
208 | * Gets a new tokenizer instance which parses Tab Separated Value strings.
|
---|
209 | * The default for CSV processing will be trim whitespace from both ends
|
---|
210 | * (which can be overridden with the setTrimmer method).
|
---|
211 | * @param input the string to parse
|
---|
212 | * @return a new tokenizer instance which parses Tab Separated Value strings.
|
---|
213 | */
|
---|
214 | public static StrTokenizer getTSVInstance(String input) {
|
---|
215 | StrTokenizer tok = getTSVClone();
|
---|
216 | tok.reset(input);
|
---|
217 | return tok;
|
---|
218 | }
|
---|
219 |
|
---|
220 | /**
|
---|
221 | * Gets a new tokenizer instance which parses Tab Separated Value strings.
|
---|
222 | * The default for CSV processing will be trim whitespace from both ends
|
---|
223 | * (which can be overridden with the setTrimmer method).
|
---|
224 | * @param input the string to parse
|
---|
225 | * @return a new tokenizer instance which parses Tab Separated Value strings.
|
---|
226 | */
|
---|
227 | public static StrTokenizer getTSVInstance(char[] input) {
|
---|
228 | StrTokenizer tok = getTSVClone();
|
---|
229 | tok.reset(input);
|
---|
230 | return tok;
|
---|
231 | }
|
---|
232 |
|
---|
233 | //-----------------------------------------------------------------------
|
---|
/**
 * Constructs a tokenizer that uses the default matchers (the delimiter
 * matcher defaults to {@link StrMatcher#splitMatcher()}) and has no text
 * to tokenize.
 * <p>
 * This constructor is normally used with {@link #reset(String)}.
 */
public StrTokenizer() {
    // same effect as storing a null text directly
    this((char[]) null);
}
244 |
|
---|
/**
 * Constructs a tokenizer over the given string using the default matchers
 * (the delimiter matcher defaults to {@link StrMatcher#splitMatcher()}).
 *
 * @param input  the string which is to be parsed, null means no text to parse
 */
public StrTokenizer(String input) {
    chars = (input == null) ? null : input.toCharArray();
}
259 |
|
---|
/**
 * Constructs a tokenizer over the given string that splits on a single
 * delimiter character.
 *
 * @param input  the string which is to be parsed
 * @param delim  the field delimiter character
 */
public StrTokenizer(String input, char delim) {
    this(input);
    this.setDelimiterChar(delim);
}
270 |
|
---|
/**
 * Constructs a tokenizer over the given string that splits on a
 * delimiter string.
 *
 * @param input  the string which is to be parsed
 * @param delim  the field delimiter string
 */
public StrTokenizer(String input, String delim) {
    this(input);
    this.setDelimiterString(delim);
}
281 |
|
---|
/**
 * Constructs a tokenizer over the given string that splits wherever the
 * supplied delimiter matcher matches.
 *
 * @param input  the string which is to be parsed
 * @param delim  the field delimiter matcher
 */
public StrTokenizer(String input, StrMatcher delim) {
    this(input);
    this.setDelimiterMatcher(delim);
}
292 |
|
---|
/**
 * Constructs a tokenizer over the given string that splits on a single
 * delimiter character and treats the given character as the quote.
 *
 * @param input  the string which is to be parsed
 * @param delim  the field delimiter character
 * @param quote  the field quoted string character
 */
public StrTokenizer(String input, char delim, char quote) {
    this(input, delim);
    this.setQuoteChar(quote);
}
305 |
|
---|
/**
 * Constructs a tokenizer over the given string that splits using the
 * supplied delimiter matcher and recognises quotes using the supplied
 * quote matcher.
 *
 * @param input  the string which is to be parsed
 * @param delim  the field delimiter matcher
 * @param quote  the field quoted string matcher
 */
public StrTokenizer(String input, StrMatcher delim, StrMatcher quote) {
    this(input, delim);
    this.setQuoteMatcher(quote);
}
318 |
|
---|
/**
 * Constructs a tokenizer over the given character array using the default
 * matchers (the delimiter matcher defaults to
 * {@link StrMatcher#splitMatcher()}).
 * <p>
 * The input character array is stored as-is, without cloning, and must
 * not be altered after passing in to this method.
 *
 * @param input  the characters which are to be parsed, not cloned
 */
public StrTokenizer(char[] input) {
    chars = input;
}
332 |
|
---|
/**
 * Constructs a tokenizer over the given character array that splits on a
 * single delimiter character.
 * <p>
 * The input character array is not cloned, and must not be altered after
 * passing in to this method.
 *
 * @param input  the characters which are to be parsed, not cloned
 * @param delim  the field delimiter character
 */
public StrTokenizer(char[] input, char delim) {
    this(input);
    this.setDelimiterChar(delim);
}
346 |
|
---|
/**
 * Constructs a tokenizer over the given character array that splits on a
 * delimiter string.
 * <p>
 * The input character array is not cloned, and must not be altered after
 * passing in to this method.
 *
 * @param input  the characters which are to be parsed, not cloned
 * @param delim  the field delimiter string
 */
public StrTokenizer(char[] input, String delim) {
    this(input);
    this.setDelimiterString(delim);
}
360 |
|
---|
/**
 * Constructs a tokenizer over the given character array that splits
 * wherever the supplied delimiter matcher matches.
 * <p>
 * The input character array is not cloned, and must not be altered after
 * passing in to this method.
 *
 * @param input  the characters which are to be parsed, not cloned
 * @param delim  the field delimiter matcher
 */
public StrTokenizer(char[] input, StrMatcher delim) {
    this(input);
    this.setDelimiterMatcher(delim);
}
374 |
|
---|
/**
 * Constructs a tokenizer over the given character array that splits on a
 * single delimiter character and treats the given character as the quote.
 * <p>
 * The input character array is not cloned, and must not be altered after
 * passing in to this method.
 *
 * @param input  the characters which are to be parsed, not cloned
 * @param delim  the field delimiter character
 * @param quote  the field quoted string character
 */
public StrTokenizer(char[] input, char delim, char quote) {
    this(input, delim);
    this.setQuoteChar(quote);
}
390 |
|
---|
/**
 * Constructs a tokenizer over the given character array that splits using
 * the supplied delimiter matcher and recognises quotes using the supplied
 * quote matcher.
 * <p>
 * The input character array is not cloned, and must not be altered after
 * passing in to this method.
 *
 * @param input  the characters which are to be parsed, not cloned
 * @param delim  the field delimiter matcher
 * @param quote  the field quoted string matcher
 */
public StrTokenizer(char[] input, StrMatcher delim, StrMatcher quote) {
    this(input, delim);
    this.setQuoteMatcher(quote);
}
406 |
|
---|
407 | // API
|
---|
408 | //-----------------------------------------------------------------------
|
---|
409 | /**
|
---|
410 | * Gets the number of tokens found in the String.
|
---|
411 | *
|
---|
412 | * @return the number of matched tokens
|
---|
413 | */
|
---|
414 | public int size() {
|
---|
415 | checkTokenized();
|
---|
416 | return tokens.length;
|
---|
417 | }
|
---|
418 |
|
---|
419 | /**
|
---|
420 | * Gets the next token from the String.
|
---|
421 | * Equivalent to {@link #next()} except it returns null rather than
|
---|
422 | * throwing {@link NoSuchElementException} when no tokens remain.
|
---|
423 | *
|
---|
424 | * @return the next sequential token, or null when no more tokens are found
|
---|
425 | */
|
---|
426 | public String nextToken() {
|
---|
427 | if (hasNext()) {
|
---|
428 | return tokens[tokenPos++];
|
---|
429 | }
|
---|
430 | return null;
|
---|
431 | }
|
---|
432 |
|
---|
433 | /**
|
---|
434 | * Gets the previous token from the String.
|
---|
435 | *
|
---|
436 | * @return the previous sequential token, or null when no more tokens are found
|
---|
437 | */
|
---|
438 | public String previousToken() {
|
---|
439 | if (hasPrevious()) {
|
---|
440 | return tokens[--tokenPos];
|
---|
441 | }
|
---|
442 | return null;
|
---|
443 | }
|
---|
444 |
|
---|
445 | /**
|
---|
446 | * Gets a copy of the full token list as an independent modifiable array.
|
---|
447 | *
|
---|
448 | * @return the tokens as a String array
|
---|
449 | */
|
---|
450 | public String[] getTokenArray() {
|
---|
451 | checkTokenized();
|
---|
452 | return (String[]) tokens.clone();
|
---|
453 | }
|
---|
454 |
|
---|
455 | /**
|
---|
456 | * Gets a copy of the full token list as an independent modifiable list.
|
---|
457 | *
|
---|
458 | * @return the tokens as a String array
|
---|
459 | */
|
---|
460 | public List getTokenList() {
|
---|
461 | checkTokenized();
|
---|
462 | List list = new ArrayList(tokens.length);
|
---|
463 | for (int i = 0; i < tokens.length; i++) {
|
---|
464 | list.add(tokens[i]);
|
---|
465 | }
|
---|
466 | return list;
|
---|
467 | }
|
---|
468 |
|
---|
469 | /**
|
---|
470 | * Resets this tokenizer, forgetting all parsing and iteration already completed.
|
---|
471 | * <p>
|
---|
472 | * This method allows the same tokenizer to be reused for the same String.
|
---|
473 | *
|
---|
474 | * @return this, to enable chaining
|
---|
475 | */
|
---|
476 | public StrTokenizer reset() {
|
---|
477 | tokenPos = 0;
|
---|
478 | tokens = null;
|
---|
479 | return this;
|
---|
480 | }
|
---|
481 |
|
---|
482 | /**
|
---|
483 | * Reset this tokenizer, giving it a new input string to parse.
|
---|
484 | * In this manner you can re-use a tokenizer with the same settings
|
---|
485 | * on multiple input lines.
|
---|
486 | *
|
---|
487 | * @param input the new string to tokenize, null sets no text to parse
|
---|
488 | * @return this, to enable chaining
|
---|
489 | */
|
---|
490 | public StrTokenizer reset(String input) {
|
---|
491 | reset();
|
---|
492 | if (input != null) {
|
---|
493 | this.chars = input.toCharArray();
|
---|
494 | } else {
|
---|
495 | this.chars = null;
|
---|
496 | }
|
---|
497 | return this;
|
---|
498 | }
|
---|
499 |
|
---|
500 | /**
|
---|
501 | * Reset this tokenizer, giving it a new input string to parse.
|
---|
502 | * In this manner you can re-use a tokenizer with the same settings
|
---|
503 | * on multiple input lines.
|
---|
504 | * <p>
|
---|
505 | * The input character array is not cloned, and must not be altered after
|
---|
506 | * passing in to this method.
|
---|
507 | *
|
---|
508 | * @param input the new character array to tokenize, not cloned, null sets no text to parse
|
---|
509 | * @return this, to enable chaining
|
---|
510 | */
|
---|
511 | public StrTokenizer reset(char[] input) {
|
---|
512 | reset();
|
---|
513 | this.chars = input;
|
---|
514 | return this;
|
---|
515 | }
|
---|
516 |
|
---|
517 | // ListIterator
|
---|
518 | //-----------------------------------------------------------------------
|
---|
519 | /**
|
---|
520 | * Checks whether there are any more tokens.
|
---|
521 | *
|
---|
522 | * @return true if there are more tokens
|
---|
523 | */
|
---|
524 | public boolean hasNext() {
|
---|
525 | checkTokenized();
|
---|
526 | return tokenPos < tokens.length;
|
---|
527 | }
|
---|
528 |
|
---|
529 | /**
|
---|
530 | * Gets the next token.
|
---|
531 | *
|
---|
532 | * @return the next String token
|
---|
533 | * @throws NoSuchElementException if there are no more elements
|
---|
534 | */
|
---|
535 | public Object next() {
|
---|
536 | if (hasNext()) {
|
---|
537 | return tokens[tokenPos++];
|
---|
538 | }
|
---|
539 | throw new NoSuchElementException();
|
---|
540 | }
|
---|
541 |
|
---|
542 | /**
|
---|
543 | * Gets the index of the next token to return.
|
---|
544 | *
|
---|
545 | * @return the next token index
|
---|
546 | */
|
---|
547 | public int nextIndex() {
|
---|
548 | return tokenPos;
|
---|
549 | }
|
---|
550 |
|
---|
551 | /**
|
---|
552 | * Checks whether there are any previous tokens that can be iterated to.
|
---|
553 | *
|
---|
554 | * @return true if there are previous tokens
|
---|
555 | */
|
---|
556 | public boolean hasPrevious() {
|
---|
557 | checkTokenized();
|
---|
558 | return tokenPos > 0;
|
---|
559 | }
|
---|
560 |
|
---|
561 | /**
|
---|
562 | * Gets the token previous to the last returned token.
|
---|
563 | *
|
---|
564 | * @return the previous token
|
---|
565 | */
|
---|
566 | public Object previous() {
|
---|
567 | if (hasPrevious()) {
|
---|
568 | return tokens[--tokenPos];
|
---|
569 | }
|
---|
570 | throw new NoSuchElementException();
|
---|
571 | }
|
---|
572 |
|
---|
573 | /**
|
---|
574 | * Gets the index of the previous token.
|
---|
575 | *
|
---|
576 | * @return the previous token index
|
---|
577 | */
|
---|
578 | public int previousIndex() {
|
---|
579 | return tokenPos - 1;
|
---|
580 | }
|
---|
581 |
|
---|
582 | /**
|
---|
583 | * Unsupported ListIterator operation.
|
---|
584 | *
|
---|
585 | * @throws UnsupportedOperationException always
|
---|
586 | */
|
---|
587 | public void remove() {
|
---|
588 | throw new UnsupportedOperationException("remove() is unsupported");
|
---|
589 | }
|
---|
590 |
|
---|
591 | /**
|
---|
592 | * Unsupported ListIterator operation.
|
---|
593 | * @param obj this parameter ignored.
|
---|
594 | * @throws UnsupportedOperationException always
|
---|
595 | */
|
---|
596 | public void set(Object obj) {
|
---|
597 | throw new UnsupportedOperationException("set() is unsupported");
|
---|
598 | }
|
---|
599 |
|
---|
600 | /**
|
---|
601 | * Unsupported ListIterator operation.
|
---|
602 | * @param obj this parameter ignored.
|
---|
603 | * @throws UnsupportedOperationException always
|
---|
604 | */
|
---|
605 | public void add(Object obj) {
|
---|
606 | throw new UnsupportedOperationException("add() is unsupported");
|
---|
607 | }
|
---|
608 |
|
---|
609 | // Implementation
|
---|
610 | //-----------------------------------------------------------------------
|
---|
611 | /**
|
---|
612 | * Checks if tokenization has been done, and if not then do it.
|
---|
613 | */
|
---|
614 | private void checkTokenized() {
|
---|
615 | if (tokens == null) {
|
---|
616 | if (chars == null) {
|
---|
617 | // still call tokenize as subclass may do some work
|
---|
618 | List split = tokenize(null, 0, 0);
|
---|
619 | tokens = (String[]) split.toArray(new String[split.size()]);
|
---|
620 | } else {
|
---|
621 | List split = tokenize(chars, 0, chars.length);
|
---|
622 | tokens = (String[]) split.toArray(new String[split.size()]);
|
---|
623 | }
|
---|
624 | }
|
---|
625 | }
|
---|
626 |
|
---|
627 | /**
|
---|
628 | * Internal method to performs the tokenization.
|
---|
629 | * <p>
|
---|
630 | * Most users of this class do not need to call this method. This method
|
---|
631 | * will be called automatically by other (public) methods when required.
|
---|
632 | * <p>
|
---|
633 | * This method exists to allow subclasses to add code before or after the
|
---|
634 | * tokenization. For example, a subclass could alter the character array,
|
---|
635 | * offset or count to be parsed, or call the tokenizer multiple times on
|
---|
636 | * multiple strings. It is also be possible to filter the results.
|
---|
637 | * <p>
|
---|
638 | * <code>StrTokenizer</code> will always pass a zero offset and a count
|
---|
639 | * equal to the length of the array to this method, however a subclass
|
---|
640 | * may pass other values, or even an entirely different array.
|
---|
641 | *
|
---|
642 | * @param chars the character array being tokenized, may be null
|
---|
643 | * @param offset the start position within the character array, must be valid
|
---|
644 | * @param count the number of characters to tokenize, must be valid
|
---|
645 | * @return the modifiable list of String tokens, unmodifiable if null array or zero count
|
---|
646 | */
|
---|
647 | protected List tokenize(char[] chars, int offset, int count) {
|
---|
648 | if (chars == null || count == 0) {
|
---|
649 | return Collections.EMPTY_LIST;
|
---|
650 | }
|
---|
651 | StrBuilder buf = new StrBuilder();
|
---|
652 | List tokens = new ArrayList();
|
---|
653 | int pos = offset;
|
---|
654 |
|
---|
655 | // loop around the entire buffer
|
---|
656 | while (pos >= 0 && pos < count) {
|
---|
657 | // find next token
|
---|
658 | pos = readNextToken(chars, pos, count, buf, tokens);
|
---|
659 |
|
---|
660 | // handle case where end of string is a delimiter
|
---|
661 | if (pos >= count) {
|
---|
662 | addToken(tokens, "");
|
---|
663 | }
|
---|
664 | }
|
---|
665 | return tokens;
|
---|
666 | }
|
---|
667 |
|
---|
668 | /**
|
---|
669 | * Adds a token to a list, paying attention to the parameters we've set.
|
---|
670 | *
|
---|
671 | * @param list the list to add to
|
---|
672 | * @param tok the token to add
|
---|
673 | */
|
---|
674 | private void addToken(List list, String tok) {
|
---|
675 | if (tok == null || tok.length() == 0) {
|
---|
676 | if (isIgnoreEmptyTokens()) {
|
---|
677 | return;
|
---|
678 | }
|
---|
679 | if (isEmptyTokenAsNull()) {
|
---|
680 | tok = null;
|
---|
681 | }
|
---|
682 | }
|
---|
683 | list.add(tok);
|
---|
684 | }
|
---|
685 |
|
---|
686 | /**
|
---|
687 | * Reads character by character through the String to get the next token.
|
---|
688 | *
|
---|
689 | * @param chars the character array being tokenized
|
---|
690 | * @param start the first character of field
|
---|
691 | * @param len the length of the character array being tokenized
|
---|
692 | * @param workArea a temporary work area
|
---|
693 | * @param tokens the list of parsed tokens
|
---|
694 | * @return the starting position of the next field (the character
|
---|
695 | * immediately after the delimiter), or -1 if end of string found
|
---|
696 | */
|
---|
697 | private int readNextToken(char[] chars, int start, int len, StrBuilder workArea, List tokens) {
|
---|
698 | // skip all leading whitespace, unless it is the
|
---|
699 | // field delimiter or the quote character
|
---|
700 | while (start < len) {
|
---|
701 | int removeLen = Math.max(
|
---|
702 | getIgnoredMatcher().isMatch(chars, start, start, len),
|
---|
703 | getTrimmerMatcher().isMatch(chars, start, start, len));
|
---|
704 | if (removeLen == 0 ||
|
---|
705 | getDelimiterMatcher().isMatch(chars, start, start, len) > 0 ||
|
---|
706 | getQuoteMatcher().isMatch(chars, start, start, len) > 0) {
|
---|
707 | break;
|
---|
708 | }
|
---|
709 | start += removeLen;
|
---|
710 | }
|
---|
711 |
|
---|
712 | // handle reaching end
|
---|
713 | if (start >= len) {
|
---|
714 | addToken(tokens, "");
|
---|
715 | return -1;
|
---|
716 | }
|
---|
717 |
|
---|
718 | // handle empty token
|
---|
719 | int delimLen = getDelimiterMatcher().isMatch(chars, start, start, len);
|
---|
720 | if (delimLen > 0) {
|
---|
721 | addToken(tokens, "");
|
---|
722 | return start + delimLen;
|
---|
723 | }
|
---|
724 |
|
---|
725 | // handle found token
|
---|
726 | int quoteLen = getQuoteMatcher().isMatch(chars, start, start, len);
|
---|
727 | if (quoteLen > 0) {
|
---|
728 | return readWithQuotes(chars, start + quoteLen, len, workArea, tokens, start, quoteLen);
|
---|
729 | }
|
---|
730 | return readWithQuotes(chars, start, len, workArea, tokens, 0, 0);
|
---|
731 | }
|
---|
732 |
|
---|
733 | /**
|
---|
734 | * Reads a possibly quoted string token.
|
---|
735 | *
|
---|
736 | * @param chars the character array being tokenized
|
---|
737 | * @param start the first character of field
|
---|
738 | * @param len the length of the character array being tokenized
|
---|
739 | * @param workArea a temporary work area
|
---|
740 | * @param tokens the list of parsed tokens
|
---|
741 | * @param quoteStart the start position of the matched quote, 0 if no quoting
|
---|
742 | * @param quoteLen the length of the matched quote, 0 if no quoting
|
---|
743 | * @return the starting position of the next field (the character
|
---|
744 | * immediately after the delimiter, or if end of string found,
|
---|
745 | * then the length of string
|
---|
746 | */
|
---|
747 | private int readWithQuotes(char[] chars, int start, int len, StrBuilder workArea,
|
---|
748 | List tokens, int quoteStart, int quoteLen)
|
---|
749 | {
|
---|
750 | // Loop until we've found the end of the quoted
|
---|
751 | // string or the end of the input
|
---|
752 | workArea.clear();
|
---|
753 | int pos = start;
|
---|
754 | boolean quoting = (quoteLen > 0);
|
---|
755 | int trimStart = 0;
|
---|
756 |
|
---|
757 | while (pos < len) {
|
---|
758 | // quoting mode can occur several times throughout a string
|
---|
759 | // we must switch between quoting and non-quoting until we
|
---|
760 | // encounter a non-quoted delimiter, or end of string
|
---|
761 | if (quoting) {
|
---|
762 | // In quoting mode
|
---|
763 |
|
---|
764 | // If we've found a quote character, see if it's
|
---|
765 | // followed by a second quote. If so, then we need
|
---|
766 | // to actually put the quote character into the token
|
---|
767 | // rather than end the token.
|
---|
768 | if (isQuote(chars, pos, len, quoteStart, quoteLen)) {
|
---|
769 | if (isQuote(chars, pos + quoteLen, len, quoteStart, quoteLen)) {
|
---|
770 | // matched pair of quotes, thus an escaped quote
|
---|
771 | workArea.append(chars, pos, quoteLen);
|
---|
772 | pos += (quoteLen * 2);
|
---|
773 | trimStart = workArea.size();
|
---|
774 | continue;
|
---|
775 | }
|
---|
776 |
|
---|
777 | // end of quoting
|
---|
778 | quoting = false;
|
---|
779 | pos += quoteLen;
|
---|
780 | continue;
|
---|
781 | }
|
---|
782 |
|
---|
783 | // copy regular character from inside quotes
|
---|
784 | workArea.append(chars[pos++]);
|
---|
785 | trimStart = workArea.size();
|
---|
786 |
|
---|
787 | } else {
|
---|
788 | // Not in quoting mode
|
---|
789 |
|
---|
790 | // check for delimiter, and thus end of token
|
---|
791 | int delimLen = getDelimiterMatcher().isMatch(chars, pos, start, len);
|
---|
792 | if (delimLen > 0) {
|
---|
793 | // return condition when end of token found
|
---|
794 | addToken(tokens, workArea.substring(0, trimStart));
|
---|
795 | return pos + delimLen;
|
---|
796 | }
|
---|
797 |
|
---|
798 | // check for quote, and thus back into quoting mode
|
---|
799 | if (quoteLen > 0) {
|
---|
800 | if (isQuote(chars, pos, len, quoteStart, quoteLen)) {
|
---|
801 | quoting = true;
|
---|
802 | pos += quoteLen;
|
---|
803 | continue;
|
---|
804 | }
|
---|
805 | }
|
---|
806 |
|
---|
807 | // check for ignored (outside quotes), and ignore
|
---|
808 | int ignoredLen = getIgnoredMatcher().isMatch(chars, pos, start, len);
|
---|
809 | if (ignoredLen > 0) {
|
---|
810 | pos += ignoredLen;
|
---|
811 | continue;
|
---|
812 | }
|
---|
813 |
|
---|
814 | // check for trimmed character
|
---|
815 | // don't yet know if its at the end, so copy to workArea
|
---|
816 | // use trimStart to keep track of trim at the end
|
---|
817 | int trimmedLen = getTrimmerMatcher().isMatch(chars, pos, start, len);
|
---|
818 | if (trimmedLen > 0) {
|
---|
819 | workArea.append(chars, pos, trimmedLen);
|
---|
820 | pos += trimmedLen;
|
---|
821 | continue;
|
---|
822 | }
|
---|
823 |
|
---|
824 | // copy regular character from outside quotes
|
---|
825 | workArea.append(chars[pos++]);
|
---|
826 | trimStart = workArea.size();
|
---|
827 | }
|
---|
828 | }
|
---|
829 |
|
---|
830 | // return condition when end of string found
|
---|
831 | addToken(tokens, workArea.substring(0, trimStart));
|
---|
832 | return -1;
|
---|
833 | }
|
---|
834 |
|
---|
835 | /**
|
---|
836 | * Checks if the characters at the index specified match the quote
|
---|
837 | * already matched in readNextToken().
|
---|
838 | *
|
---|
839 | * @param chars the character array being tokenized
|
---|
840 | * @param pos the position to check for a quote
|
---|
841 | * @param len the length of the character array being tokenized
|
---|
842 | * @param quoteStart the start position of the matched quote, 0 if no quoting
|
---|
843 | * @param quoteLen the length of the matched quote, 0 if no quoting
|
---|
844 | * @return true if a quote is matched
|
---|
845 | */
|
---|
846 | private boolean isQuote(char[] chars, int pos, int len, int quoteStart, int quoteLen) {
|
---|
847 | for (int i = 0; i < quoteLen; i++) {
|
---|
848 | if ((pos + i) >= len || chars[pos + i] != chars[quoteStart + i]) {
|
---|
849 | return false;
|
---|
850 | }
|
---|
851 | }
|
---|
852 | return true;
|
---|
853 | }
|
---|
854 |
|
---|
855 | // Delimiter
|
---|
856 | //-----------------------------------------------------------------------
|
---|
857 | /**
|
---|
858 | * Gets the field delimiter matcher.
|
---|
859 | *
|
---|
860 | * @return the delimiter matcher in use
|
---|
861 | */
|
---|
862 | public StrMatcher getDelimiterMatcher() {
|
---|
863 | return this.delimMatcher;
|
---|
864 | }
|
---|
865 |
|
---|
866 | /**
|
---|
867 | * Sets the field delimiter matcher.
|
---|
868 | * <p>
|
---|
869 | * The delimitier is used to separate one token from another.
|
---|
870 | *
|
---|
871 | * @param delim the delimiter matcher to use
|
---|
872 | * @return this, to enable chaining
|
---|
873 | */
|
---|
874 | public StrTokenizer setDelimiterMatcher(StrMatcher delim) {
|
---|
875 | if (delim == null) {
|
---|
876 | this.delimMatcher = StrMatcher.noneMatcher();
|
---|
877 | } else {
|
---|
878 | this.delimMatcher = delim;
|
---|
879 | }
|
---|
880 | return this;
|
---|
881 | }
|
---|
882 |
|
---|
883 | /**
|
---|
884 | * Sets the field delimiter character.
|
---|
885 | *
|
---|
886 | * @param delim the delimiter character to use
|
---|
887 | * @return this, to enable chaining
|
---|
888 | */
|
---|
889 | public StrTokenizer setDelimiterChar(char delim) {
|
---|
890 | return setDelimiterMatcher(StrMatcher.charMatcher(delim));
|
---|
891 | }
|
---|
892 |
|
---|
893 | /**
|
---|
894 | * Sets the field delimiter string.
|
---|
895 | *
|
---|
896 | * @param delim the delimiter string to use
|
---|
897 | * @return this, to enable chaining
|
---|
898 | */
|
---|
899 | public StrTokenizer setDelimiterString(String delim) {
|
---|
900 | return setDelimiterMatcher(StrMatcher.stringMatcher(delim));
|
---|
901 | }
|
---|
902 |
|
---|
903 | // Quote
|
---|
904 | //-----------------------------------------------------------------------
|
---|
905 | /**
|
---|
906 | * Gets the quote matcher currently in use.
|
---|
907 | * <p>
|
---|
908 | * The quote character is used to wrap data between the tokens.
|
---|
909 | * This enables delimiters to be entered as data.
|
---|
910 | * The default value is '"' (double quote).
|
---|
911 | *
|
---|
912 | * @return the quote matcher in use
|
---|
913 | */
|
---|
914 | public StrMatcher getQuoteMatcher() {
|
---|
915 | return quoteMatcher;
|
---|
916 | }
|
---|
917 |
|
---|
918 | /**
|
---|
919 | * Set the quote matcher to use.
|
---|
920 | * <p>
|
---|
921 | * The quote character is used to wrap data between the tokens.
|
---|
922 | * This enables delimiters to be entered as data.
|
---|
923 | *
|
---|
924 | * @param quote the quote matcher to use, null ignored
|
---|
925 | * @return this, to enable chaining
|
---|
926 | */
|
---|
927 | public StrTokenizer setQuoteMatcher(StrMatcher quote) {
|
---|
928 | if (quote != null) {
|
---|
929 | this.quoteMatcher = quote;
|
---|
930 | }
|
---|
931 | return this;
|
---|
932 | }
|
---|
933 |
|
---|
934 | /**
|
---|
935 | * Sets the quote character to use.
|
---|
936 | * <p>
|
---|
937 | * The quote character is used to wrap data between the tokens.
|
---|
938 | * This enables delimiters to be entered as data.
|
---|
939 | *
|
---|
940 | * @param quote the quote character to use
|
---|
941 | * @return this, to enable chaining
|
---|
942 | */
|
---|
943 | public StrTokenizer setQuoteChar(char quote) {
|
---|
944 | return setQuoteMatcher(StrMatcher.charMatcher(quote));
|
---|
945 | }
|
---|
946 |
|
---|
947 | // Ignored
|
---|
948 | //-----------------------------------------------------------------------
|
---|
949 | /**
|
---|
950 | * Gets the ignored character matcher.
|
---|
951 | * <p>
|
---|
952 | * These characters are ignored when parsing the String, unless they are
|
---|
953 | * within a quoted region.
|
---|
954 | * The default value is not to ignore anything.
|
---|
955 | *
|
---|
956 | * @return the ignored matcher in use
|
---|
957 | */
|
---|
958 | public StrMatcher getIgnoredMatcher() {
|
---|
959 | return ignoredMatcher;
|
---|
960 | }
|
---|
961 |
|
---|
962 | /**
|
---|
963 | * Set the matcher for characters to ignore.
|
---|
964 | * <p>
|
---|
965 | * These characters are ignored when parsing the String, unless they are
|
---|
966 | * within a quoted region.
|
---|
967 | *
|
---|
968 | * @param ignored the ignored matcher to use, null ignored
|
---|
969 | * @return this, to enable chaining
|
---|
970 | */
|
---|
971 | public StrTokenizer setIgnoredMatcher(StrMatcher ignored) {
|
---|
972 | if (ignored != null) {
|
---|
973 | this.ignoredMatcher = ignored;
|
---|
974 | }
|
---|
975 | return this;
|
---|
976 | }
|
---|
977 |
|
---|
978 | /**
|
---|
979 | * Set the character to ignore.
|
---|
980 | * <p>
|
---|
981 | * This character is ignored when parsing the String, unless it is
|
---|
982 | * within a quoted region.
|
---|
983 | *
|
---|
984 | * @param ignored the ignored character to use
|
---|
985 | * @return this, to enable chaining
|
---|
986 | */
|
---|
987 | public StrTokenizer setIgnoredChar(char ignored) {
|
---|
988 | return setIgnoredMatcher(StrMatcher.charMatcher(ignored));
|
---|
989 | }
|
---|
990 |
|
---|
991 | // Trimmer
|
---|
992 | //-----------------------------------------------------------------------
|
---|
993 | /**
|
---|
994 | * Gets the trimmer character matcher.
|
---|
995 | * <p>
|
---|
996 | * These characters are trimmed off on each side of the delimiter
|
---|
997 | * until the token or quote is found.
|
---|
998 | * The default value is not to trim anything.
|
---|
999 | *
|
---|
1000 | * @return the trimmer matcher in use
|
---|
1001 | */
|
---|
1002 | public StrMatcher getTrimmerMatcher() {
|
---|
1003 | return trimmerMatcher;
|
---|
1004 | }
|
---|
1005 |
|
---|
1006 | /**
|
---|
1007 | * Sets the matcher for characters to trim.
|
---|
1008 | * <p>
|
---|
1009 | * These characters are trimmed off on each side of the delimiter
|
---|
1010 | * until the token or quote is found.
|
---|
1011 | *
|
---|
1012 | * @param trimmer the trimmer matcher to use, null ignored
|
---|
1013 | * @return this, to enable chaining
|
---|
1014 | */
|
---|
1015 | public StrTokenizer setTrimmerMatcher(StrMatcher trimmer) {
|
---|
1016 | if (trimmer != null) {
|
---|
1017 | this.trimmerMatcher = trimmer;
|
---|
1018 | }
|
---|
1019 | return this;
|
---|
1020 | }
|
---|
1021 |
|
---|
1022 | //-----------------------------------------------------------------------
|
---|
1023 | /**
|
---|
1024 | * Gets whether the tokenizer currently returns empty tokens as null.
|
---|
1025 | * The default for this property is false.
|
---|
1026 | *
|
---|
1027 | * @return true if empty tokens are returned as null
|
---|
1028 | */
|
---|
1029 | public boolean isEmptyTokenAsNull() {
|
---|
1030 | return this.emptyAsNull;
|
---|
1031 | }
|
---|
1032 |
|
---|
1033 | /**
|
---|
1034 | * Sets whether the tokenizer should return empty tokens as null.
|
---|
1035 | * The default for this property is false.
|
---|
1036 | *
|
---|
1037 | * @param emptyAsNull whether empty tokens are returned as null
|
---|
1038 | * @return this, to enable chaining
|
---|
1039 | */
|
---|
1040 | public StrTokenizer setEmptyTokenAsNull(boolean emptyAsNull) {
|
---|
1041 | this.emptyAsNull = emptyAsNull;
|
---|
1042 | return this;
|
---|
1043 | }
|
---|
1044 |
|
---|
1045 | //-----------------------------------------------------------------------
|
---|
1046 | /**
|
---|
1047 | * Gets whether the tokenizer currently ignores empty tokens.
|
---|
1048 | * The default for this property is true.
|
---|
1049 | *
|
---|
1050 | * @return true if empty tokens are not returned
|
---|
1051 | */
|
---|
1052 | public boolean isIgnoreEmptyTokens() {
|
---|
1053 | return ignoreEmptyTokens;
|
---|
1054 | }
|
---|
1055 |
|
---|
1056 | /**
|
---|
1057 | * Sets whether the tokenizer should ignore and not return empty tokens.
|
---|
1058 | * The default for this property is true.
|
---|
1059 | *
|
---|
1060 | * @param ignoreEmptyTokens whether empty tokens are not returned
|
---|
1061 | * @return this, to enable chaining
|
---|
1062 | */
|
---|
1063 | public StrTokenizer setIgnoreEmptyTokens(boolean ignoreEmptyTokens) {
|
---|
1064 | this.ignoreEmptyTokens = ignoreEmptyTokens;
|
---|
1065 | return this;
|
---|
1066 | }
|
---|
1067 |
|
---|
1068 | //-----------------------------------------------------------------------
|
---|
1069 | /**
|
---|
1070 | * Gets the String content that the tokenizer is parsing.
|
---|
1071 | *
|
---|
1072 | * @return the string content being parsed
|
---|
1073 | */
|
---|
1074 | public String getContent() {
|
---|
1075 | if (chars == null) {
|
---|
1076 | return null;
|
---|
1077 | }
|
---|
1078 | return new String(chars);
|
---|
1079 | }
|
---|
1080 |
|
---|
1081 | //-----------------------------------------------------------------------
|
---|
1082 | /**
|
---|
1083 | * Creates a new instance of this Tokenizer. The new instance is reset so
|
---|
1084 | * that it will be at the start of the token list.
|
---|
1085 | * If a {@link CloneNotSupportedException} is caught, return <code>null</code>.
|
---|
1086 | *
|
---|
1087 | * @return a new instance of this Tokenizer which has been reset.
|
---|
1088 | */
|
---|
1089 | public Object clone() {
|
---|
1090 | try {
|
---|
1091 | return cloneReset();
|
---|
1092 | } catch (CloneNotSupportedException ex) {
|
---|
1093 | return null;
|
---|
1094 | }
|
---|
1095 | }
|
---|
1096 |
|
---|
1097 | /**
|
---|
1098 | * Creates a new instance of this Tokenizer. The new instance is reset so that
|
---|
1099 | * it will be at the start of the token list.
|
---|
1100 | *
|
---|
1101 | * @return a new instance of this Tokenizer which has been reset.
|
---|
1102 | * @throws CloneNotSupportedException if there is a problem cloning
|
---|
1103 | */
|
---|
1104 | Object cloneReset() throws CloneNotSupportedException {
|
---|
1105 | // this method exists to enable 100% test coverage
|
---|
1106 | StrTokenizer cloned = (StrTokenizer) super.clone();
|
---|
1107 | if (cloned.chars != null) {
|
---|
1108 | cloned.chars = (char[]) cloned.chars.clone();
|
---|
1109 | }
|
---|
1110 | cloned.reset();
|
---|
1111 | return cloned;
|
---|
1112 | }
|
---|
1113 |
|
---|
1114 | //-----------------------------------------------------------------------
|
---|
1115 | /**
|
---|
1116 | * Gets the String content that the tokenizer is parsing.
|
---|
1117 | *
|
---|
1118 | * @return the string content being parsed
|
---|
1119 | */
|
---|
1120 | public String toString() {
|
---|
1121 | if (tokens == null) {
|
---|
1122 | return "StrTokenizer[not tokenized yet]";
|
---|
1123 | }
|
---|
1124 | return "StrTokenizer" + getTokenList();
|
---|
1125 | }
|
---|
1126 |
|
---|
1127 | }
|
---|