source: src/main/java/genius/core/xml/SimpleDOMParser.java

Last change on this file was 127, checked in by Wouter Pasman, 6 years ago

#41 ROLL BACK of rev. 126, so this version is equal to rev. 125.

File size: 6.8 KB
Line 
1package genius.core.xml;
2/*
3 * @(#)SimpleDOMParser.java
4 */
5
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.Stack;
9
10/**
11 * <code>SimpleDOMParser</code> is a highly-simplified XML DOM parser.
12 */
13public class SimpleDOMParser {
14 private static final int[] cdata_start = { '<', '!', '[', 'C', 'D', 'A',
15 'T', 'A', '[' };
16 private static final int[] cdata_end = { ']', ']', '>' };
17
18 private Reader reader;
19 private Stack<SimpleElement> elements;
20 private SimpleElement currentElement;
21
22 public SimpleDOMParser() {
23 elements = new Stack<SimpleElement>();
24 currentElement = null;
25 }
26
27 public SimpleElement parse(Reader reader) throws IOException {
28 this.reader = reader;
29
30 // skip xml declaration or DocTypes
31 skipPrologs();
32
33 while (true) {
34 int index;
35 String tagName;
36
37 // remove the prepend or trailing white spaces
38 String currentTag = readTag().trim();
39
40 if (currentTag.startsWith("</")) {
41 // close tag
42 tagName = currentTag.substring(2, currentTag.length() - 1);
43
44 // no open tag
45 if (currentElement == null) {
46 throw new IOException("Got close tag '" + tagName
47 + "' without open tag.");
48 }
49
50 // close tag does not match with open tag
51 if (!tagName.equals(currentElement.getTagName())) {
52 throw new IOException("Expected close tag for '"
53 + currentElement.getTagName() + "' but got '"
54 + tagName + "'.");
55 }
56
57 if (elements.empty()) {
58 // document processing is over
59 return currentElement;
60 } else {
61 // pop up the previous open tag
62 currentElement = elements.pop();
63 }
64 } else {
65 // open tag or tag with both open and close tags
66 index = currentTag.indexOf(" ");
67 if (index < 0) {
68 // tag with no attributes
69 if (currentTag.endsWith("/>")) {
70 // close tag as well
71 tagName = currentTag.substring(1,
72 currentTag.length() - 2);
73 currentTag = "/>";
74 } else {
75 // open tag
76 tagName = currentTag.substring(1,
77 currentTag.length() - 1);
78 currentTag = "";
79 }
80 } else {
81 // tag with attributes
82 tagName = currentTag.substring(1, index);
83 currentTag = currentTag.substring(index + 1);
84 }
85
86 // createFrom new element
87 SimpleElement element = new SimpleElement(tagName);
88
89 // parse the attributes
90 boolean isTagClosed = false;
91 while (currentTag.length() > 0) {
92 // remove the prepend or trailing white spaces
93 currentTag = currentTag.trim();
94
95 if (currentTag.equals("/>")) {
96 // close tag
97 isTagClosed = true;
98 break;
99 } else if (currentTag.equals(">")) {
100 // open tag
101 break;
102 }
103
104 index = currentTag.indexOf("=");
105 if (index < 0) {
106 throw new IOException(
107 "Invalid attribute for tag '" + tagName + "'.");
108 }
109
110 // get attribute name
111 String attributeName = currentTag.substring(0, index);
112 currentTag = currentTag.substring(index + 1);
113
114 // get attribute value
115 String attributeValue;
116 boolean isQuoted = true;
117 if (currentTag.startsWith("\"")) {
118 index = currentTag.indexOf('"', 1);
119 } else if (currentTag.startsWith("'")) {
120 index = currentTag.indexOf('\'', 1);
121 } else {
122 isQuoted = false;
123 index = currentTag.indexOf(' ');
124 if (index < 0) {
125 index = currentTag.indexOf('>');
126 if (index < 0) {
127 index = currentTag.indexOf('/');
128 }
129 }
130 }
131
132 if (index < 0) {
133 throw new IOException(
134 "Invalid attribute for tag '" + tagName + "'.");
135 }
136
137 if (isQuoted) {
138 attributeValue = currentTag.substring(1, index);
139 } else {
140 attributeValue = currentTag.substring(0, index);
141 }
142
143 // add attribute to the new element
144 element.setAttribute(attributeName, attributeValue);
145
146 currentTag = currentTag.substring(index + 1);
147 }
148
149 // read the text between the open and close tag
150 if (!isTagClosed) {
151 element.setText(readText());
152 }
153
154 // add new element as a child element of
155 // the current element
156 if (currentElement != null) {
157 currentElement.addChildElement(element);
158 }
159
160 if (!isTagClosed) {
161 if (currentElement != null) {
162 elements.push(currentElement);
163 }
164
165 currentElement = element;
166 } else if (currentElement == null) {
167 // only has one tag in the document
168 return element;
169 }
170 }
171 }
172 }
173
174 private int peek() throws IOException {
175 reader.mark(1);
176 int result = reader.read();
177 reader.reset();
178
179 return result;
180 }
181
182 private void peek(int[] buffer) throws IOException {
183 reader.mark(buffer.length);
184 for (int i = 0; i < buffer.length; i++) {
185 buffer[i] = reader.read();
186 }
187 reader.reset();
188 }
189
190 private void skipWhitespace() throws IOException {
191 while (Character.isWhitespace((char) peek())) {
192 reader.read();
193 }
194 }
195
196 private void skipProlog() throws IOException {
197 // skip "<?" or "<!"
198 reader.skip(2);
199
200 while (true) {
201 int next = peek();
202
203 if (next == '>') {
204 reader.read();
205 break;
206 } else if (next == '<') {
207 // nesting prolog
208 skipProlog();
209 } else {
210 reader.read();
211 }
212 }
213 }
214
215 private void skipPrologs() throws IOException {
216 while (true) {
217 skipWhitespace();
218
219 int[] next = new int[2];
220 peek(next);
221
222 if (next[0] != '<') {
223 throw new IOException(
224 "Expected '<' but got '" + (char) next[0] + "'.");
225 }
226
227 if ((next[1] == '?') || (next[1] == '!')) {
228 skipProlog();
229 } else {
230 break;
231 }
232 }
233 }
234
235 private String readTag() throws IOException {
236 skipWhitespace();
237
238 StringBuffer sb = new StringBuffer();
239
240 int next = peek();
241 if (next != '<') {
242 throw new IOException("Expected < but got " + (char) next);
243 }
244
245 sb.append((char) reader.read());
246 while (peek() != '>') {
247 sb.append((char) reader.read());
248 }
249 sb.append((char) reader.read());
250
251 return sb.toString();
252 }
253
254 private String readText() throws IOException {
255 StringBuffer sb = new StringBuffer();
256
257 int[] next = new int[cdata_start.length];
258 peek(next);
259 if (compareIntArrays(next, cdata_start) == true) {
260 reader.skip(next.length);
261
262 int[] buffer = new int[cdata_end.length];
263 while (true) {
264 peek(buffer);
265
266 if (compareIntArrays(buffer, cdata_end) == true) {
267 reader.skip(buffer.length);
268 break;
269 } else {
270 sb.append((char) reader.read());
271 }
272 }
273 } else {
274 while (peek() != '<') {
275 sb.append((char) reader.read());
276 }
277 }
278
279 return sb.toString();
280 }
281
282 private boolean compareIntArrays(int[] a1, int[] a2) {
283 if (a1.length != a2.length) {
284 return false;
285 }
286
287 for (int i = 0; i < a1.length; i++) {
288 if (a1[i] != a2[i]) {
289 return false;
290 }
291 }
292
293 return true;
294 }
295}
Note: See TracBrowser for help on using the repository browser.