1 | package genius.core.xml;
2 | /*
3 | * @(#)SimpleDOMParser.java
4 | */
5 |
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Stack;
9 |
10 | /**
11 | * <code>SimpleDOMParser</code> is a highly-simplified XML DOM parser.
12 | */
13 | public class SimpleDOMParser {
14 | private static final int[] cdata_start = { '<', '!', '[', 'C', 'D', 'A',
15 | 'T', 'A', '[' };
16 | private static final int[] cdata_end = { ']', ']', '>' };
17 |
18 | private Reader reader;
19 | private Stack<SimpleElement> elements;
20 | private SimpleElement currentElement;
21 |
22 | public SimpleDOMParser() {
23 | elements = new Stack<SimpleElement>();
24 | currentElement = null;
25 | }
26 |
27 | public SimpleElement parse(Reader reader) throws IOException {
28 | this.reader = reader;
29 |
30 | // skip xml declaration or DocTypes
31 | skipPrologs();
32 |
33 | while (true) {
34 | int index;
35 | String tagName;
36 |
37 | // remove the prepend or trailing white spaces
38 | String currentTag = readTag().trim();
39 |
40 | if (currentTag.startsWith("</")) {
41 | // close tag
42 | tagName = currentTag.substring(2, currentTag.length() - 1);
43 |
44 | // no open tag
45 | if (currentElement == null) {
46 | throw new IOException("Got close tag '" + tagName
47 | + "' without open tag.");
48 | }
49 |
50 | // close tag does not match with open tag
51 | if (!tagName.equals(currentElement.getTagName())) {
52 | throw new IOException("Expected close tag for '"
53 | + currentElement.getTagName() + "' but got '"
54 | + tagName + "'.");
55 | }
56 |
57 | if (elements.empty()) {
58 | // document processing is over
59 | return currentElement;
60 | } else {
61 | // pop up the previous open tag
62 | currentElement = elements.pop();
63 | }
64 | } else {
65 | // open tag or tag with both open and close tags
66 | index = currentTag.indexOf(" ");
67 | if (index < 0) {
68 | // tag with no attributes
69 | if (currentTag.endsWith("/>")) {
70 | // close tag as well
71 | tagName = currentTag.substring(1,
72 | currentTag.length() - 2);
73 | currentTag = "/>";
74 | } else {
75 | // open tag
76 | tagName = currentTag.substring(1,
77 | currentTag.length() - 1);
78 | currentTag = "";
79 | }
80 | } else {
81 | // tag with attributes
82 | tagName = currentTag.substring(1, index);
83 | currentTag = currentTag.substring(index + 1);
84 | }
85 |
86 | // createFrom new element
87 | SimpleElement element = new SimpleElement(tagName);
88 |
89 | // parse the attributes
90 | boolean isTagClosed = false;
91 | while (currentTag.length() > 0) {
92 | // remove the prepend or trailing white spaces
93 | currentTag = currentTag.trim();
94 |
95 | if (currentTag.equals("/>")) {
96 | // close tag
97 | isTagClosed = true;
98 | break;
99 | } else if (currentTag.equals(">")) {
100 | // open tag
101 | break;
102 | }
103 |
104 | index = currentTag.indexOf("=");
105 | if (index < 0) {
106 | throw new IOException(
107 | "Invalid attribute for tag '" + tagName + "'.");
108 | }
109 |
110 | // get attribute name
111 | String attributeName = currentTag.substring(0, index);
112 | currentTag = currentTag.substring(index + 1);
113 |
114 | // get attribute value
115 | String attributeValue;
116 | boolean isQuoted = true;
117 | if (currentTag.startsWith("\"")) {
118 | index = currentTag.indexOf('"', 1);
119 | } else if (currentTag.startsWith("'")) {
120 | index = currentTag.indexOf('\'', 1);
121 | } else {
122 | isQuoted = false;
123 | index = currentTag.indexOf(' ');
124 | if (index < 0) {
125 | index = currentTag.indexOf('>');
126 | if (index < 0) {
127 | index = currentTag.indexOf('/');
128 | }
129 | }
130 | }
131 |
132 | if (index < 0) {
133 | throw new IOException(
134 | "Invalid attribute for tag '" + tagName + "'.");
135 | }
136 |
137 | if (isQuoted) {
138 | attributeValue = currentTag.substring(1, index);
139 | } else {
140 | attributeValue = currentTag.substring(0, index);
141 | }
142 |
143 | // add attribute to the new element
144 | element.setAttribute(attributeName, attributeValue);
145 |
146 | currentTag = currentTag.substring(index + 1);
147 | }
148 |
149 | // read the text between the open and close tag
150 | if (!isTagClosed) {
151 | element.setText(readText());
152 | }
153 |
154 | // add new element as a child element of
155 | // the current element
156 | if (currentElement != null) {
157 | currentElement.addChildElement(element);
158 | }
159 |
160 | if (!isTagClosed) {
161 | if (currentElement != null) {
162 | elements.push(currentElement);
163 | }
164 |
165 | currentElement = element;
166 | } else if (currentElement == null) {
167 | // only has one tag in the document
168 | return element;
169 | }
170 | }
171 | }
172 | }
173 |
174 | private int peek() throws IOException {
175 | reader.mark(1);
176 | int result = reader.read();
177 | reader.reset();
178 |
179 | return result;
180 | }
181 |
182 | private void peek(int[] buffer) throws IOException {
183 | reader.mark(buffer.length);
184 | for (int i = 0; i < buffer.length; i++) {
185 | buffer[i] = reader.read();
186 | }
187 | reader.reset();
188 | }
189 |
190 | private void skipWhitespace() throws IOException {
191 | while (Character.isWhitespace((char) peek())) {
192 | reader.read();
193 | }
194 | }
195 |
196 | private void skipProlog() throws IOException {
197 | // skip "<?" or "<!"
198 | reader.skip(2);
199 |
200 | while (true) {
201 | int next = peek();
202 |
203 | if (next == '>') {
204 | reader.read();
205 | break;
206 | } else if (next == '<') {
207 | // nesting prolog
208 | skipProlog();
209 | } else {
210 | reader.read();
211 | }
212 | }
213 | }
214 |
215 | private void skipPrologs() throws IOException {
216 | while (true) {
217 | skipWhitespace();
218 |
219 | int[] next = new int[2];
220 | peek(next);
221 |
222 | if (next[0] != '<') {
223 | throw new IOException(
224 | "Expected '<' but got '" + (char) next[0] + "'.");
225 | }
226 |
227 | if ((next[1] == '?') || (next[1] == '!')) {
228 | skipProlog();
229 | } else {
230 | break;
231 | }
232 | }
233 | }
234 |
235 | private String readTag() throws IOException {
236 | skipWhitespace();
237 |
238 | StringBuffer sb = new StringBuffer();
239 |
240 | int next = peek();
241 | if (next != '<') {
242 | throw new IOException("Expected < but got " + (char) next);
243 | }
244 |
245 | sb.append((char) reader.read());
246 | while (peek() != '>') {
247 | sb.append((char) reader.read());
248 | }
249 | sb.append((char) reader.read());
250 |
251 | return sb.toString();
252 | }
253 |
254 | private String readText() throws IOException {
255 | StringBuffer sb = new StringBuffer();
256 |
257 | int[] next = new int[cdata_start.length];
258 | peek(next);
259 | if (compareIntArrays(next, cdata_start) == true) {
260 | reader.skip(next.length);
261 |
262 | int[] buffer = new int[cdata_end.length];
263 | while (true) {
264 | peek(buffer);
265 |
266 | if (compareIntArrays(buffer, cdata_end) == true) {
267 | reader.skip(buffer.length);
268 | break;
269 | } else {
270 | sb.append((char) reader.read());
271 | }
272 | }
273 | } else {
274 | while (peek() != '<') {
275 | sb.append((char) reader.read());
276 | }
277 | }
278 |
279 | return sb.toString();
280 | }
281 |
282 | private boolean compareIntArrays(int[] a1, int[] a2) {
283 | if (a1.length != a2.length) {
284 | return false;
285 | }
286 |
287 | for (int i = 0; i < a1.length; i++) {
288 | if (a1[i] != a2[i]) {
289 | return false;
290 | }
291 | }
292 |
293 | return true;
294 | }
295 | }