View Javadoc

1   package org.tinyjee.maven.dim.utils;
2   
3   import org.codehaus.jackson.JsonFactory;
4   import org.codehaus.jackson.JsonLocation;
5   import org.codehaus.jackson.JsonParser;
6   import org.codehaus.jackson.JsonToken;
7   import org.tinyjee.maven.dim.spi.UrlFetcher;
8   import org.w3c.dom.Document;
9   import org.w3c.dom.Element;
10  import org.w3c.dom.Node;
11  import org.w3c.dom.Text;
12  
13  import javax.xml.parsers.DocumentBuilderFactory;
14  import javax.xml.parsers.ParserConfigurationException;
15  import java.io.IOException;
16  import java.io.Reader;
17  import java.net.URL;
18  import java.util.HashMap;
19  import java.util.Map;
20  import java.util.Stack;
21  import java.util.regex.Pattern;
22  
23  /**
24   * Creates a {@link Document} builder that parses JSON instead of XML and adds user-data into the parsed DOM nodes that covers
25   * line number and column information.
26   * The purpose of this implementation is to apply XPath queries on JSON to drive snippet selection.
27   * <p/>
28   * Translations Rules:<ul>
29   * <li>JSON-Objects translate to XML elements with child nodes. The root element is named "json".</li>
30   * <li>JSON-Fields translate to XML elements with text content or child nodes if the value is an JSON-Object.</li>
31   * <li>JSON-Arrays translate to multiple siblings of XML elements that are assembled as if they were a JSON-Field.</li>
32   * <li>The resulting XML is composed of element and text nodes. Attributes are not created.</li>
33   * </ul>
34   * <p/>
35   * Example - Input:<br/>
36   * <code><pre>
37   * {
38   *     "name" : {
39   *         "first" : "Joe",
40   *         "last" : "Sixpack"
41   *     },
42   *     "gender" : "MALE",
43   *     "verified" : false,
44   *     "flags" : ["123", 123, "xyz", {
45   *         "composite": 987,
46   *         "inner flags": [1,{
47   *             "innermost":"abc",
48   *             "innermost-object" : {
49   *                 "x":1
50   *             }
51   *         },3]
52   *     }],
53   *     "userImage" : "Rm9vYmFyIQ=="
54   * }
55   * </pre></code>
56   * Example - Result:<br/>
57   * <pre><code>
58   * &lt;json&gt;
59   *    &lt;name&gt;
60   *        &lt;first&gt;Joe&lt;/first&gt;
61   *        &lt;last&gt;Sixpack&lt;/last&gt;
62   *    &lt;/name&gt;
63   *    &lt;gender&gt;MALE&lt;/gender&gt;
64   *    &lt;verified&gt;false&lt;/verified&gt;
65   *    &lt;flags&gt;123&lt;/flags&gt;&lt;flags&gt;123&lt;/flags&gt;&lt;flags&gt;xyz&lt;/flags&gt;&lt;flags&gt;
66   *        &lt;composite&gt;987&lt;/composite&gt;
67   *        &lt;inner_flags&gt;1&lt;/inner_flags&gt;&lt;inner_flags&gt;
68   *            &lt;innermost&gt;abc&lt;/innermost&gt;
69   *            &lt;innermost-object&gt;
70   *                &lt;x&gt;1&lt;/x&gt;
71   *            &lt;/innermost-object&gt;
72   *        &lt;/inner_flags&gt;&lt;inner_flags&gt;3&lt;/inner_flags&gt;
73   *    &lt;/flags&gt;
74   *    &lt;userImage&gt;Rm9vYmFyIQ==&lt;/userImage&gt;
75   * &lt;/json&gt;
76   * </code></pre>
77   *
78   * @author Juergen_Kellerer, 2011-10-12
79   * @see PositioningDocumentBuilder PositioningDocumentBuilder
80   */
81  public class PositioningJsonDocumentBuilder extends AbstractPositioningDocumentBuilder {
82  
83  	public static final String ROOT_ELEMENT_TAG_NAME = "json";
84  	private final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
85  	private final JsonFactory jsonFactory = new JsonFactory();
86  
87  	{
88  		jsonFactory.configure(JsonParser.Feature.AUTO_CLOSE_SOURCE, true);
89  
90  		// Make JSON parsing permissive
91  		jsonFactory.configure(JsonParser.Feature.ALLOW_COMMENTS, true);
92  		jsonFactory.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true);
93  		jsonFactory.configure(JsonParser.Feature.ALLOW_NUMERIC_LEADING_ZEROS, true);
94  		jsonFactory.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true);
95  		jsonFactory.configure(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, true);
96  		jsonFactory.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true);
97  
98  		documentBuilderFactory.setNamespaceAware(false);
99  	}
100 
101 	public JsonFactory getJsonFactory() {
102 		return jsonFactory;
103 	}
104 
105 	@Override
106 	public Document parse(URL systemId) throws IOException, ParserConfigurationException {
107 		return parse(systemId, null);
108 	}
109 
110 	@Override
111 	public Document parse(URL systemId, Reader reader) throws IOException, ParserConfigurationException {
112 		final JsonParser parser = jsonFactory.createJsonParser(reader == null ? UrlFetcher.getReadableSource(systemId) : reader);
113 		final Document document = documentBuilderFactory.newDocumentBuilder().newDocument();
114 		final Element rootElement = document.createElement(ROOT_ELEMENT_TAG_NAME);
115 		document.setDocumentURI(systemId.toString());
116 		document.appendChild(rootElement);
117 		new DocumentTreeBuilder(document, rootElement, parser).buildDocumentTree();
118 
119 		return document;
120 	}
121 
122 	private static class DocumentTreeBuilder {
123 
124 		private static final Pattern NON_TAG_NAME_CHARACTERS = Pattern.compile("[\\s<>&']+");
125 
126 		private final Document document;
127 		private final Element rootElement;
128 		private final JsonParser parser;
129 
130 		private final Stack<Element> elementStack = new Stack<Element>();
131 		private final Stack<Element> arrayStack = new Stack<Element>();
132 		private final Map<String, Element> stackElementOnArrayCreation = new HashMap<String, Element>();
133 
134 		private JsonLocation currentLocation;
135 		private int lineNumber;
136 		private int lastLineNumber;
137 		private String currentName;
138 
139 		private DocumentTreeBuilder(Document document, Element rootElement, JsonParser parser) {
140 			this.document = document;
141 			this.rootElement = rootElement;
142 			this.parser = parser;
143 		}
144 
145 		void buildDocumentTree() throws IOException {
146 
147 			Element previousField = null;
148 			for (JsonToken currentToken = parser.nextToken(); currentToken != null; currentToken = parser.nextToken()) {
149 				currentLocation = parser.getCurrentLocation();
150 				lineNumber = currentLocation.getLineNr();
151 				currentName = parser.getCurrentName();
152 
153 				switch (currentToken) {
154 					case START_OBJECT:
155 						if (previousField != null) {
156 							previousField.getParentNode().removeChild(previousField);
157 							previousField = null;
158 						}
159 						if (!elementStack.isEmpty() &&
160 								elementStack.peek().equals(stackElementOnArrayCreation.get(getArrayStackMappingKey()))) {
161 							elementStack.push(addArrayElement());
162 						} else
163 							elementStack.push(addElement());
164 
165 						break;
166 					case END_OBJECT:
167 						Element element = elementStack.pop();
168 						if (element != null && element.hasChildNodes())
169 							addWhitespaces(element, elementStack.size());
170 
171 						break;
172 					case START_ARRAY:
173 						arrayStack.push(previousField);
174 						recordArrayStackToElementMapping();
175 						previousField = null;
176 						break;
177 					case END_ARRAY:
178 						stackElementOnArrayCreation.remove(getArrayStackMappingKey());
179 						arrayStack.pop();
180 						break;
181 					case FIELD_NAME:
182 						previousField = addElement();
183 						break;
184 					case VALUE_FALSE:
185 					case VALUE_TRUE:
186 					case VALUE_NULL:
187 					case VALUE_NUMBER_FLOAT:
188 					case VALUE_NUMBER_INT:
189 					case VALUE_STRING:
190 						if (previousField != null) {
191 							previousField.setTextContent(parser.getText());
192 							previousField = null;
193 						} else {
194 							Element arrayElement = addArrayElement();
195 							arrayElement.setTextContent(parser.getText());
196 							addWhitespaces(arrayElement.getParentNode(), elementStack.size());
197 						}
198 						break;
199 				}
200 
201 				lastLineNumber = lineNumber;
202 			}
203 		}
204 
205 		private void recordArrayStackToElementMapping() {
206 			stackElementOnArrayCreation.put(getArrayStackMappingKey(), elementStack.peek());
207 		}
208 
209 		private String getArrayStackMappingKey() {
210 			int size = arrayStack.size();
211 			return size + ":" + (size == 0 ? "" : arrayStack.peek().getTagName());
212 		}
213 
214 		private Element addArrayElement() {
215 			Element arrayElement = arrayStack.peek();
216 			String textContent = arrayElement.getTextContent();
217 			if (textContent != null && textContent.length() != 0) {
218 				arrayElement = document.createElement(arrayElement.getTagName());
219 				fillInCurrentLocation(arrayElement);
220 				arrayStack.pop().getParentNode().appendChild(arrayElement);
221 				arrayStack.push(arrayElement);
222 			}
223 
224 			recordArrayStackToElementMapping();
225 
226 			return arrayElement;
227 		}
228 
229 		private Element addElement() {
230 			Node parentNode = elementStack.isEmpty() ? null : elementStack.peek();
231 			String tagName = currentName == null ? "anonymous" : currentName;
232 			tagName = NON_TAG_NAME_CHARACTERS.matcher(tagName).replaceAll("_");
233 			Element element = parentNode == null ? rootElement : document.createElement(tagName);
234 			fillInCurrentLocation(element);
235 
236 			addWhitespaces(parentNode, elementStack.size());
237 			if (parentNode != null) parentNode.appendChild(element);
238 
239 			return element;
240 		}
241 
242 		private void fillInCurrentLocation(Element element) {
243 			element.setUserData(KEY_LINE_NUMBER, lineNumber, null);
244 			element.setUserData(KEY_COLUMN_NUMBER, currentLocation.getColumnNr(), null);
245 		}
246 
247 		private void addWhitespaces(Node parentNode, int indentCount) {
248 			if (parentNode != null) {
249 				int newLines = lineNumber - lastLineNumber;
250 				for (; newLines > 0; newLines--) {
251 					StringBuilder builder = new StringBuilder().append('\n');
252 					for (int j = indentCount; j > 0; j--)
253 						builder.append("    ");
254 					Text textNode = document.createTextNode(builder.toString());
255 					parentNode.appendChild(textNode);
256 				}
257 			}
258 		}
259 	}
260 }