View Javadoc

1   package org.tinyjee.maven.dim.utils;
2   
3   import org.tinyjee.maven.dim.spi.UrlFetcher;
4   import org.w3c.dom.Document;
5   import org.w3c.dom.NamedNodeMap;
6   import org.w3c.dom.Node;
7   import org.w3c.dom.NodeList;
8   import org.xml.sax.*;
9   import org.xml.sax.ext.Attributes2Impl;
10  import org.xml.sax.helpers.XMLFilterImpl;
11  
12  import javax.xml.parsers.ParserConfigurationException;
13  import javax.xml.parsers.SAXParserFactory;
14  import javax.xml.transform.Transformer;
15  import javax.xml.transform.TransformerException;
16  import javax.xml.transform.TransformerFactory;
17  import javax.xml.transform.dom.DOMResult;
18  import javax.xml.transform.sax.SAXSource;
19  import java.io.IOException;
20  import java.io.Reader;
21  import java.lang.reflect.Field;
22  import java.net.URL;
23  
24  import static java.util.Arrays.asList;
25  
26  /**
27   * Creates a {@link Document} builder that adds user-data into the parsed DOM nodes that covers line number and column information.
28   * The purpose of this implementation is to get positions of selected elements in order to allow snippet selection via XPath.
29   * <p/>
30   * This implementation is roughly based on the examples found at:<ul>
31   * <li>http://stackoverflow.com/questions/2798376/is-there-a-way-to-parse-xml-via-sax-dom-with-line-numbers-available-per-node</li>
32   * <li>http://stackoverflow.com/questions/4915422/get-line-number-from-xml-node-java</li>
33   * </ul>
34   *
35   * @author Juergen_Kellerer, 2011-10-12
36   * @see PositioningJsonDocumentBuilder PositioningJsonDocumentBuilder
37   */
38  public class PositioningDocumentBuilder extends AbstractPositioningDocumentBuilder {
39  
40  	private final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
41  	private final TransformerFactory transformerFactory = TransformerFactory.newInstance();
42  
43  	/**
44  	 * Creates a new document builder that adds line and column number to the user data sections of resulting element nodes.
45  	 *
46  	 * @param namespaceAware whether XML processing is namespace aware.
47  	 */
48  	public PositioningDocumentBuilder(boolean namespaceAware) {
49  		saxParserFactory.setNamespaceAware(namespaceAware);
50  	}
51  
52  	public SAXParserFactory getSaxParserFactory() {
53  		return saxParserFactory;
54  	}
55  
56  	public TransformerFactory getTransformerFactory() {
57  		return transformerFactory;
58  	}
59  
60  	/**
61  	 * Parses the given XML document.
62  	 *
63  	 * @param systemId the URL of the document to parse.
64  	 * @return a parsed document with line and column numbers being included for elements.
65  	 * @throws SAXException                 If SAX parses cannot be created.
66  	 * @throws IOException                  If reading fails.
67  	 * @throws TransformerException         If XML transformation is unavailable.
68  	 * @throws ParserConfigurationException If parsers are not configured correctly.
69  	 */
70  	@Override
71  	public Document parse(URL systemId) throws SAXException, IOException, TransformerException, ParserConfigurationException {
72  		return parse(systemId, null);
73  	}
74  
75  	@Override
76  	public Document parse(URL systemIdUrl, Reader reader)
77  			throws SAXException, IOException, TransformerException, ParserConfigurationException {
78  		final String systemId = systemIdUrl.toString();
79  		final boolean namespaceAware = saxParserFactory.isNamespaceAware();
80  
81  		XMLReader xmlReader = saxParserFactory.newSAXParser().getXMLReader();
82  		xmlReader.setEntityResolver(new XhtmlEntityResolver());
83  
84  		InputSource inputSource = reader == null ? new InputSource(UrlFetcher.getSource(systemIdUrl, false)) : new InputSource(reader);
85  		inputSource.setSystemId(systemId);
86  		SAXSource saxSource = new SAXSource(new LocationXmlFilter(xmlReader, namespaceAware), inputSource);
87  
88  		Transformer transformer = transformerFactory.newTransformer();
89  		DOMResult domResult = new DOMResult(null, systemId);
90  		transformer.transform(saxSource, domResult);
91  
92  		Document document = (Document) domResult.getNode();
93  		postProcessDocument(document, namespaceAware);
94  
95  		return document;
96  	}
97  
98  	private static void postProcessDocument(Document document, boolean namespaceAware) {
99  		NodeList allElements = namespaceAware ? document.getElementsByTagNameNS("*", "*") : document.getElementsByTagName("*");
100 		for (int i = 0, length = allElements.getLength(); i < length; i++) {
101 			final Node element = allElements.item(i);
102 			final NamedNodeMap attributes = element.getAttributes();
103 			if (attributes != null) {
104 				Node node;
105 				if (namespaceAware)
106 					node = attributes.removeNamedItemNS(LocationXmlFilter.LOCATION_NS, LocationXmlFilter.LOCATION_ATTRIBUTE_LOCAL);
107 				else
108 					node = attributes.removeNamedItem(LocationXmlFilter.LOCATION_ATTRIBUTE_LOCAL);
109 
110 				if (node != null) {
111 					String location = node.getNodeValue();
112 					String[] locationParts = location == null ? null : location.split(":");
113 					if (locationParts != null && locationParts.length > 1) {
114 						element.setUserData(KEY_LINE_NUMBER, Integer.parseInt(locationParts[0]), null);
115 						element.setUserData(KEY_COLUMN_NUMBER, Integer.parseInt(locationParts[1]), null);
116 					}
117 				}
118 			}
119 		}
120 	}
121 
122 	private static class LocationXmlFilter extends XMLFilterImpl {
123 
124 		static final String LOCATION_NS = LocationXmlFilter.class.getName();
125 		static final String LOCATION_ATTRIBUTE_QUALIFIED = "XdimlocX:dim-element-location";
126 		static final String LOCATION_ATTRIBUTE_LOCAL = "dim-element-location";
127 		static final String CDATA = "CDATA";
128 
129 		private final boolean namespaceAware;
130 
131 		LocationXmlFilter(XMLReader xmlReader, boolean namespaceAware) {
132 			super(xmlReader);
133 			this.namespaceAware = namespaceAware;
134 		}
135 
136 		private Locator locator;
137 
138 		@Override
139 		public void setDocumentLocator(Locator locator) {
140 			super.setDocumentLocator(locator);
141 			this.locator = locator;
142 		}
143 
144 		@Override
145 		public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
146 			// Add extra attribute to elements to hold location
147 			String location = locator.getLineNumber() + ":" + locator.getColumnNumber();
148 			Attributes2Impl modifiedAttributes = new JRE5SafeAttributes2Impl(attributes);
149 			if (namespaceAware)
150 				modifiedAttributes.addAttribute(LOCATION_NS, LOCATION_ATTRIBUTE_LOCAL, LOCATION_ATTRIBUTE_QUALIFIED, CDATA, location);
151 			else
152 				modifiedAttributes.addAttribute("", "", LOCATION_ATTRIBUTE_LOCAL, CDATA, location);
153 
154 			super.startElement(uri, localName, qName, modifiedAttributes);
155 		}
156 
157 
158 		private static class JRE5SafeAttributes2Impl extends Attributes2Impl {
159 
160 			private static final Field declaredField, specifiedField;
161 
162 			static {
163 				try {
164 					declaredField = Attributes2Impl.class.getDeclaredField("declared");
165 					specifiedField = Attributes2Impl.class.getDeclaredField("specified");
166 					for (Field field : asList(declaredField, specifiedField)) field.setAccessible(true);
167 				} catch (NoSuchFieldException e) {
168 					throw new RuntimeException(e);
169 				}
170 			}
171 
172 			JRE5SafeAttributes2Impl(Attributes attributes) {
173 				super(attributes);
174 			}
175 
176 			@Override
177 			public void addAttribute(String uri, String localName, String qName, String type, String value) {
178 				try {
179 					super.addAttribute(uri, localName, qName, type, value);
180 				} catch (Exception ignored) {
181 					// In JRE 1.5 this method is completely broken and has to be patched with the implementation found in JDK 1.6
182 					try {
183 						boolean[] specified = (boolean[]) specifiedField.get(this), declared = (boolean[]) declaredField.get(this);
184 						int length = getLength();
185 						if (specified == null) {
186 							specified = new boolean[length];
187 							declared = new boolean[length];
188 						} else if (length > specified.length) {
189 							boolean[] flags;
190 
191 							flags = new boolean[length];
192 							System.arraycopy(declared, 0, flags, 0, declared.length);
193 							declared = flags;
194 
195 							flags = new boolean[length];
196 							System.arraycopy(specified, 0, flags, 0, specified.length);
197 							specified = flags;
198 						}
199 
200 						specified[length - 1] = true;
201 						declared[length - 1] = !"CDATA".equals(type);
202 
203 						specifiedField.set(this, specified);
204 						declaredField.set(this, declared);
205 					} catch (IllegalAccessException e) {
206 						throw new RuntimeException(e); //NOSONAR - The original exception 'ignored' is ignored as this
207 						// is a workaround to a bug in JDK 1.5 that does always happen and should not produce any output.
208 						// It is still not very secure as we may end up catching something un-expected.
209 					}
210 				}
211 			}
212 		}
213 	}
214 }