Skip to content

Commit

Permalink
Merge pull request #169 from pjfanning/namespaces
Browse files Browse the repository at this point in the history
use namespace aware parsing/xpath resolution
  • Loading branch information
monitorjbl authored Dec 6, 2018
2 parents 76427e0 + a946ecb commit f52ae88
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 14 deletions.
4 changes: 2 additions & 2 deletions src/main/java/com/monitorjbl/xlsx/StreamingReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ public StreamingReader read(File f) {
}

StylesTable styles = reader.getStylesTable();
NodeList workbookPr = searchForNodeList(document(reader.getWorkbookData()), "/workbook/workbookPr");
NodeList workbookPr = searchForNodeList(document(reader.getWorkbookData()), "/ss:workbook/ss:workbookPr");
if (workbookPr.getLength() == 1) {
final Node date1904 = workbookPr.item(0).getAttributes().getNamedItem("date1904");
if (date1904 != null) {
Expand Down Expand Up @@ -369,7 +369,7 @@ private InputStream findSheet(XSSFReader reader) throws IOException, InvalidForm
if(sheetName != null) {
index = -1;
//This file is separate from the worksheet data, and should be fairly small
NodeList nl = searchForNodeList(document(reader.getWorkbookData()), "/workbook/sheets/sheet");
NodeList nl = searchForNodeList(document(reader.getWorkbookData()), "/ss:workbook/ss:sheets/ss:sheet");
for(int i = 0; i < nl.getLength(); i++) {
if(Objects.equals(nl.item(i).getAttributes().getNamedItem("name").getTextContent(), sheetName)) {
index = i;
Expand Down
60 changes: 56 additions & 4 deletions src/main/java/com/monitorjbl/xlsx/XmlUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,29 @@
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;

public class XmlUtils {
public static Document document(InputStream is) {
try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

factory.setNamespaceAware(true);
factory.setValidating(false);
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
factory.setExpandEntityReferences(false);
factory.setXIncludeAware(false);

return factory.newDocumentBuilder().parse(is);
} catch(SAXException | IOException | ParserConfigurationException e) {
throw new ParseException(e);
Expand All @@ -32,11 +36,59 @@ public static Document document(InputStream is) {

public static NodeList searchForNodeList(Document document, String xpath) {
try {
return (NodeList) XPathFactory.newInstance().newXPath().compile(xpath)
.evaluate(document, XPathConstants.NODESET);
XPath xp = XPathFactory.newInstance().newXPath();
NamespaceContextImpl nc = new NamespaceContextImpl();
nc.addNamespace("ss", "http://schemas.openxmlformats.org/spreadsheetml/2006/main");
xp.setNamespaceContext(nc);
return (NodeList)xp.compile(xpath)
.evaluate(document, XPathConstants.NODESET);
} catch(XPathExpressionException e) {
throw new ParseException(e);
}
}

/**
 * Minimal mutable {@link NamespaceContext} backed by two maps: prefix -> URI and
 * URI -> prefixes. The {@code xml} and {@code xmlns} prefixes are pre-bound by the
 * constructor; further bindings are registered through
 * {@link #addNamespace(String, String)}.
 *
 * <p>Not thread-safe: intended to be created and used by a single caller.
 */
private static final class NamespaceContextImpl implements NamespaceContext {
  private final Map<String, String> urisByPrefix = new HashMap<>();

  // LinkedHashSet keeps registration order so getPrefix() is deterministic.
  private final Map<String, Set<String>> prefixesByURI = new HashMap<>();

  /** Creates a context with only the reserved {@code xml} and {@code xmlns} bindings. */
  public NamespaceContextImpl() {
    addNamespace(XMLConstants.XML_NS_PREFIX, XMLConstants.XML_NS_URI);
    addNamespace(XMLConstants.XMLNS_ATTRIBUTE, XMLConstants.XMLNS_ATTRIBUTE_NS_URI);
  }

  /**
   * Binds {@code prefix} to {@code namespaceURI}. If the prefix was already bound to
   * a different URI, the old binding is dropped from both maps so reverse lookups
   * never report a prefix that no longer resolves to that URI.
   *
   * @param prefix       the prefix to bind, must not be {@code null}
   * @param namespaceURI the namespace URI to bind it to, must not be {@code null}
   * @throws IllegalArgumentException if either argument is {@code null}
   */
  public void addNamespace(String prefix, String namespaceURI) {
    if (prefix == null) {
      throw new IllegalArgumentException("prefix cannot be null");
    }
    if (namespaceURI == null) {
      throw new IllegalArgumentException("namespaceURI cannot be null");
    }

    String previousUri = urisByPrefix.put(prefix, namespaceURI);
    if (previousUri != null && !previousUri.equals(namespaceURI)) {
      // The prefix has been re-bound: remove the stale reverse mapping.
      Set<String> stale = prefixesByURI.get(previousUri);
      if (stale != null) {
        stale.remove(prefix);
        if (stale.isEmpty()) {
          prefixesByURI.remove(previousUri);
        }
      }
    }

    Set<String> prefixes = prefixesByURI.get(namespaceURI);
    if (prefixes == null) {
      prefixes = new LinkedHashSet<>();
      prefixesByURI.put(namespaceURI, prefixes);
    }
    prefixes.add(prefix);
  }

  /**
   * Returns the URI bound to {@code prefix}, or {@link XMLConstants#NULL_NS_URI}
   * when the prefix is unbound.
   *
   * @throws IllegalArgumentException if {@code prefix} is {@code null}
   */
  @Override
  public String getNamespaceURI(String prefix) {
    if (prefix == null) {
      throw new IllegalArgumentException("prefix cannot be null");
    }
    String uri = urisByPrefix.get(prefix);
    return uri != null ? uri : XMLConstants.NULL_NS_URI;
  }

  /**
   * Returns the first prefix registered for {@code namespaceURI}, or {@code null}
   * when no prefix is bound to it (the original implementation threw
   * {@code NoSuchElementException} in that case).
   *
   * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
   */
  @Override
  public String getPrefix(String namespaceURI) {
    Iterator<String> prefixes = getPrefixes(namespaceURI);
    return prefixes.hasNext() ? prefixes.next() : null;
  }

  /**
   * Returns a read-only iterator over every prefix bound to {@code namespaceURI};
   * the iterator is empty when the URI is unbound.
   *
   * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
   */
  @Override
  public Iterator<String> getPrefixes(String namespaceURI) {
    if (namespaceURI == null) {
      throw new IllegalArgumentException("namespaceURI cannot be null");
    }
    Set<String> prefixes = prefixesByURI.get(namespaceURI);
    if (prefixes == null) {
      return Collections.<String>emptySet().iterator();
    }
    // Wrap so Iterator.remove() cannot mutate the internal reverse map.
    return Collections.unmodifiableSet(prefixes).iterator();
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
Expand Down Expand Up @@ -72,7 +71,7 @@ private boolean getRow() {
}
rowCacheIterator = rowCache.iterator();
return rowCacheIterator.hasNext();
} catch(XMLStreamException | SAXException e) {
} catch(XMLStreamException e) {
throw new ParseException("Error reading XML stream", e);
}
}
Expand Down Expand Up @@ -100,9 +99,8 @@ private String[] splitCellRef(String ref) {
* Handles a StAX event.
*
* @param event
* @throws SAXException
*/
private void handleEvent(XMLEvent event) throws SAXException {
private void handleEvent(XMLEvent event) {
if(event.getEventType() == XMLStreamConstants.CHARACTERS) {
Characters c = event.asCharacters();
lastContents += c.getData();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ public void init(File f) {
}

StylesTable styles = reader.getStylesTable();
NodeList workbookPr = searchForNodeList(document(reader.getWorkbookData()), "/workbook/workbookPr");
NodeList workbookPr = searchForNodeList(document(reader.getWorkbookData()), "/ss:workbook/ss:workbookPr");
if(workbookPr.getLength() == 1) {
final Node date1904 = workbookPr.item(0).getAttributes().getNamedItem("date1904");
if(date1904 != null) {
Expand All @@ -141,8 +141,8 @@ public void init(File f) {
}
}

void loadSheets(XSSFReader reader, SharedStringsTable sst, StylesTable stylesTable, int rowCacheSize) throws IOException, InvalidFormatException,
XMLStreamException {
void loadSheets(XSSFReader reader, SharedStringsTable sst, StylesTable stylesTable, int rowCacheSize)
throws IOException, InvalidFormatException, XMLStreamException {
lookupSheetNames(reader);

//Some workbooks have multiple references to the same sheet. Need to filter
Expand All @@ -165,7 +165,7 @@ void loadSheets(XSSFReader reader, SharedStringsTable sst, StylesTable stylesTab

void lookupSheetNames(XSSFReader reader) throws IOException, InvalidFormatException {
sheetProperties.clear();
NodeList nl = searchForNodeList(document(reader.getWorkbookData()), "/workbook/sheets/sheet");
NodeList nl = searchForNodeList(document(reader.getWorkbookData()), "/ss:workbook/ss:sheets/ss:sheet");
for(int i = 0; i < nl.getLength(); i++) {
Map<String, String> props = new HashMap<>();
props.put("name", nl.item(i).getAttributes().getNamedItem("name").getTextContent());
Expand Down

0 comments on commit f52ae88

Please sign in to comment.