Skip to content

Commit

Permalink
Improve entity parsing, supporting multi-line entity definitions
Browse files Browse the repository at this point in the history
  • Loading branch information
matthiasblaesing committed Nov 3, 2018
1 parent 6a8edb1 commit 85e8b47
Show file tree
Hide file tree
Showing 6 changed files with 237 additions and 108 deletions.
2 changes: 1 addition & 1 deletion ide/schema2beans/nbproject/project.properties
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@
is.autoload=true
cp.extra=\
${ant.core.lib}
javac.source=1.6
javac.source=1.8
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,10 @@ public DocDefParser(GenBeans.Config config, DocDefHandler handler) {
if (schemaIn == null) {
schemaIn = new FileInputStream(filename);
}
EntityParser entityParser = new EntityParser(new InputStreamReader(schemaIn));
entityParser.parse();
EntityParser entityParser = new EntityParser();
try (Reader r = new InputStreamReader(schemaIn)) {
entityParser.parse(r);
}
reader = entityParser.getReader();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* EntityParser.java - parses the DTD file for entity declarations and creates a new Reader
* that replaces the entity references with their values
Expand All @@ -29,56 +31,59 @@
* @author mkuchtiak
*/
public class EntityParser {
private java.util.Map entityMap;
private final String text;
public EntityParser(Reader reader) throws IOException {
StringWriter w = new StringWriter();
private static final Pattern ENTITY_PATTERN = Pattern.compile("<!ENTITY\\s+%\\s+(\\S+)\\s+\"([^\"]*)\"\\s*>");
private static final Pattern ENTITY_USE_PATTERN = Pattern.compile("%([\\S;]+);");

private final Map<String,String> entityMap = new HashMap<>();
private String remainingText = "";

public EntityParser() throws IOException {
}

/**
* Parses file for ENTITY declaration, creates map with entities
*/
public void parse(Reader reader) throws IOException {
StringBuilder w = new StringBuilder();
char[] buf = new char[4096];
int read;
while ((read = reader.read(buf)) != -1) {
w.write(buf, 0, read);
w.append(buf, 0, read);
}
this.text = w.toString();
entityMap = new java.util.HashMap();
}
/** Parses file for ENTITY declaration, creates map with entities
*/
public void parse() throws IOException {
BufferedReader br = new BufferedReader(new StringReader(text));
String line = null;
while ((line=br.readLine())!=null) {
int startPos = line.indexOf("<!ENTITY ");
if (startPos>=0) addEntity(br,line.substring(startPos+9));

String originalText = w.toString();

StringBuffer buffer = new StringBuffer(originalText.length());
Matcher entityMatcher = ENTITY_PATTERN.matcher(originalText);
while(entityMatcher.find()) {
addEntity(entityMatcher);
entityMatcher.appendReplacement(buffer, "");
}
entityMatcher.appendTail(buffer);

StringBuffer buffer2 = new StringBuffer(originalText.length());
Matcher entityReplacementMatcher = ENTITY_USE_PATTERN.matcher(buffer);
while(entityReplacementMatcher.find()) {
String entity = entityReplacementMatcher.group(1);
if(entityMap.containsKey(entity)) {
entityReplacementMatcher.appendReplacement(buffer2, entityMap.get(entity));
}
}
br.close();
entityReplacementMatcher.appendTail(buffer2);

remainingText = buffer2.toString();
}

private void addEntity(BufferedReader br, String line) throws IOException {
StringTokenizer tok = new StringTokenizer(line);
if (!tok.hasMoreTokens()) return;
String percentage = tok.nextToken();
if (!"%".equals(percentage)) return; //incorrect ENTITY declaration (missing %)
if (!tok.hasMoreTokens()) return; //incorrect ENTITY declaration (missing entity name)

// cut the first part including entity key
String key = tok.nextToken();
int valueStartPos = line.indexOf(key)+key.length();
String rest = line.substring(valueStartPos);

// looking for starting quotes
valueStartPos = rest.indexOf("\"");
if (valueStartPos<0) return;

// looking for entity value
rest = rest.substring(valueStartPos+1);
String value = resolveValue (rest,br);

// write ENTITY into map
private void addEntity(Matcher m) throws IOException {
String key = m.group(1);
String value = m.group(2);

// write ENTITY into map
if (value!=null) {
int refStart = value.indexOf("%");
int refEnd = value.indexOf(";");
if (refStart>=0 && refEnd>refStart) { //references other entity
String entityKey = value.substring(refStart+1,refEnd);
String entityKey = value.substring(refStart+1,refEnd);
String val = (String)entityMap.get(entityKey);
if (val!=null) {
String newValue = value.substring(0,refStart)+val+value.substring(refEnd+1);
Expand All @@ -92,76 +97,12 @@ private void addEntity(BufferedReader br, String line) throws IOException {
}
}
}

private String resolveValue(String lineRest, BufferedReader br) throws IOException {
// looking for closing quotes
int index = lineRest.indexOf("\"");
if (index>=0) return lineRest.substring(0,index);
// value across multiple lines
StringBuffer buf = new StringBuffer(lineRest);
buf.append("\n");
int ch=br.read();
while ( ch!=(int)'"' && ch!=(int)'>' && ch!=-1 ) {
buf.append((char)ch);
ch=br.read();
}
return buf.toString();
}

private boolean containsBlank(String s) {
for (int i=0;i<s.length();i++) {
if (' '==s.charAt(i)) return true;
}
return false;
}

/** Creates a StringReader that removes all ENTITY declarations
* and replaces entity references with corresponding values
*/
public Reader getReader() throws IOException {
StringBuffer buf = new StringBuffer();
BufferedReader br = new BufferedReader(new StringReader(text));
String line = null;
while ((line=br.readLine())!=null) {
// removing line(s) with entity declaration
if (line.indexOf("<!ENTITY ")>=0) line = removeEntityDeclaration(line,br);
// searches for entity reference and replace it with value
int pos = line.indexOf("%");
if (pos>=0) {
StringTokenizer tok = new StringTokenizer(line.substring(pos),";%");
while (tok.hasMoreTokens()) {
String key = tok.nextToken();
if (key.length()>0 && !containsBlank(key)) {
String value = (String)entityMap.get(key);
if (value!=null) line = line.replaceAll("%"+key+";",value);
}
}
}
if (line.length()>0) buf.append(line);
}
br.close();
return new StringReader(buf.toString());
}

/** Removing line(s) containing ENTITY declaration
*/
private String removeEntityDeclaration(String line,BufferedReader br) throws IOException {
int start = line.indexOf("<!ENTITY ");
StringBuffer buf = new StringBuffer();
if (start>0) buf.append(line.substring(0, start));
int endPos = line.indexOf(">", start);
if (endPos>0) {
buf.append(line.substring(endPos+1));
return buf.toString();
}
String ln=null;
while (endPos<0 && (ln=br.readLine())!=null) {
endPos = ln.indexOf(">");
if (endPos>=0) {
buf.append(ln.substring(endPos+1));
}
}
return buf.toString();
return new StringReader(remainingText);
}

}
57 changes: 57 additions & 0 deletions ide/schema2beans/test/unit/data/TestEntity.dtd
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
<!-- The following DTD is a simple view of a book:
book
index
+
word
#PCDATA
page
#PCDATA
chapter+
comment?
#PCDATA
paragraph*
#PCDATA
summary?
#PCDATA

good
EMPTY
-->

<!ENTITY % yesno
"(yes, no)"
>

<!ELEMENT summary (#PCDATA)>

<!ELEMENT book (summary?, chapter+, index+, available)>

<!ELEMENT available EMPTY>

<!ELEMENT index (word, ref+)>

<!ELEMENT ref (page, line)>

<!ELEMENT word (#PCDATA)>

<!ELEMENT line (#PCDATA)>

<!ELEMENT page (#PCDATA)>

<!ELEMENT chapter (comment?, paragraph*)>

<!ELEMENT paragraph (#PCDATA)>

<!ELEMENT comment (#PCDATA)>

<!ATTLIST book good %yesno; 'yes'>
<!ATTLIST summary length CDATA #REQUIRED>
<!ATTLIST summary lang CDATA #IMPLIED>
<!ATTLIST summary size CDATA #FIXED '12'>
<!ATTLIST chapter title CDATA #IMPLIED>
<!ATTLIST index color CDATA #IMPLIED>
<!ATTLIST index cross-ref %yesno; "no">
<!ATTLIST index glossary CDATA #FIXED "nope">
<!ATTLIST word freq CDATA #IMPLIED>


Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<!-- The following DTD is a simple view of a book:
book
index
+
word
#PCDATA
page
#PCDATA
chapter+
comment?
#PCDATA
paragraph*
#PCDATA
summary?
#PCDATA

good
EMPTY
-->



<!ELEMENT summary (#PCDATA)>

<!ELEMENT book (summary?, chapter+, index+, available)>

<!ELEMENT available EMPTY>

<!ELEMENT index (word, ref+)>

<!ELEMENT ref (page, line)>

<!ELEMENT word (#PCDATA)>

<!ELEMENT line (#PCDATA)>

<!ELEMENT page (#PCDATA)>

<!ELEMENT chapter (comment?, paragraph*)>

<!ELEMENT paragraph (#PCDATA)>

<!ELEMENT comment (#PCDATA)>

<!ATTLIST book good (yes, no) 'yes'>
<!ATTLIST summary length CDATA #REQUIRED>
<!ATTLIST summary lang CDATA #IMPLIED>
<!ATTLIST summary size CDATA #FIXED '12'>
<!ATTLIST chapter title CDATA #IMPLIED>
<!ATTLIST index color CDATA #IMPLIED>
<!ATTLIST index cross-ref (yes, no) "no">
<!ATTLIST index glossary CDATA #FIXED "nope">
<!ATTLIST word freq CDATA #IMPLIED>


74 changes: 74 additions & 0 deletions ide/schema2beans/test/unit/src/tests/SecondaryTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package tests;

import java.io.*;
import java.lang.reflect.Field;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import org.netbeans.junit.*;

import org.netbeans.modules.schema2beansdev.*;

/**
 * Exercises {@link EntityParser} against the {@code TestEntity.dtd} fixture
 * and compares the text it produces with the test's golden file.
 */
public class SecondaryTest extends NbTestCase {

    public SecondaryTest(String testName) {
        super(testName);
    }

    /**
     * Parses {@code TestEntity.dtd}, checks that exactly one entity was
     * collected and that the reader handed back by the parser yields the
     * same characters as the golden file.
     *
     * @throws Exception on any I/O or reflection failure
     */
    public void testEntityParser() throws Exception {
        File dtdFile = new File(getDataDir(), "TestEntity.dtd");
        try (InputStream dtdStream = new FileInputStream(dtdFile);
                Reader dtdReader = new InputStreamReader(dtdStream, StandardCharsets.ISO_8859_1);
                InputStream goldenStream = new FileInputStream(getGoldenFile());
                Reader goldenReader = new InputStreamReader(goldenStream, StandardCharsets.ISO_8859_1)) {
            EntityParser parser = new EntityParser();
            parser.parse(dtdReader);

            // The entity map is private; reflection is used to inspect how
            // many declarations the parser collected.
            Field mapField = EntityParser.class.getDeclaredField("entityMap");
            mapField.setAccessible(true);
            Map<?, ?> entities = (Map<?, ?>) mapField.get(parser);
            assertEquals(1, entities.size());

            String actual;
            try (Reader parsed = parser.getReader()) {
                actual = drain(parsed);
            }

            // goldenReader is closed by the enclosing try-with-resources.
            String expected = drain(goldenReader);

            assertEquals(expected, actual);
        }
    }

    /**
     * Reads every remaining character from {@code source} into a string.
     * The reader is not closed here; that stays the caller's job.
     */
    private static String drain(Reader source) throws IOException {
        StringBuilder collected = new StringBuilder();
        char[] chunk = new char[4096];
        for (int count = source.read(chunk); count >= 0; count = source.read(chunk)) {
            collected.append(chunk, 0, count);
        }
        return collected.toString();
    }
}

0 comments on commit 85e8b47

Please sign in to comment.