Improve entity parsing, supporting multi-line entity definitions

wqskhn · Nov 3, 2018 · 85e8b47
1 parent 6a8edb1
commit 85e8b47
Show file tree

Hide file tree

Showing 6 changed files with 237 additions and 108 deletions.
diff --git a/ide/schema2beans/nbproject/project.properties b/ide/schema2beans/nbproject/project.properties
@@ -18,4 +18,4 @@
 is.autoload=true
 cp.extra=\
     ${ant.core.lib}
-javac.source=1.6
+javac.source=1.8
diff --git a/ide/schema2beans/src/org/netbeans/modules/schema2beansdev/DocDefParser.java b/ide/schema2beans/src/org/netbeans/modules/schema2beansdev/DocDefParser.java
@@ -82,8 +82,10 @@ public DocDefParser(GenBeans.Config config, DocDefHandler handler) {
         if (schemaIn == null) {
             schemaIn = new FileInputStream(filename);
         }
-        EntityParser entityParser = new EntityParser(new InputStreamReader(schemaIn));
-        entityParser.parse();
+        EntityParser entityParser = new EntityParser();
+        try (Reader r = new InputStreamReader(schemaIn)) {
+            entityParser.parse(r);
+        }
         reader = entityParser.getReader();
     }
 

diff --git a/ide/schema2beans/src/org/netbeans/modules/schema2beansdev/EntityParser.java b/ide/schema2beans/src/org/netbeans/modules/schema2beansdev/EntityParser.java
@@ -21,6 +21,8 @@
 
 import java.io.*;
 import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 /**
  * EntityParser.java - parses the DTD file for entity declarations and creates new Reader
  * that replaces the entity references with values
@@ -29,56 +31,59 @@
  * @author mkuchtiak
  */
 public class EntityParser {
-    private java.util.Map entityMap;
-    private final String text;
-    public EntityParser(Reader reader) throws IOException {
-        StringWriter w = new StringWriter();
+    private static final Pattern ENTITY_PATTERN = Pattern.compile("<!ENTITY\\s+%\\s+(\\S+)\\s+\"([^\"]*)\"\\s*>");
+    private static final Pattern ENTITY_USE_PATTERN = Pattern.compile("%([\\S;]+);");
+
+    private final Map<String,String> entityMap  = new HashMap<>();
+    private String remainingText = "";
+
+    public EntityParser() throws IOException {
+    }
+
+    /**
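+     * Reads the whole input, collects its {@code <!ENTITY % name "value">} declarations into the entity map and expands {@code %name;} references in the remaining text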
+     */
+    public void parse(Reader reader) throws IOException {
+        StringBuilder w = new StringBuilder();
         char[] buf = new char[4096];
         int read;
         while ((read = reader.read(buf)) != -1) {
-            w.write(buf, 0, read);
+            w.append(buf, 0, read);
         }
-        this.text = w.toString();
-        entityMap = new java.util.HashMap();
-    }
-    /** Parses file for ENTITY declaration, creates map with entities
-     */
-    public void parse() throws IOException {
-        BufferedReader br = new BufferedReader(new StringReader(text));
-        String line = null;
-        while ((line=br.readLine())!=null) {
-            int startPos = line.indexOf("<!ENTITY ");
-            if (startPos>=0) addEntity(br,line.substring(startPos+9));
+
+        String originalText = w.toString();
+
+        StringBuffer buffer = new StringBuffer(originalText.length());
+        Matcher entityMatcher = ENTITY_PATTERN.matcher(originalText);
+        while(entityMatcher.find()) {
+            addEntity(entityMatcher);
+            entityMatcher.appendReplacement(buffer, "");
+        }
+        entityMatcher.appendTail(buffer);
+
+        StringBuffer buffer2 = new StringBuffer(originalText.length());
+        Matcher entityReplacementMatcher = ENTITY_USE_PATTERN.matcher(buffer);
+        while(entityReplacementMatcher.find()) {
+            String entity = entityReplacementMatcher.group(1);
+            if(entityMap.containsKey(entity)) {
+                entityReplacementMatcher.appendReplacement(buffer2, entityMap.get(entity));
+            }
         }
-        br.close();
+        entityReplacementMatcher.appendTail(buffer2);
+
+        remainingText = buffer2.toString();
     }
-
-    private void addEntity(BufferedReader br, String line) throws IOException {
-        StringTokenizer tok = new StringTokenizer(line);
-        if (!tok.hasMoreTokens()) return;
-        String percentage = tok.nextToken();
-        if (!"%".equals(percentage)) return; //incorrect ENTITY declaration (missing %)
-        if (!tok.hasMoreTokens()) return; //incorrect ENTITY declaration (missing entity name)
-
-	// cut the first part including entity key
-        String key = tok.nextToken();
-        int valueStartPos = line.indexOf(key)+key.length();
-        String rest = line.substring(valueStartPos);
-
-	// looking for starting quotes
-	valueStartPos =  rest.indexOf("\"");
-	if (valueStartPos<0) return;
-
-	// looking for entity value
-	rest = rest.substring(valueStartPos+1);
-	String value = resolveValue (rest,br);
 
-        // write ENTITY into map	
+    private void addEntity(Matcher m) throws IOException {
+        String key = m.group(1);
+	String value = m.group(2);
+
+        // write ENTITY into map
         if (value!=null) {
 	    int refStart = value.indexOf("%");
 	    int refEnd = value.indexOf(";");
 	    if (refStart>=0 && refEnd>refStart) { //references other entity
-		String entityKey = value.substring(refStart+1,refEnd);		
+		String entityKey = value.substring(refStart+1,refEnd);
                 String val = (String)entityMap.get(entityKey);
 		if (val!=null) {
 		    String newValue = value.substring(0,refStart)+val+value.substring(refEnd+1);
@@ -92,76 +97,12 @@ private void addEntity(BufferedReader br, String line) throws IOException {
             }
         }
     }
-
-    private String resolveValue(String lineRest, BufferedReader br) throws IOException {
-	// looking for closing quotes
-	int index = lineRest.indexOf("\"");
-	if (index>=0) return lineRest.substring(0,index);	
-	// value across multiple lines	
-	StringBuffer buf = new StringBuffer(lineRest);
-        buf.append("\n");
-	int ch=br.read();
-        while ( ch!=(int)'"' && ch!=(int)'>' && ch!=-1 ) {
-	    buf.append((char)ch);
-	    ch=br.read();
-        }
-	return buf.toString();
-    }
-
-    private boolean containsBlank(String s) {
-        for (int i=0;i<s.length();i++) {
-            if (' '==s.charAt(i)) return true;
-        }
-        return false;
-    }    
 
     /** Creates a StringReader that removes all ENTITY declarations
      *  and replaces entity references with corresponding values
      */
     public Reader getReader() throws IOException {
-        StringBuffer buf = new StringBuffer();
-        BufferedReader br = new BufferedReader(new StringReader(text));
-        String line = null;
-        while ((line=br.readLine())!=null) {
-            // removing line(s) with entity declaration
-            if (line.indexOf("<!ENTITY ")>=0) line = removeEntityDeclaration(line,br); 
-            // searches for entity reference and replace it with value
-            int pos = line.indexOf("%");
-            if (pos>=0) {
-                StringTokenizer tok = new StringTokenizer(line.substring(pos),";%");
-                while (tok.hasMoreTokens()) {
-                    String key = tok.nextToken();
-                    if (key.length()>0 && !containsBlank(key)) {
-                        String value = (String)entityMap.get(key);
-                        if (value!=null) line = line.replaceAll("%"+key+";",value);
-                    }
-                }
-            }
-            if (line.length()>0) buf.append(line);
-        }
-        br.close();
-        return new StringReader(buf.toString());
-    }
-
-    /** Removing line(s) containing ENTITY declaration
-     */ 
-    private String removeEntityDeclaration(String line,BufferedReader br) throws IOException {
-        int start = line.indexOf("<!ENTITY ");
-        StringBuffer buf = new StringBuffer();
-        if (start>0) buf.append(line.substring(0, start));
-        int endPos = line.indexOf(">", start);
-        if (endPos>0) {
-            buf.append(line.substring(endPos+1));
-            return buf.toString();
-        }
-        String ln=null;
-        while (endPos<0 && (ln=br.readLine())!=null) {
-            endPos = ln.indexOf(">");
-            if (endPos>=0) {
-                buf.append(ln.substring(endPos+1));
-            }
-        }
-        return buf.toString();
+        return new StringReader(remainingText);
     }
-    
+
 }
diff --git a/ide/schema2beans/test/unit/data/TestEntity.dtd b/ide/schema2beans/test/unit/data/TestEntity.dtd
@@ -0,0 +1,57 @@
+<!-- The following DTD is a simple view of a book:
+book
+     index
+        +
+          word
+               #PCDATA
+          page
+               #PCDATA
+     chapter+
+          comment?
+               #PCDATA
+          paragraph*
+               #PCDATA
+     summary?
+          #PCDATA
+
+     good
+	EMPTY
+-->
+
+<!ENTITY % yesno 
+    "(yes, no)"
+>
+
+<!ELEMENT summary (#PCDATA)>
+
+<!ELEMENT book (summary?, chapter+, index+, available)>
+
+<!ELEMENT available EMPTY>
+
+<!ELEMENT index (word, ref+)>
+
+<!ELEMENT ref (page, line)>
+
+<!ELEMENT word (#PCDATA)>
+
+<!ELEMENT line (#PCDATA)>
+
+<!ELEMENT page (#PCDATA)>
+
+<!ELEMENT chapter (comment?, paragraph*)>
+
+<!ELEMENT paragraph (#PCDATA)>
+
+<!ELEMENT comment (#PCDATA)>
+
+<!ATTLIST book good %yesno; 'yes'>
+<!ATTLIST summary length CDATA #REQUIRED>
+<!ATTLIST summary lang CDATA #IMPLIED>
+<!ATTLIST summary size CDATA #FIXED '12'>
+<!ATTLIST chapter title CDATA #IMPLIED>
+<!ATTLIST index color CDATA #IMPLIED>
+<!ATTLIST index cross-ref %yesno; "no">
+<!ATTLIST index glossary CDATA #FIXED "nope">
+<!ATTLIST word freq CDATA #IMPLIED>
+
+
diff --git a/ide/schema2beans/test/unit/data/goldenfiles/tests/SecondaryTest/testEntityParser.pass b/ide/schema2beans/test/unit/data/goldenfiles/tests/SecondaryTest/testEntityParser.pass
@@ -0,0 +1,55 @@
+<!-- The following DTD is a simple view of a book:
+book
+     index
+        +
+          word
+               #PCDATA
+          page
+               #PCDATA
+     chapter+
+          comment?
+               #PCDATA
+          paragraph*
+               #PCDATA
+     summary?
+          #PCDATA
+
+     good
+	EMPTY
+-->
+
+
+
+<!ELEMENT summary (#PCDATA)>
+
+<!ELEMENT book (summary?, chapter+, index+, available)>
+
+<!ELEMENT available EMPTY>
+
+<!ELEMENT index (word, ref+)>
+
+<!ELEMENT ref (page, line)>
+
+<!ELEMENT word (#PCDATA)>
+
+<!ELEMENT line (#PCDATA)>
+
+<!ELEMENT page (#PCDATA)>
+
+<!ELEMENT chapter (comment?, paragraph*)>
+
+<!ELEMENT paragraph (#PCDATA)>
+
+<!ELEMENT comment (#PCDATA)>
+
+<!ATTLIST book good (yes, no) 'yes'>
+<!ATTLIST summary length CDATA #REQUIRED>
+<!ATTLIST summary lang CDATA #IMPLIED>
+<!ATTLIST summary size CDATA #FIXED '12'>
+<!ATTLIST chapter title CDATA #IMPLIED>
+<!ATTLIST index color CDATA #IMPLIED>
+<!ATTLIST index cross-ref (yes, no) "no">
+<!ATTLIST index glossary CDATA #FIXED "nope">
+<!ATTLIST word freq CDATA #IMPLIED>
+
+
diff --git a/ide/schema2beans/test/unit/src/tests/SecondaryTest.java b/ide/schema2beans/test/unit/src/tests/SecondaryTest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package tests;
+
+import java.io.*;
+import java.lang.reflect.Field;
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+import org.netbeans.junit.*;
+
+import org.netbeans.modules.schema2beansdev.*;
+
+public class SecondaryTest extends NbTestCase {
+
+    public SecondaryTest(java.lang.String testName) {
+        super(testName);
+    }
+
+    public void testEntityParser() throws Exception {
+        File schemaFile = new File(getDataDir(), "TestEntity.dtd");
+        try (InputStream dtdIn = new FileInputStream(schemaFile);
+                Reader reader = new InputStreamReader(dtdIn, StandardCharsets.ISO_8859_1);
+                InputStream goldenStream = new FileInputStream(getGoldenFile());
+                Reader goldenReader = new InputStreamReader(goldenStream, StandardCharsets.ISO_8859_1)) {
+            EntityParser ep = new EntityParser();
+
+            ep.parse(reader);
+
+            Field entityMapField = EntityParser.class.getDeclaredField("entityMap");
+            entityMapField.setAccessible(true);
+
+            Map entityMap = (Map) entityMapField.get(ep);
+
+            assertEquals(1, entityMap.size());
+
+            StringBuilder testOutput = new StringBuilder();
+
+            try (Reader r = ep.getReader()) {
+                int read = 0;
+                char[] buffer = new char[4096];
+                while ( (read = r.read(buffer)) >= 0) {
+                    testOutput.append(buffer, 0, read);
+                }
+            }
+
+            StringBuilder reference = new StringBuilder();
+            {
+                int read = 0;
+                char[] buffer = new char[4096];
+                while ( (read = goldenReader.read(buffer)) >= 0) {
+                    reference.append(buffer, 0, read);
+                }
+            }
+
+            assertEquals(reference.toString(), testOutput.toString());
+        }
+    }
+}