Skip to content

Commit

Permalink
AVRO-3239: IDL parsing silently ignores dangling documentation commen…
Browse files Browse the repository at this point in the history
…ts (apache#1377)

* AVRO-3239: Add warnings for ignored doc comments

* AVRO-3239: Add mock Maven Log implementation

* AVRO-3239: Log parser warnings in Maven IDL mojo

* AVRO-3239: Log parser warnings in the IDL tools

* AVRO-3239: Document new (public) parser method

* AVRO-3239: Undo merge error and fix build

* AVRO-3239: Extract helper class for doc comments

The helper class allows better testing of and documentation for handling
doc comments. The .avpr changes all dedented comments.

* AVRO-3239: Add missing license

* AVRO-3239: Fix documentation comment

* AVRO-3239: Fix tests

* AVRO-3239: Apply review comments
  • Loading branch information
opwvhk authored Dec 12, 2021
1 parent 7d85152 commit 7e97a10
Show file tree
Hide file tree
Showing 25 changed files with 795 additions and 37 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.compiler.idl;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* Utility class with {@code ThreadLocal} fields that allow the generated
* classes {@link Idl} and {@link IdlTokenManager} to exchange documentation
* comments without forcing explicit parsing of documentation comments.
*
* The reason this works is that all calls to this class happen within a call to
* the method {@link Idl#CompilationUnit()} (either directly or indirectly).
*/
public class DocCommentHelper {
/**
* Pattern to match the common whitespace indents in a multi-line String.
* Doesn't match a single-line String, fully matches any multi-line String.
*
* To use: match on a {@link String#trim() trimmed} String, and then replace all
* newlines followed by the group "indent" with a newline.
*/
private static final Pattern WS_INDENT = Pattern.compile("(?U).*\\R(?<indent>\\h*).*(?:\\R\\k<indent>.*)*");
/**
* Pattern to match the whitespace indents plus common stars (1 or 2) in a
* multi-line String. If a String fully matches, replace all occurrences of a
* newline followed by whitespace and then the group "stars" with a newline.
*
* Note: partial matches are invalid.
*/
private static final Pattern STAR_INDENT = Pattern.compile("(?U)(?<stars>\\*{1,2}).*(?:\\R\\h*\\k<stars>.*)*");

private static final ThreadLocal<DocComment> DOC = new ThreadLocal<>();
private static final ThreadLocal<List<String>> WARNINGS = ThreadLocal.withInitial(ArrayList::new);

/**
* Return all warnings that were encountered while parsing, once. Subsequent
* calls before parsing again will return an empty list.
*/
static List<String> getAndClearWarnings() {
List<String> warnings = WARNINGS.get();
WARNINGS.remove();
return warnings;
}

static void setDoc(Token token) {
DocComment newDocComment = new DocComment(token);
DocComment oldDocComment = DOC.get();
if (oldDocComment != null) {
WARNINGS.get()
.add(String.format(
"Found documentation comment at line %d, column %d. Ignoring previous one at line %d, column %d: \"%s\"\n"
+ "Did you mean to use a multiline comment ( /* ... */ ) instead?",
newDocComment.line, newDocComment.column, oldDocComment.line, oldDocComment.column, oldDocComment.text));
}
DOC.set(newDocComment);
}

static void clearDoc() {
DocComment oldDocComment = DOC.get();
if (oldDocComment != null) {
WARNINGS.get()
.add(String.format(
"Ignoring out-of-place documentation comment at line %d, column %d: \"%s\"\n"
+ "Did you mean to use a multiline comment ( /* ... */ ) instead?",
oldDocComment.line, oldDocComment.column, oldDocComment.text));
}
DOC.remove();
}

static String getDoc() {
DocComment docComment = DOC.get();
DOC.remove();
return docComment == null ? null : docComment.text;
}

/* Package private to facilitate testing */
static String stripIndents(String doc) {
Matcher starMatcher = STAR_INDENT.matcher(doc);
if (starMatcher.matches()) {
return doc.replaceAll("(?U)(?:^|(\\R)\\h*)\\Q" + starMatcher.group("stars") + "\\E\\h?", "$1");
}

Matcher whitespaceMatcher = WS_INDENT.matcher(doc);
if (whitespaceMatcher.matches()) {
return doc.replaceAll("(?U)(\\R)" + whitespaceMatcher.group("indent"), "$1");
}

return doc;
}

private static class DocComment {
private final String text;
private final int line;
private final int column;

DocComment(Token token) {
// The token is everything after the initial '/**', including all
// whitespace and the ending '*/'
int tokenLength = token.image.length();
this.text = stripIndents(token.image.substring(0, tokenLength - 2).trim());
this.line = token.beginLine;
// The preceding token was "/**", and the current token includes
// everything since (also all whitespace). Thus, we can safely subtract 3
// from the token column to get the start of the doc comment.
this.column = token.beginColumn - 3;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,12 @@ public class Idl implements Closeable {
String namespace;
Map<String,Schema> names = new LinkedHashMap<String,Schema>();

private static final ThreadLocal<String> DOC = new ThreadLocal<String>();
static void setDoc(String doc) { DOC.set(doc.trim()); }
static String getDoc() {
String doc = DOC.get();
DOC.set(null);
return doc;
private List<String> parserWarnings = Collections.emptyList();
/**
* Return all warnings that were encountered while parsing.
*/
public List<String> getWarningsAfterParsing() {
return parserWarnings;
}

public Idl(File inputFile) throws IOException {
Expand Down Expand Up @@ -221,7 +221,7 @@ MORE :
<DOC_COMMENT>
SPECIAL_TOKEN :
{
<"*/" > {Idl.setDoc(image.substring(0, image.length()-2));} : DEFAULT
<"*/" > {DocCommentHelper.setDoc(matchedToken);} : DEFAULT
}

<MULTI_LINE_COMMENT>
Expand Down Expand Up @@ -1021,13 +1021,17 @@ TOKEN :
Protocol CompilationUnit():
{
Protocol p;
DocCommentHelper.getAndClearWarnings(); // Throw away previous results.
}
{
p = ProtocolDeclaration()
( < "\u001a" > )?
( <STUFF_TO_IGNORE: ~[]> )?
<EOF>
{ return SchemaResolver.resolve(p); }
{
parserWarnings = DocCommentHelper.getAndClearWarnings();
return SchemaResolver.resolve(p);
}
}

/*
Expand Down Expand Up @@ -1056,7 +1060,7 @@ private Schema NamedSchemaDeclaration(Map<String, JsonNode> props):
} else if ("aliases".equals(key)) { // aliases
for (String alias : getTextProps("aliases", props, token))
s.addAlias(alias);
} else { // add all other props
} else { // add all other properties
Accessor.addProp(s, key, props.get(key));
}
LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(s);
Expand Down Expand Up @@ -1106,10 +1110,10 @@ Protocol ProtocolDeclaration():
"protocol"
name = Identifier()
{
p = new Protocol(name, getDoc(), namespace);
p = new Protocol(name, DocCommentHelper.getDoc(), namespace);
for (String key : props.keySet())
if ("namespace".equals(key)) { // already handled: ignore
} else { // add all other props
} else { // add all other properties
Accessor.addProp(p, key, props.get(key));
}
}
Expand All @@ -1127,7 +1131,7 @@ Schema EnumDeclaration():
String defaultSymbol = null;
}
{
"enum" { String doc = getDoc(); }
"enum" { String doc = DocCommentHelper.getDoc(); }
name = Identifier()
symbols = EnumBody()
[ <EQUALS> defaultSymbol=Identifier() <SEMICOLON>]
Expand Down Expand Up @@ -1267,7 +1271,7 @@ Schema FixedDeclaration():
"fixed" name = Identifier() "(" sizeTok = <INTEGER_LITERAL> ")"
";"
{
Schema s = Schema.createFixed(name, getDoc(), this.namespace,
Schema s = Schema.createFixed(name, DocCommentHelper.getDoc(), this.namespace,
Integer.parseInt(sizeTok.image));
names.put(s.getFullName(), s);
return s;
Expand All @@ -1288,7 +1292,7 @@ Schema RecordDeclaration():
name = Identifier()
{
Schema result = Schema.createRecord(
name, getDoc(), this.namespace, isError);
name, DocCommentHelper.getDoc(), this.namespace, isError);
names.put(result.getFullName(), result);
}
"{"
Expand Down Expand Up @@ -1345,13 +1349,13 @@ void VariableDeclarator(Schema type, List<Field> fields):
order = Field.Order.valueOf(getTextProp(key,props,token).toUpperCase());

boolean validate = !SchemaResolver.isUnresolvedSchema(type);
Field field = Accessor.createField(name, type, getDoc(), defaultValue, validate, order);
Field field = Accessor.createField(name, type, DocCommentHelper.getDoc(), defaultValue, validate, order);
for (String key : props.keySet())
if ("order".equals(key)) { // already handled: ignore
} else if ("aliases".equals(key)) { // aliases
for (String alias : getTextProps("aliases", props, token))
field.addAlias(alias);
} else { // add all other props
} else { // add all other properties
Accessor.addProp(field, key, props.get(key));
}
fields.add(field);
Expand All @@ -1364,7 +1368,7 @@ String MessageDocumentation():
{
// Don't parse anything, just return the doc string
{
return getDoc();
return DocCommentHelper.getDoc();
}
}

Expand Down
67 changes: 67 additions & 0 deletions lang/java/compiler/src/test/idl/input/comments.avdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
@namespace("testing")
protocol Comments {
/** Documented Enum */
enum /** Dangling Enum1 */ DocumentedEnum /** Dangling Enum2 */ {
/** Dangling Enum3 */ A,
/** Dangling Enum4 */ B,
/** Dangling Enum5 */ C
/** Dangling Enum6 */}
/** Dangling Enum7 */=
/** Dangling Enum8 */ A
/** Dangling Enum9 */;

// The "Dangling Enum9" doc comment above will be attributed to this enum.
enum NotUndocumentedEnum {D,E}

/** Dangling Fixed1 */ fixed
/** Dangling Fixed2 */ DocumentedFixed
/** Dangling Fixed3 */(
/** Dangling Fixed4 */ 16
/** Dangling Fixed5 */)
/** Documented Fixed Type */;

fixed UndocumentedFixed(16);

/** Dangling Error1 */ error
/** Documented Error */ DocumentedError
/** Dangling Field1 */{
/** Dangling Field2 */string
/** Dangling Field3 */reason
/** Documented Field */;
/** Dangling Error2 */}

// The "Dangling Error2" doc comment above will be attributed to this record.
record NotUndocumentedRecord {
string description;
}

/** Documented Method */ void
/** Dangling Param1 */ documentedMethod
/** Dangling Param2 */(
/** Dangling Param3 */ string
/** Dangling Param4 */ message
/** Documented Parameter */)
/** Dangling Method1 */ throws
/** Dangling Method2 */ DocumentedError
/** Dangling Method3 */;

// The "Dangling Method3" doc comment above will be attributed to this method.
void notUndocumentedMethod(string message);
}
2 changes: 1 addition & 1 deletion lang/java/compiler/src/test/idl/output/baseball.avpr
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"protocol" : "Baseball",
"namespace" : "avro.examples.baseball",
"doc" : "* Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * https://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.",
"doc" : "Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License. You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.",
"types" : [ {
"type" : "enum",
"name" : "Position",
Expand Down
62 changes: 62 additions & 0 deletions lang/java/compiler/src/test/idl/output/comments.avpr
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"protocol" : "Comments",
"namespace" : "testing",
"types" : [ {
"type" : "enum",
"name" : "DocumentedEnum",
"doc" : "Documented Enum",
"symbols" : [ "A", "B", "C" ],
"default" : "A"
}, {
"type" : "enum",
"name" : "NotUndocumentedEnum",
"doc" : "Dangling Enum9",
"symbols" : [ "D", "E" ]
}, {
"type" : "fixed",
"name" : "DocumentedFixed",
"doc" : "Documented Fixed Type",
"size" : 16
}, {
"type" : "fixed",
"name" : "UndocumentedFixed",
"size" : 16
}, {
"type" : "error",
"name" : "DocumentedError",
"doc" : "Documented Error",
"fields" : [ {
"name" : "reason",
"type" : "string",
"doc" : "Documented Field"
} ]
}, {
"type" : "record",
"name" : "NotUndocumentedRecord",
"doc" : "Dangling Error2",
"fields" : [ {
"name" : "description",
"type" : "string"
} ]
} ],
"messages" : {
"documentedMethod" : {
"doc" : "Documented Method",
"request" : [ {
"name" : "message",
"type" : "string",
"doc" : "Documented Parameter"
} ],
"response" : "null",
"errors" : [ "DocumentedError" ]
},
"notUndocumentedMethod" : {
"doc" : "Dangling Method3",
"request" : [ {
"name" : "message",
"type" : "string"
} ],
"response" : "null"
}
}
}
2 changes: 1 addition & 1 deletion lang/java/compiler/src/test/idl/output/import.avpr
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"protocol" : "Import",
"namespace" : "org.foo",
"doc" : "* Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * https://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.",
"doc" : "Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License. You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.",
"types" : [ {
"type" : "enum",
"name" : "Position",
Expand Down
Loading

0 comments on commit 7e97a10

Please sign in to comment.