Skip to content

Commit

Permalink
Merge pull request ampproject#1774 from powdercloud/update-github
Browse files Browse the repository at this point in the history
Validator updates (CSS validation, Turkish İ in htmlparser.js)
  • Loading branch information
powdercloud committed Feb 4, 2016
2 parents 20764ad + 0110ebe commit bf07b12
Show file tree
Hide file tree
Showing 7 changed files with 208 additions and 114 deletions.
23 changes: 12 additions & 11 deletions validator/htmlparser.js
Original file line number Diff line number Diff line change
Expand Up @@ -651,25 +651,26 @@ amp.htmlparser.HtmlParser.prototype.normalizeRCData_ = function(rcdata) {


/**
* TODO(goto): why isn't this in the string package ? does this solves any
* real problem ? move it to the goog.string package if it does.
*
* @param {string} str The string to lower case.
* @return {string} The str in lower case format.
*/
amp.htmlparser.toLowerCase = function(str) {
// The below may not be true on browsers in the Turkish locale.
if ('script' === 'SCRIPT'.toLowerCase()) {
return str.toLowerCase();
} else {
return str.replace(/[A-Z]/g, function(ch) {
return String.fromCharCode(ch.charCodeAt(0) | 32);
});
// htmlparser.js heavily relies on the length of the strings, and
// unfortunately some characters change their length when
// lowercased; for instance, the Turkish İ has a length of 1, but
// when lower-cased, it has a length of 2. So, as a workaround we
// check that the length be the same as before lower-casing, and if
// not, we only lower-case the letters A-Z.
const lowerCased = str.toLowerCase();
if (lowerCased.length == str.length) {
return lowerCased;
}
return str.replace(/[A-Z]/g, function(ch) {
return String.fromCharCode(ch.charCodeAt(0) | 32);
});
};



/**
* An interface to the {@code amp.htmlparser.HtmlParser} visitor, that gets
* called while the HTML is being parsed.
Expand Down
47 changes: 47 additions & 0 deletions validator/htmlparser_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -340,4 +340,51 @@ describe('HtmlParser with location', () => {
':17:0: endTag(html)',
':17:6: endDoc()']);
});

it('Supports Turkish UTF8 İ character in body', () => {
// A Javascript string with this character in it has .length 1, but
// when .toLowerCase()'d it becomes length 2, which would throw off
// the bookkeeping in htmlparser.js. Hence, amp.htmlparser.toLowerCase
// works around the problem.
const handler = new LoggingHandlerWithLocation();
const parser = new amp.htmlparser.HtmlParser();
parser.parse(
handler,
'<!doctype html>\n' +
'<html amp lang="tr">\n' +
'<head>\n' +
'<meta charset="utf-8">\n' +
'<title></title>\n' +
'<script async src="https://cdn.ampproject.org/v0.js"></script>\n' +
'</head>\n' +
'<body>İ</body>\n' +
'</html>');
expect(handler.log).toEqual([
':1:0: startDoc()',
':1:0: startTag(!doctype,[html,html])',
':1:14: pcdata("\n")',
':2:0: startTag(html,[amp,amp,lang,tr])',
':2:19: pcdata("\n")',
':3:0: startTag(head,[])',
':3:5: pcdata("\n")',
':4:0: startTag(meta,[charset,utf-8])',
':4:21: pcdata("\n")',
':5:0: startTag(title,[])',
':5:0: rcdata("")',
':5:7: endTag(title)',
':5:14: pcdata("\n")',
':6:0: startTag(script,[async,async,src,'+
'https://cdn.ampproject.org/v0.js])',
':6:0: cdata("")',
':6:53: endTag(script)',
':6:61: pcdata("\n")',
':7:0: endTag(head)',
':7:6: pcdata("\n")',
':8:0: startTag(body,[])',
':8:5: pcdata("İ")',
':8:7: endTag(body)',
':8:13: pcdata("\n")',
':9:0: endTag(html)',
':9:6: endDoc()' ]);
});
});
47 changes: 47 additions & 0 deletions validator/parse-css.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ goog.provide('parse_css.BlockType');
goog.provide('parse_css.Declaration');
goog.provide('parse_css.QualifiedRule');
goog.provide('parse_css.Rule');
goog.provide('parse_css.RuleVisitor');
goog.provide('parse_css.Stylesheet');
goog.provide('parse_css.TokenStream');
goog.provide('parse_css.extractAFunction');
Expand Down Expand Up @@ -183,6 +184,9 @@ goog.inherits(parse_css.Rule, parse_css.Token);
/** @type {parse_css.TokenType} */
parse_css.Rule.tokenType = parse_css.TokenType.UNKNOWN;

/** @param {!parse_css.RuleVisitor} visitor */
parse_css.Rule.prototype.accept = goog.abstractMethod;

/**
* @param {number=} opt_indent
* @return {string}
Expand Down Expand Up @@ -215,6 +219,14 @@ parse_css.Stylesheet.prototype.toJSON = function() {
return json;
};

/** @param {!parse_css.RuleVisitor} visitor */
parse_css.Stylesheet.prototype.accept = function(visitor) {
visitor.visitStylesheet(this);
for (const rule of this.rules) {
rule.accept(visitor);
}
};

/**
* @param {string} name
* @constructor
Expand Down Expand Up @@ -246,6 +258,17 @@ parse_css.AtRule.prototype.toJSON = function() {
return json;
};

/** @param {!parse_css.RuleVisitor} visitor */
parse_css.AtRule.prototype.accept = function(visitor) {
visitor.visitAtRule(this);
for (const rule of this.rules) {
rule.accept(visitor);
}
for (const declaration of this.declarations) {
declaration.accept(visitor);
}
};

/**
* @constructor
* @extends {parse_css.Rule}
Expand All @@ -271,6 +294,14 @@ parse_css.QualifiedRule.prototype.toJSON = function() {
return json;
};

/** @param {!parse_css.RuleVisitor} visitor */
parse_css.QualifiedRule.prototype.accept = function(visitor) {
visitor.visitQualifiedRule(this);
for (const declaration of this.declarations) {
declaration.accept(visitor);
}
};

/**
* @param {string} name
* @constructor
Expand Down Expand Up @@ -299,6 +330,22 @@ parse_css.Declaration.prototype.toJSON = function() {
return json;
};

/** @param {!parse_css.RuleVisitor} visitor */
parse_css.Declaration.prototype.accept = function(visitor) {
visitor.visitDeclaration(this);
};

/** @constructor */
parse_css.RuleVisitor = function() {};
/** @param {!parse_css.Stylesheet} stylesheet */
parse_css.RuleVisitor.prototype.visitStylesheet = function(stylesheet) {};
/** @param {!parse_css.AtRule} atRule */
parse_css.RuleVisitor.prototype.visitAtRule = function(atRule) {};
/** @param {!parse_css.QualifiedRule} qualifiedRule */
parse_css.RuleVisitor.prototype.visitQualifiedRule = function(qualifiedRule) {};
/** @param {!parse_css.Declaration} declaration */
parse_css.RuleVisitor.prototype.visitDeclaration = function(declaration) {};

/**
* Enum describing how to parse the rules inside a CSS AT Rule.
* @enum {string}
Expand Down
3 changes: 2 additions & 1 deletion validator/testdata/feature_tests/incorrect_custom_style.html
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
foo.i-amp-class {}
foo.amp-class {}
foo { b: red !important; }
@viewport (mumble mumble) {}
@viewport (mumble mumble) { }
@media (whatever) { @notallowednested }
</style>
<style>body {opacity: 0}</style><noscript><style>body {opacity: 1}</style></noscript>
<script src="https://cdn.ampproject.org/v0.js" async></script>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
FAIL
feature_tests/incorrect_custom_style.html:29:4 CSS syntax error in tag 'author stylesheet' - saw invalid at rule 'import'.
feature_tests/incorrect_custom_style.html:33:4 CSS syntax error in tag 'author stylesheet' - saw invalid at rule 'viewport'.
feature_tests/incorrect_custom_style.html:34:24 CSS syntax error in tag 'author stylesheet' - saw invalid at rule 'notallowednested'.
104 changes: 63 additions & 41 deletions validator/validator.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ goog.require('goog.structs.Map');
goog.require('goog.structs.Set');
goog.require('goog.uri.utils');
goog.require('parse_css.BlockType');
goog.require('parse_css.RuleVisitor');
goog.require('parse_css.parseAStylesheet');
goog.require('parse_css.tokenize');

Expand Down Expand Up @@ -414,6 +415,64 @@ TagNameStack.prototype.hasAncestor = function(ancestor) {
return false;
};


/**
* Returns true if the given AT rule is considered valid.
* @param {!amp.validator.CssSpec} cssSpec
* @param {string} atRuleName
* @return {boolean}
*/
function isAtRuleValid(cssSpec, atRuleName) {
let defaultType = '';

for (const atRuleSpec of cssSpec.atRuleSpec) {
if (atRuleSpec.name === '$DEFAULT') {
defaultType = atRuleSpec.type;
} else if (atRuleSpec.name === atRuleName) {
return atRuleSpec.type !==
amp.validator.AtRuleSpec.BlockType.PARSE_AS_ERROR;
}
}

goog.asserts.assert(defaultType !== '');
return defaultType !== amp.validator.AtRuleSpec.BlockType.PARSE_AS_ERROR;
};

/**
* @param {!amp.validator.TagSpec} tagSpec
* @param {!amp.validator.CssSpec} cssSpec
* @param {!Context} context
* @param {!amp.validator.ValidationResult} result
* @constructor
* @extends {parse_css.RuleVisitor}
*/
const InvalidAtRuleVisitor = function(tagSpec, cssSpec, context, result) {
goog.base(this);
/** @type {!amp.validator.TagSpec} */
this.tagSpec = tagSpec;
/** @type {!amp.validator.CssSpec} */
this.cssSpec = cssSpec;
/** @type {!Context} */
this.context = context;
/** @type {!amp.validator.ValidationResult} */
this.result = result;
/** @type {boolean} */
this.errorsSeen = false;
};
goog.inherits(InvalidAtRuleVisitor, parse_css.RuleVisitor);

/** @param {!parse_css.AtRule} atRule */
InvalidAtRuleVisitor.prototype.visitAtRule = function(atRule) {
if (!isAtRuleValid(this.cssSpec, atRule.name)) {
this.context.addErrorWithLineCol(
new LineCol(atRule.line, atRule.col),
amp.validator.ValidationError.Code.CSS_SYNTAX_INVALID_AT_RULE,
/* params */ [getDetailOrName(this.tagSpec), atRule.name],
/* url */ '', this.result);
this.errorsSeen = true;
}
};

/**
* This matcher maintains a constraint to check which an opening tag
* introduces: a tag's cdata matches constraints set by it's cdata
Expand Down Expand Up @@ -479,28 +538,6 @@ function computeAtRuleDefaultParsingSpec(atRuleParsingSpec) {
return ret;
}

/**
* Returns true if the given AT rule is considered valid.
* @param {!amp.validator.CssSpec} cssSpec
* @param {string} atRuleName
* @return {boolean}
*/
CdataMatcher.prototype.isAtRuleValid = function(cssSpec, atRuleName) {
let defaultType = '';

for (const atRuleSpec of cssSpec.atRuleSpec) {
if (atRuleSpec.name === '$DEFAULT') {
defaultType = atRuleSpec.type;
} else if (atRuleSpec.name === atRuleName) {
return atRuleSpec.type !==
amp.validator.AtRuleSpec.BlockType.PARSE_AS_ERROR;
}
}

goog.asserts.assert(defaultType !== '');
return defaultType !== amp.validator.AtRuleSpec.BlockType.PARSE_AS_ERROR;
};

/**
* Matches the provided cdata against what this matcher expects.
* @param {string} cdata
Expand Down Expand Up @@ -571,36 +608,21 @@ CdataMatcher.prototype.match = function(cdata, context, validationResult) {
const sheet = parse_css.parseAStylesheet(
tokenList, atRuleParsingSpec,
computeAtRuleDefaultParsingSpec(atRuleParsingSpec), cssErrors);
let reportCdataRegexpErrors = (cssErrors.length == 0);
for (const errorToken of cssErrors) {
const lineCol = new LineCol(errorToken.line, errorToken.col);
context.addErrorWithLineCol(
lineCol, amp.validator.ValidationError.Code.CSS_SYNTAX,
/* params */ [getDetailOrName(this.tagSpec_), errorToken.msg],
/* url */ '', validationResult);
}

// TODO(johannes): This needs improvement to validate all fields
// recursively. For now, it's just doing one layer of fields to
// demonstrate the idea and keep tests passing.
for (const cssRule of sheet.rules) {
if (cssRule.tokenType === 'AT_RULE') {
if (!this.isAtRuleValid(cdataSpec.cssSpec, cssRule.name)) {
reportCdataRegexpErrors = false;
const lineCol = new LineCol(cssRule.line, cssRule.col);
context.addErrorWithLineCol(
lineCol,
amp.validator.ValidationError.Code.CSS_SYNTAX_INVALID_AT_RULE,
/* params */ [getDetailOrName(this.tagSpec_), cssRule.name],
/* url */ '', validationResult);
}
}
}
const visitor = new InvalidAtRuleVisitor(
this.tagSpec_, cdataSpec.cssSpec, context, validationResult);
sheet.accept(visitor);

// As a hack to not report some errors twice, both via the css parser
// and via the regular expressions below, we return early if there
// are parser errors and skip the regular expression errors.
if (!reportCdataRegexpErrors)
if (visitor.errorsSeen || cssErrors.length > 0)
return;
}
// } end oneof
Expand Down
Loading

0 comments on commit bf07b12

Please sign in to comment.