Skip to content

Commit

Permalink
Preserve AMP binding attributes through HTML parsing and serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
westonruter committed Jan 24, 2018
1 parent 216ec37 commit d266233
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 1 deletion.
108 changes: 108 additions & 0 deletions includes/utils/class-amp-dom-utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ public static function get_dom( $document ) {

$dom = new DOMDocument();

$document = self::convert_amp_bind_attributes( $document );

/*
* Wrap in dummy tags, since XML needs one parent node.
* It also makes it easier to loop through nodes.
Expand All @@ -74,6 +76,110 @@ public static function get_dom( $document ) {
return $dom;
}

/**
* Get attribute prefix for converted amp-bind attributes.
*
* The prefix contains a random string so that HTML content which originally contained a data-* attribute
* with a similar name is not mutated into an amp-bind attribute when the attributes are restored.
*
* @since 0.7
* @see \AMP_DOM_Utils::convert_amp_bind_attributes()
* @see \AMP_DOM_Utils::restore_amp_bind_attributes()
* @link https://www.ampproject.org/docs/reference/components/amp-bind
*
* @return string HTML5 data-* attribute name prefix for AMP binding attributes.
*/
public static function get_amp_bind_placeholder_attribute_prefix() {
	// The prefix is generated on the first call only; the static local keeps it
	// so that every later call returns the very same string.
	static $attribute_prefix = null;

	if ( null === $attribute_prefix ) {
		$attribute_prefix = 'data-amp-binding-' . md5( wp_rand() ) . '-';
	}

	return $attribute_prefix;
}

/**
* Replace AMP binding attributes with something that libxml can parse (as HTML5 data-* attributes).
*
* This is necessary because attributes in square brackets are not understood in PHP and
* get dropped with an error raised:
* > Warning: DOMDocument::loadHTML(): error parsing attribute name
* This is a reciprocal function of AMP_DOM_Utils::restore_amp_bind_attributes().
*
* @since 0.7
* @see \AMP_DOM_Utils::restore_amp_bind_attributes()
* @link https://www.ampproject.org/docs/reference/components/amp-bind
*
* @param string $html HTML containing amp-bind attributes.
* @return string HTML with AMP binding attributes replaced with HTML5 data-* attributes.
*/
public static function convert_amp_bind_attributes( $html ) {
	$amp_bind_attr_prefix = self::get_amp_bind_placeholder_attribute_prefix();

	// Pattern for HTML attribute accounting for binding attr name, boolean attribute, single/double-quoted attribute value, and unquoted attribute values.
	$attr_regex = '#^\s+(?P<name>\[?[a-zA-Z0-9_\-]+\]?)(?P<value>=(?:"[^"]*"|\'[^\']*\'|[^\'"\s]+))?#';

	/**
	 * Replace callback.
	 *
	 * Re-parses the attribute string of a matched start tag one attribute at a time,
	 * renaming bracketed (binding) attributes to the placeholder data-* form and
	 * copying all other attributes through unchanged.
	 *
	 * @param array $tag_matches Tag matches.
	 * @return string Replacement.
	 */
	$replace_callback = function( $tag_matches ) use ( $amp_bind_attr_prefix, $attr_regex ) {
		$old_attrs = rtrim( $tag_matches['attrs'] );
		$new_attrs = '';
		$offset    = 0;

		// The attribute pattern is anchored at the start, so it is applied to the not-yet-consumed remainder each time.
		while ( preg_match( $attr_regex, substr( $old_attrs, $offset ), $attr_matches ) ) {
			$offset += strlen( $attr_matches[0] );

			if ( '[' === $attr_matches['name'][0] ) {
				// Binding attribute: strip the square brackets and prepend the placeholder prefix.
				$new_attrs .= ' ' . $amp_bind_attr_prefix . trim( $attr_matches['name'], '[]' );
				if ( isset( $attr_matches['value'] ) ) {
					$new_attrs .= $attr_matches['value'];
				}
			} else {
				// Regular attribute: keep it exactly as written, including its leading whitespace.
				$new_attrs .= $attr_matches[0];
			}
		}

		// Bail on parse error which occurs when the regex isn't able to consume the entire $old_attrs string.
		if ( strlen( $old_attrs ) !== $offset ) {
			return $tag_matches[0];
		}

		return '<' . $tag_matches['name'] . $new_attrs . '>';
	};

	$converted = preg_replace_callback(
		// Match all start tags that probably contain a binding attribute.
		// The tag name is one word character followed by zero or more non-space characters, so
		// single-letter elements such as <p>, <a> and <b> are matched as well.
		'#<(?P<name>\w\S*)(?P<attrs>\s+[^<]+\]=[^<]+)\s*>#',
		$replace_callback,
		$html
	);

	// preg_replace_callback() returns null when PCRE fails (e.g. backtrack limit reached);
	// fall back to the unmodified markup rather than handing null to the caller.
	if ( null === $converted ) {
		return $html;
	}

	return $converted;
}

/**
* Convert AMP bind-attributes back to their original syntax.
*
* This is a reciprocal function of AMP_DOM_Utils::convert_amp_bind_attributes().
*
* @since 0.7
* @see \AMP_DOM_Utils::convert_amp_bind_attributes()
* @link https://www.ampproject.org/docs/reference/components/amp-bind
*
* @param string $html HTML with amp-bind attributes converted.
* @return string HTML with amp-bind attributes restored.
*/
public static function restore_amp_bind_attributes( $html ) {
	// Matches one whitespace character, the placeholder prefix, and then captures the attribute name that follows it.
	$placeholder_pattern = '#\s' . self::get_amp_bind_placeholder_attribute_prefix() . '([a-zA-Z0-9_\-]+)#';

	// Each match is rewritten as a single space followed by the captured name wrapped in square brackets.
	return preg_replace( $placeholder_pattern, ' [$1]', $html );
}

/**
* Return a valid DOMDocument representing arbitrary HTML content passed as a parameter.
*
Expand Down Expand Up @@ -175,6 +281,8 @@ public static function get_content_from_dom_node( $dom, $node ) {
return '';
}

$html = self::restore_amp_bind_attributes( $html );

/*
* Travis w/PHP 7.1 generates <br></br> and <hr></hr> vs. <br/> and <hr/>, respectively.
* Travis w/PHP 7.x generates <source ...></source> vs. <source ... />. Etc.
Expand Down
2 changes: 1 addition & 1 deletion phpcs.xml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
</rule>

<!-- Include sniffs for PHP cross-version compatibility. -->
<config name="testVersion" value="5.2-99.0"/>
<config name="testVersion" value="5.3-99.0"/>
<rule ref="PHPCompatibility">
<exclude-pattern>bin/*</exclude-pattern>
</rule>
Expand Down
29 changes: 29 additions & 0 deletions tests/test-class-amp-dom-utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,33 @@ public function test__get_content_from_dom__br_no_closing_tag() {

$this->assertEquals( $expected, $actual );
}

/**
* Test convert_amp_bind_attributes.
*
* @covers \AMP_DOM_Utils::convert_amp_bind_attributes()
* @covers \AMP_DOM_Utils::restore_amp_bind_attributes()
* @covers \AMP_DOM_Utils::get_amp_bind_placeholder_attribute_prefix()
*/
public function test_amp_bind_conversion() {
	$placeholder_prefix = AMP_DOM_Utils::get_amp_bind_placeholder_attribute_prefix();

	// Well-formed markup: the binding attribute is swapped for the placeholder, the other
	// attributes are kept verbatim, and restoring yields the original markup again.
	$source_html    = '<amp-img width=300 height="200" data-foo="bar" selected src="/img/dog.jpg" [src]="myAnimals[currentAnimal].imageUrl"></amp-img>';
	$converted_html = AMP_DOM_Utils::convert_amp_bind_attributes( $source_html );
	$this->assertNotEquals( $converted_html, $source_html );
	$this->assertContains( $placeholder_prefix . 'src="myAnimals[currentAnimal].imageUrl"', $converted_html );
	$this->assertContains( 'width=300 height="200" data-foo="bar" selected', $converted_html );
	$this->assertEquals( $source_html, AMP_DOM_Utils::restore_amp_bind_attributes( $converted_html ) );

	// Test malformed.
	$malformed_tags = array(
		'<amp-img width="123" [text]="..."</amp-img>',
		'<amp-img width="123" [text] data-test="asd"></amp-img>',
		'<amp-img width="123" [text]="..." *bad*></amp-img>',
	);
	foreach ( $malformed_tags as $malformed_tag ) {
		// None of these may end up containing a placeholder attribute.
		$this->assertNotContains(
			$placeholder_prefix,
			AMP_DOM_Utils::convert_amp_bind_attributes( $malformed_tag )
		);
	}
}
}
4 changes: 4 additions & 0 deletions tests/test-tag-and-attribute-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,10 @@ public function get_data() {
'<font size="1">Headline</font><span style="color: blue">Span</span>',
'Headline<span>Span</span>',
),

'amp_bind_attr' => array(
'<p [text]="\'Hello \' + foo">Hello World</p><button on="tap:AMP.setState({foo: \'amp-bind\'})">Update</button>',
),
);
}

Expand Down

0 comments on commit d266233

Please sign in to comment.