Skip to content

Commit

Permalink
Merge pull request #44 from afitzke/utf8-issues
Browse files Browse the repository at this point in the history
Validation/sanitization fails on URLs containing non-ASCII characters
  • Loading branch information
nilportugues authored Aug 29, 2016
2 parents eb8ae55 + 3aeb963 commit 0bd94c2
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 4 deletions.
31 changes: 27 additions & 4 deletions src/Item/ValidatorTrait.php
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,33 @@ public static function validateString($string)
*/
public static function validateLoc($value)
{
if (\filter_var($value, FILTER_VALIDATE_URL, ['options' => ['flags' => FILTER_FLAG_PATH_REQUIRED]])
&& \strlen($value) > 0
) {
return \htmlentities($value);
/**
* Pattern inspired by https://github.com/symfony/validator/blob/v3.1.3/Constraints/UrlValidator.php
* OriginalAuthor: Bernhard Schussek <[email protected]>
* http://www.phpliveregex.com/p/gUC
*/
$pattern = '~^
(http|https):// # protocol
(
([\pL\pN\pS-\.])+(\.?([\pL]|xn\-\-[\pL\pN-]+)+\.?) # a domain name
| # or
\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} # a IP address
| # or
\[
(?:(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){6})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:::(?:(?:(?:[0-9a-f]{1,4})):){5})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){4})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,1}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){3})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,2}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){2})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,3}(?:(?:[0-9a-f]{1,4})))?::(?:(?:[0-9a-f]{1,4})):)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,4}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,5}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,6}(?:(?:[0-9a-f]{1,4})))?::))))
\] # a IPv6 address
)
(:[0-9]+)? # a port (optional)
([^#\?\&]*)([\?|\&][^#]*)?(\#\S*)? # a /, nothing, a / with something, a query or a fragment
$~ixu';

if (\strlen($value) < 1) {
return false;
}

if (preg_match($pattern, $value, $result)) {
$path = implode("/", array_map("rawurlencode", explode("/", @$result[7])));
return $result[1].'://'.$result[2].@$result[6].$path.\htmlspecialchars(@$result[8]).@$result[9];
}

return false;
Expand Down
55 changes: 55 additions & 0 deletions tests/Item/ValidatorTraitTest.php
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,50 @@ class ValidatorTraitTest extends \PHPUnit_Framework_TestCase
{
use ValidatorTrait;

/**
 * URL fixtures iterated by itShouldValidateTestLocs().
 *
 * Each entry is a two-element array: index 0 is the raw URL passed to
 * validateLoc(), index 1 is the string validateLoc() is expected to return.
 *
 * @var array
 */
protected $testLocs = [
// Non-ASCII characters in the path are expected percent-encoded as UTF-8 bytes.
[
'http://example.com/product/Sombrano-Ø-350-cmc',
'http://example.com/product/Sombrano-%C3%98-350-cmc'
],
[
'https://www.example.com/foo/bär/index.php?query=string#anchor',
'https://www.example.com/foo/b%C3%A4r/index.php?query=string#anchor',
],
[
'https://www.example.com/foo/bär/index.php#anchor',
'https://www.example.com/foo/b%C3%A4r/index.php#anchor',
],
[
'https://www.example.com/foo/bär/index.php',
'https://www.example.com/foo/b%C3%A4r/index.php'
],
// A URL without any path is expected back unchanged.
[
'https://www.example.com',
'https://www.example.com'
],
// '&' inside the query string is expected as the entity '&amp;'.
[
'http://www.example.com/ümlaut?query=param&foo=bar#anchor',
'http://www.example.com/%C3%BCmlaut?query=param&amp;foo=bar#anchor',
],
// Explicit port.
[
'http://www.example.com:8080/ümlaut?query=param&foo=bar',
'http://www.example.com:8080/%C3%BCmlaut?query=param&amp;foo=bar',
],
// IPv4 host.
[
'http://127.0.0.1:8080/ümlaut?query=param&foo=bar',
'http://127.0.0.1:8080/%C3%BCmlaut?query=param&amp;foo=bar',
],
// Punycode (xn--) host.
[
'http://xn--exmple-cua.com:8080/ümlaut?query=param&foo=bar',
'http://xn--exmple-cua.com:8080/%C3%BCmlaut?query=param&amp;foo=bar',
],
// Bracketed IPv6 host.
[
'http://[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]:8080/ümlaut?query=param&foo=bar',
'http://[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]:8080/%C3%BCmlaut?query=param&amp;foo=bar',
]

];

/**
 * Empty constructor: performs no initialisation and does not call the
 * parent constructor.
 *
 * NOTE(review): this overrides the constructor of the PHPUnit base test case
 * without forwarding its arguments — presumably deliberate; confirm it is
 * still required before relying on features that depend on it.
 */
public function __construct()
{
}
Expand All @@ -24,6 +68,17 @@ public function itShouldValidateLoc()
$this->assertEquals('http://google.com/news', $result);
}

/**
 * Checks that validateLoc() returns the expected sanitized URL for every
 * fixture in $testLocs.
 *
 * Each fixture is a two-element array: the raw URL passed to validateLoc()
 * followed by the exact string it is expected to return.
 *
 * @test
 */
public function itShouldValidateTestLocs()
{
    foreach ($this->testLocs as $index => $test) {
        $result = $this->validateLoc($test[0]);

        // All fixtures share one test method, so name the failing one in the
        // message; otherwise a failure does not reveal which URL broke.
        // assertSame also rejects a loose match between a string and the
        // `false` that validateLoc() returns for invalid input.
        $this->assertSame(
            $test[1],
            $result,
            sprintf('validateLoc() mismatch for testLocs[%s] (input: %s)', $index, $test[0])
        );
    }
}

/**
* @test
*/
Expand Down

0 comments on commit 0bd94c2

Please sign in to comment.