Skip to content

Commit

Permalink
Fixed django#18456 -- Added path escaping to HttpRequest.get_full_pat…
Browse files Browse the repository at this point in the history
…h().
  • Loading branch information
Unai Zalakain authored and timgraham committed Nov 3, 2014
1 parent d3db878 commit c548c8d
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 4 deletions.
6 changes: 4 additions & 2 deletions django/http/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
from django.http.multipartparser import MultiPartParser, MultiPartParserError
from django.utils import six
from django.utils.datastructures import MultiValueDict, ImmutableList
from django.utils.encoding import force_bytes, force_text, force_str, iri_to_uri
from django.utils.encoding import (
force_bytes, force_text, force_str, escape_uri_path, iri_to_uri,
)
from django.utils.six.moves.urllib.parse import parse_qsl, urlencode, quote, urljoin, urlsplit


Expand Down Expand Up @@ -98,7 +100,7 @@ def get_full_path(self):
# RFC 3986 requires query string arguments to be in the ASCII range.
# Rather than crash if this doesn't happen, we encode defensively.
return '%s%s' % (
self.path,
escape_uri_path(self.path),
('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))) if self.META.get('QUERY_STRING', '') else ''
)

Expand Down
17 changes: 17 additions & 0 deletions django/utils/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,23 @@ def uri_to_iri(uri):
return repercent_broken_unicode(iri).decode('utf-8')


def escape_uri_path(path):
    """
    Percent-encode the unsafe characters in the path portion of a Uniform
    Resource Identifier (URI) and return the quoted result.
    """
    # RFC 2396, sections 2.2 and 2.3, define:
    #   reserved   = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
    #   unreserved = alphanum | mark
    #   mark       = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
    # The safe set below is built by subtracting ";", "=" and "?" from those
    # characters, according to section 3.3 of RFC 2396.
    # "/" is deliberately kept safe: the whole path is being escaped here,
    # not a single path segment.
    safe_chars = b"/:@&+$,-_.!~*'()"
    return quote(force_bytes(path), safe=safe_chars)


def repercent_broken_unicode(path):
"""
As per section 3.2 of RFC 3987, step three of converting a URI into an IRI,
Expand Down
7 changes: 7 additions & 0 deletions docs/ref/utils.txt
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,13 @@ The functions defined in this module share the following properties:

Returns an ASCII string containing the encoded result.

.. function:: escape_uri_path(path)

.. versionadded:: 1.8

Escapes the unsafe characters in the path portion of a Uniform Resource
Identifier (URI).

``django.utils.feedgenerator``
==============================

Expand Down
4 changes: 4 additions & 0 deletions docs/releases/1.8.txt
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,10 @@ Requests and Responses
* ``WSGIRequestHandler`` now follows RFC in converting URI to IRI, using
``uri_to_iri()``.

* The :meth:`HttpRequest.get_full_path()
<django.http.HttpRequest.get_full_path>` method now properly escapes unsafe
characters in the path portion of a Uniform Resource Identifier (URI).

Tests
^^^^^

Expand Down
13 changes: 13 additions & 0 deletions tests/requests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,19 @@ def test_httprequest(self):
# and FILES should be MultiValueDict
self.assertEqual(request.FILES.getlist('foo'), [])

def test_httprequest_full_path(self):
    # The path's ";", "?" and "=" are expected percent-encoded, while the
    # query string is expected to come back unchanged.
    req = HttpRequest()
    awful_path = '/;some/?awful/=path/foo:bar/'
    req.path = awful_path
    req.path_info = awful_path
    req.META['QUERY_STRING'] = ';some=query&+query=string'
    self.assertEqual(
        req.get_full_path(),
        '/%3Bsome/%3Fawful/%3Dpath/foo:bar/?;some=query&+query=string',
    )

def test_httprequest_full_path_with_query_string_and_fragment(self):
    # A "#" in the path is expected as %23; the "#" in the query string is
    # expected to be left as-is.
    req = HttpRequest()
    path_with_hash = '/foo#bar'
    req.path = path_with_hash
    req.path_info = path_with_hash
    req.META['QUERY_STRING'] = 'baz#quux'
    full_path = req.get_full_path()
    self.assertEqual(full_path, '/foo%23bar?baz#quux')

def test_httprequest_repr(self):
request = HttpRequest()
request.path = '/somepath/'
Expand Down
14 changes: 12 additions & 2 deletions tests/utils_tests/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
import datetime

from django.utils import six
from django.utils.encoding import (filepath_to_uri, force_bytes, force_text,
iri_to_uri, uri_to_iri)
from django.utils.encoding import (
filepath_to_uri, force_bytes, force_text, escape_uri_path,
iri_to_uri, uri_to_iri,
)
from django.utils.http import urlquote_plus


Expand Down Expand Up @@ -40,6 +42,14 @@ def test_force_bytes_strings_only(self):
today = datetime.date.today()
self.assertEqual(force_bytes(today, strings_only=True), today)

def test_escape_uri_path(self):
    # Each pair is (raw path, expected escaped path). ";", "=", "?" and "#"
    # are expected percent-encoded; ":", "@", "&", "+" and "/" are expected
    # to be left untouched.
    cases = (
        (
            '/;some/=awful/?path/:with/@lots/&of/+awful/chars',
            '/%3Bsome/%3Dawful/%3Fpath/:with/@lots/&of/+awful/chars',
        ),
        ('/foo#bar', '/foo%23bar'),
        ('/foo?bar', '/foo%3Fbar'),
    )
    for raw_path, escaped_path in cases:
        self.assertEqual(escape_uri_path(raw_path), escaped_path)


class TestRFC3987IEncodingUtils(unittest.TestCase):

Expand Down

0 comments on commit c548c8d

Please sign in to comment.