From 2ff6ac1f8bba78e2d608960d076b3090dd26e76e Mon Sep 17 00:00:00 2001 From: Andrew Krasichkov Date: Fri, 31 Mar 2017 01:12:44 +0300 Subject: [PATCH] Initial commit --- .gitignore | 62 + AUTHORS | 4 + CONTRIBUTING.md | 8 + LICENSE | 355 ++++++ MANIFEST.in | 1 + README.md | 78 ++ gixy/__init__.py | 5 + gixy/cli/__init__.py | 0 gixy/cli/argparser.py | 160 +++ gixy/cli/main.py | 173 +++ gixy/core/__init__.py | 0 gixy/core/builtin_variables.py | 266 +++++ gixy/core/config.py | 30 + gixy/core/context.py | 91 ++ gixy/core/issue.py | 16 + gixy/core/manager.py | 59 + gixy/core/plugins_manager.py | 75 ++ gixy/core/regexp.py | 1021 +++++++++++++++++ gixy/core/severity.py | 9 + gixy/core/sre_parse/__init__.py | 0 gixy/core/sre_parse/sre_constants.py | 222 ++++ gixy/core/sre_parse/sre_parse.py | 829 +++++++++++++ gixy/core/utils.py | 2 + gixy/core/variable.py | 114 ++ gixy/directives/__init__.py | 26 + gixy/directives/block.py | 175 +++ gixy/directives/directive.py | 119 ++ gixy/formatters/__init__.py | 23 + gixy/formatters/base.py | 84 ++ gixy/formatters/console.py | 13 + gixy/formatters/json.py | 10 + gixy/formatters/templates/console.j2 | 36 + gixy/formatters/templates/text.j2 | 35 + gixy/formatters/text.py | 13 + gixy/parser/__init__.py | 0 gixy/parser/nginx_parser.py | 142 +++ gixy/parser/raw_parser.py | 164 +++ gixy/plugins/__init__.py | 0 gixy/plugins/_internal_rewrite.py | 87 ++ gixy/plugins/add_header_multiline.py | 46 + gixy/plugins/add_header_redefinition.py | 69 ++ gixy/plugins/force_https.py | 20 + gixy/plugins/host_spoofing.py | 23 + gixy/plugins/http_splitting.py | 43 + gixy/plugins/origins.py | 71 ++ gixy/plugins/plugin.py | 30 + gixy/plugins/ssrf.py | 62 + gixy/plugins/valid_referers.py | 18 + requirements.dev.pip | 4 + requirements.pip | 6 + setup.py | 40 + tests/__init__.py | 0 tests/core/__init__.py | 0 tests/core/test_context.py | 137 +++ tests/core/test_regexp.py | 401 +++++++ tests/core/test_variable.py | 99 ++ tests/directives/__init__.py | 0 
tests/directives/test_block.py | 208 ++++ tests/directives/test_directive.py | 104 ++ tests/parser/__init__.py | 0 tests/parser/test_nginx_parser.py | 114 ++ tests/parser/test_raw_parser.py | 470 ++++++++ tests/plugins/__init__.py | 0 .../add_header_multiline/add_header.conf | 3 + .../add_header_multiline/add_header_fp.conf | 1 + .../simply/add_header_multiline/config.json | 3 + .../more_set_headers.conf | 3 + .../more_set_headers_fp.conf | 2 + .../more_set_headers_multiple.conf | 7 + .../more_set_headers_replace.conf | 2 + .../more_set_headers_replace_fp.conf | 1 + .../more_set_headers_status_fp.conf | 1 + .../more_set_headers_type_fp.conf | 2 + .../add_header_redefinition/config.json | 3 + .../add_header_redefinition/duplicate_fp.conf | 9 + .../add_header_redefinition/if_replaces.conf | 5 + .../location_replaces.conf | 5 + .../add_header_redefinition/non_block_fp.conf | 3 + .../not_secure_both_fp.conf | 5 + .../not_secure_outer_fp.conf | 5 + .../step_replaces.conf | 8 + tests/plugins/simply/force_https/config.json | 3 + tests/plugins/simply/force_https/return.conf | 1 + tests/plugins/simply/force_https/rewrite.conf | 1 + .../plugins/simply/force_https/simple_fp.conf | 2 + .../plugins/simply/host_spoofing/config.json | 3 + .../plugins/simply/host_spoofing/http_fp.conf | 1 + .../simply/host_spoofing/http_host.conf | 1 + .../host_spoofing/http_host_diff_case.conf | 1 + .../simply/host_spoofing/some_arg.conf | 1 + .../simply/http_splitting/add_header_uri.conf | 1 + .../plugins/simply/http_splitting/config.json | 3 + .../dont_report_not_resolved_var_fp.conf | 3 + .../proxy_from_location_var.conf | 3 + .../proxy_from_location_var_var.conf | 4 + .../proxy_from_location_var_var_fp.conf | 4 + .../proxy_from_location_var_var_var.conf | 4 + .../proxy_pass_ducument_uri.conf | 1 + .../proxy_set_header_ducument_uri.conf | 1 + .../simply/http_splitting/return_403_fp.conf | 1 + .../http_splitting/return_request_uri_fp.conf | 1 + .../http_splitting/rewrite_extract_fp.conf | 1 + 
.../simply/http_splitting/rewrite_uri.conf | 1 + .../http_splitting/rewrite_uri_after_var.conf | 1 + tests/plugins/simply/origins/config.json | 3 + tests/plugins/simply/origins/metrika.conf | 3 + tests/plugins/simply/origins/origin.conf | 3 + tests/plugins/simply/origins/origin_fp.conf | 3 + .../origins/origin_w_slash_anchored_fp.conf | 3 + .../simply/origins/origin_w_slash_fp.conf | 3 + .../simply/origins/origin_wo_slash.conf | 3 + tests/plugins/simply/origins/referer.conf | 3 + tests/plugins/simply/origins/referer_fp.conf | 3 + .../simply/origins/referer_subdomain.conf | 3 + .../simply/origins/referer_subdomain_fp.conf | 3 + tests/plugins/simply/origins/webvisor.conf | 3 + tests/plugins/simply/ssrf/config.json | 3 + .../plugins/simply/ssrf/have_internal_fp.conf | 4 + .../simply/ssrf/host_w_const_start.conf | 3 + .../simply/ssrf/host_w_const_start_arg.conf | 3 + .../plugins/simply/ssrf/not_host_var_fp.conf | 3 + tests/plugins/simply/ssrf/request_uri_fp.conf | 3 + .../simply/ssrf/request_uri_var_fp.conf | 4 + tests/plugins/simply/ssrf/scheme_var.conf | 3 + tests/plugins/simply/ssrf/single_var.conf | 3 + tests/plugins/simply/ssrf/used_arg.conf | 3 + tests/plugins/simply/ssrf/vars_from_loc.conf | 6 + .../simply/ssrf/with_const_scheme.conf | 10 + .../plugins/simply/valid_referers/config.json | 3 + .../simply/valid_referers/none_first.conf | 1 + .../simply/valid_referers/none_last.conf | 3 + .../simply/valid_referers/none_middle.conf | 2 + .../simply/valid_referers/wo_none_fp.conf | 1 + tests/plugins/test_simply.py | 98 ++ tests/utils.py | 69 ++ tox.ini | 19 + 136 files changed, 6863 insertions(+) create mode 100644 .gitignore create mode 100644 AUTHORS create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README.md create mode 100644 gixy/__init__.py create mode 100644 gixy/cli/__init__.py create mode 100644 gixy/cli/argparser.py create mode 100644 gixy/cli/main.py create mode 100644 gixy/core/__init__.py 
create mode 100644 gixy/core/builtin_variables.py create mode 100644 gixy/core/config.py create mode 100644 gixy/core/context.py create mode 100644 gixy/core/issue.py create mode 100644 gixy/core/manager.py create mode 100644 gixy/core/plugins_manager.py create mode 100644 gixy/core/regexp.py create mode 100644 gixy/core/severity.py create mode 100644 gixy/core/sre_parse/__init__.py create mode 100644 gixy/core/sre_parse/sre_constants.py create mode 100644 gixy/core/sre_parse/sre_parse.py create mode 100644 gixy/core/utils.py create mode 100644 gixy/core/variable.py create mode 100644 gixy/directives/__init__.py create mode 100644 gixy/directives/block.py create mode 100644 gixy/directives/directive.py create mode 100644 gixy/formatters/__init__.py create mode 100644 gixy/formatters/base.py create mode 100644 gixy/formatters/console.py create mode 100644 gixy/formatters/json.py create mode 100644 gixy/formatters/templates/console.j2 create mode 100644 gixy/formatters/templates/text.j2 create mode 100644 gixy/formatters/text.py create mode 100644 gixy/parser/__init__.py create mode 100644 gixy/parser/nginx_parser.py create mode 100644 gixy/parser/raw_parser.py create mode 100644 gixy/plugins/__init__.py create mode 100644 gixy/plugins/_internal_rewrite.py create mode 100644 gixy/plugins/add_header_multiline.py create mode 100644 gixy/plugins/add_header_redefinition.py create mode 100644 gixy/plugins/force_https.py create mode 100644 gixy/plugins/host_spoofing.py create mode 100644 gixy/plugins/http_splitting.py create mode 100644 gixy/plugins/origins.py create mode 100644 gixy/plugins/plugin.py create mode 100644 gixy/plugins/ssrf.py create mode 100644 gixy/plugins/valid_referers.py create mode 100644 requirements.dev.pip create mode 100644 requirements.pip create mode 100644 setup.py create mode 100644 tests/__init__.py create mode 100644 tests/core/__init__.py create mode 100644 tests/core/test_context.py create mode 100644 tests/core/test_regexp.py create mode 
100644 tests/core/test_variable.py create mode 100644 tests/directives/__init__.py create mode 100644 tests/directives/test_block.py create mode 100644 tests/directives/test_directive.py create mode 100644 tests/parser/__init__.py create mode 100644 tests/parser/test_nginx_parser.py create mode 100644 tests/parser/test_raw_parser.py create mode 100644 tests/plugins/__init__.py create mode 100644 tests/plugins/simply/add_header_multiline/add_header.conf create mode 100644 tests/plugins/simply/add_header_multiline/add_header_fp.conf create mode 100644 tests/plugins/simply/add_header_multiline/config.json create mode 100644 tests/plugins/simply/add_header_multiline/more_set_headers.conf create mode 100644 tests/plugins/simply/add_header_multiline/more_set_headers_fp.conf create mode 100644 tests/plugins/simply/add_header_multiline/more_set_headers_multiple.conf create mode 100644 tests/plugins/simply/add_header_multiline/more_set_headers_replace.conf create mode 100644 tests/plugins/simply/add_header_multiline/more_set_headers_replace_fp.conf create mode 100644 tests/plugins/simply/add_header_multiline/more_set_headers_status_fp.conf create mode 100644 tests/plugins/simply/add_header_multiline/more_set_headers_type_fp.conf create mode 100644 tests/plugins/simply/add_header_redefinition/config.json create mode 100644 tests/plugins/simply/add_header_redefinition/duplicate_fp.conf create mode 100644 tests/plugins/simply/add_header_redefinition/if_replaces.conf create mode 100644 tests/plugins/simply/add_header_redefinition/location_replaces.conf create mode 100644 tests/plugins/simply/add_header_redefinition/non_block_fp.conf create mode 100644 tests/plugins/simply/add_header_redefinition/not_secure_both_fp.conf create mode 100644 tests/plugins/simply/add_header_redefinition/not_secure_outer_fp.conf create mode 100644 tests/plugins/simply/add_header_redefinition/step_replaces.conf create mode 100644 tests/plugins/simply/force_https/config.json create mode 100644 
tests/plugins/simply/force_https/return.conf create mode 100644 tests/plugins/simply/force_https/rewrite.conf create mode 100644 tests/plugins/simply/force_https/simple_fp.conf create mode 100644 tests/plugins/simply/host_spoofing/config.json create mode 100644 tests/plugins/simply/host_spoofing/http_fp.conf create mode 100644 tests/plugins/simply/host_spoofing/http_host.conf create mode 100644 tests/plugins/simply/host_spoofing/http_host_diff_case.conf create mode 100644 tests/plugins/simply/host_spoofing/some_arg.conf create mode 100644 tests/plugins/simply/http_splitting/add_header_uri.conf create mode 100644 tests/plugins/simply/http_splitting/config.json create mode 100644 tests/plugins/simply/http_splitting/dont_report_not_resolved_var_fp.conf create mode 100644 tests/plugins/simply/http_splitting/proxy_from_location_var.conf create mode 100644 tests/plugins/simply/http_splitting/proxy_from_location_var_var.conf create mode 100644 tests/plugins/simply/http_splitting/proxy_from_location_var_var_fp.conf create mode 100644 tests/plugins/simply/http_splitting/proxy_from_location_var_var_var.conf create mode 100644 tests/plugins/simply/http_splitting/proxy_pass_ducument_uri.conf create mode 100644 tests/plugins/simply/http_splitting/proxy_set_header_ducument_uri.conf create mode 100644 tests/plugins/simply/http_splitting/return_403_fp.conf create mode 100644 tests/plugins/simply/http_splitting/return_request_uri_fp.conf create mode 100644 tests/plugins/simply/http_splitting/rewrite_extract_fp.conf create mode 100644 tests/plugins/simply/http_splitting/rewrite_uri.conf create mode 100644 tests/plugins/simply/http_splitting/rewrite_uri_after_var.conf create mode 100644 tests/plugins/simply/origins/config.json create mode 100644 tests/plugins/simply/origins/metrika.conf create mode 100644 tests/plugins/simply/origins/origin.conf create mode 100644 tests/plugins/simply/origins/origin_fp.conf create mode 100644 
tests/plugins/simply/origins/origin_w_slash_anchored_fp.conf create mode 100644 tests/plugins/simply/origins/origin_w_slash_fp.conf create mode 100644 tests/plugins/simply/origins/origin_wo_slash.conf create mode 100644 tests/plugins/simply/origins/referer.conf create mode 100644 tests/plugins/simply/origins/referer_fp.conf create mode 100644 tests/plugins/simply/origins/referer_subdomain.conf create mode 100644 tests/plugins/simply/origins/referer_subdomain_fp.conf create mode 100644 tests/plugins/simply/origins/webvisor.conf create mode 100644 tests/plugins/simply/ssrf/config.json create mode 100644 tests/plugins/simply/ssrf/have_internal_fp.conf create mode 100644 tests/plugins/simply/ssrf/host_w_const_start.conf create mode 100644 tests/plugins/simply/ssrf/host_w_const_start_arg.conf create mode 100644 tests/plugins/simply/ssrf/not_host_var_fp.conf create mode 100644 tests/plugins/simply/ssrf/request_uri_fp.conf create mode 100644 tests/plugins/simply/ssrf/request_uri_var_fp.conf create mode 100644 tests/plugins/simply/ssrf/scheme_var.conf create mode 100644 tests/plugins/simply/ssrf/single_var.conf create mode 100644 tests/plugins/simply/ssrf/used_arg.conf create mode 100644 tests/plugins/simply/ssrf/vars_from_loc.conf create mode 100644 tests/plugins/simply/ssrf/with_const_scheme.conf create mode 100644 tests/plugins/simply/valid_referers/config.json create mode 100644 tests/plugins/simply/valid_referers/none_first.conf create mode 100644 tests/plugins/simply/valid_referers/none_last.conf create mode 100644 tests/plugins/simply/valid_referers/none_middle.conf create mode 100644 tests/plugins/simply/valid_referers/wo_none_fp.conf create mode 100644 tests/plugins/test_simply.py create mode 100644 tests/utils.py create mode 100644 tox.ini diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ca5cee2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,62 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# 
Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +cover + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +venv/ +venv3/ +.idea/ \ No newline at end of file diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..2d4643a --- /dev/null +++ b/AUTHORS @@ -0,0 +1,4 @@ +The following authors have created the source code of "Gixy" +published and distributed by YANDEX LLC as the owner: + +Andrew Krasichkov \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..c841439 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,8 @@ +Contributions to Gixy are always welcome! You can help us in different ways: + * Open an issue with suggestions for improvements and errors you're facing; + * Fork this repository and submit a pull request; + * Improve the documentation. + +# Code guidelines: + * Python code style should follow [PEP8 standards][pep8] standards whenever possible; + * Pull requests with new plugins must contain unit tests for it. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d5036fd --- /dev/null +++ b/LICENSE @@ -0,0 +1,355 @@ +(C) YANDEX LLC, 2017 + +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. 
"Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. 
"Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. 
+Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..a498327 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include gixy/formatters/templates/* \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..f2eaafa --- /dev/null +++ b/README.md @@ -0,0 +1,78 @@ +GIXY +==== + +# Overview +Gixy is a tool for Nginx configuration analyzing. The main goal of Gixy is to prevent misconfiguration and automate flaw detection. +Currently supported Python versions is 2.7 and 3.4+. 
+Disclaimer: Gixy is well tested only on GNU/Linux, in other OS may have some issues. + +# Installation +Gixy is distributed on PyPI. The best way to install it is with pip: +```bash +pip install bandit +``` + +Run Gixy and check results: +```bash +gixy +``` + +# Usage +By default Gixy will try to analyze Nginx configuration placed in `/etc/nginx/nginx.conf`. But you can always specify needed path: +```bash +$ gixy /etc/nginx/nginx.conf + +==================== Results =================== + +Problem: [http_splitting] Possible HTTP-Splitting vulnerability. +Description: Using variables that can contain "\n" may lead to http injection. +Additional info: https://github.com/yandex/gixy/wiki/ru/httpsplitting +Reason: At least variable "$action" can contain "\n" +Pseudo config: +include /etc/nginx/sites/default.conf; + + server { + + location ~ /v1/((?[^.]*)\.json)?$ { + add_header X-Action $action; + } + } + + +==================== Summary =================== +Total issues: + Unspecified: 0 + Low: 0 + Medium: 0 + High: 1 +``` + +Or skip some tests: +``` +$ ./gixy-cli.py --skips http_splitting /etc/nginx/nginx.conf + +==================== Results =================== +No issues found. + +==================== Summary =================== +Total issues: + Unspecified: 0 + Low: 0 + Medium: 0 + High: 0 +``` + +You can achieve all other `gixy` arguments with the help command: `gixy --help` + +# Documentation +Full documentation and recommendations can be found [here](https://github.com/yandex/gixy/wiki/ru/) (sorry, but Russian language only so far) + +# Contributing +Contributions to Gixy are always welcome! You can help us in different ways: + * Open an issue with suggestions for improvements and errors you're facing; + * Fork this repository and submit a pull request; + * Improve the documentation. + +Code guidelines: + * Python code style should follow [PEP8 standards][pep8] standards whenever possible; + * Pull requests with new plugins must contain unit tests for it. 
diff --git a/gixy/__init__.py b/gixy/__init__.py new file mode 100644 index 0000000..153ef7d --- /dev/null +++ b/gixy/__init__.py @@ -0,0 +1,5 @@ +# flake8: noqa + +from gixy.core import severity + +version = '0.0.14' diff --git a/gixy/cli/__init__.py b/gixy/cli/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gixy/cli/argparser.py b/gixy/cli/argparser.py new file mode 100644 index 0000000..9107bc4 --- /dev/null +++ b/gixy/cli/argparser.py @@ -0,0 +1,160 @@ +# flake8: noqa + +from configargparse import * +from six.moves import StringIO + +from gixy.core.plugins_manager import PluginsManager + + +# used while parsing args to keep track of where they came from +_COMMAND_LINE_SOURCE_KEY = 'command_line' +_ENV_VAR_SOURCE_KEY = 'environment_variables' +_CONFIG_FILE_SOURCE_KEY = 'config_file' +_DEFAULTS_SOURCE_KEY = 'defaults' + + +class GixyConfigParser(DefaultConfigFileParser): + def get_syntax_description(self): + return '' + + def parse(self, stream): + """Parses the keys + values from a config file.""" + + items = OrderedDict() + prefix = '' + for i, line in enumerate(stream): + line = line.strip() + if not line or line[0] in ['#', ';'] or line.startswith('---'): + continue + if line[0] == '[': + prefix = '%s-' % line[1:-1].replace('_', '-') + continue + + white_space = '\\s*' + key = '(?P<key>[^:=;#\s]+?)' + value = white_space+'[:=\s]'+white_space+'(?P<value>.+?)' + comment = white_space+'(?P<comment>\\s[;#].*)?' 
+ + key_only_match = re.match('^' + key + comment + '$', line) + if key_only_match: + key = key_only_match.group('key') + items[key] = 'true' + continue + + key_value_match = re.match('^'+key+value+comment+'$', line) + if key_value_match: + key = key_value_match.group('key') + value = key_value_match.group('value') + + if value.startswith('[') and value.endswith(']'): + # handle special case of lists + value = [elem.strip() for elem in value[1:-1].split(',')] + + items[prefix + key] = value + continue + + raise ConfigFileParserException('Unexpected line %s in %s: %s' % (i, + getattr(stream, 'name', 'stream'), line)) + return items + + def serialize(self, items): + """Does the inverse of config parsing by taking parsed values and + converting them back to a string representing config file contents. + """ + r = StringIO() + for key, value in items.items(): + if type(value) == OrderedDict: + r.write('\n[%s]\n' % key) + r.write(self.serialize(value)) + else: + value, help = value + if help: + r.write('; %s\n' % help) + r.write('%s = %s\n' % (key, value)) + return r.getvalue() + + +class GixyHelpFormatter(HelpFormatter): + def format_help(self): + manager = PluginsManager() + help_message = super(GixyHelpFormatter, self).format_help() + if 'plugins options:' in help_message: + # Print available plugins _only_ if we print options for them + plugins = '\n'.join('\t' + plugin.__name__ for plugin in manager.plugins_classes) + help_message = '{orig}\n\navailable plugins:\n{plugins}\n'.format(orig=help_message, plugins=plugins) + return help_message + + +class ArgsParser(ArgumentParser): + def get_possible_config_keys(self, action): + """This method decides which actions can be set in a config file and + what their keys will be. It returns a list of 0 or more config keys that + can be used to set the given action's value in a config file. 
+ """ + keys = [] + for arg in action.option_strings: + if arg in {'--config', '--write-config', '--version'}: + continue + if any([arg.startswith(2*c) for c in self.prefix_chars]): + keys += [arg[2:], arg] # eg. for '--bla' return ['bla', '--bla'] + + return keys + + def get_items_for_config_file_output(self, source_to_settings, + parsed_namespace): + """Converts the given settings back to a dictionary that can be passed + to ConfigFormatParser.serialize(..). + + Args: + source_to_settings: the dictionary described in parse_known_args() + parsed_namespace: namespace object created within parse_known_args() + Returns: + an OrderedDict where keys are strings and values are either strings + or lists + """ + config_file_items = OrderedDict() + for source, settings in source_to_settings.items(): + if source == _COMMAND_LINE_SOURCE_KEY: + _, existing_command_line_args = settings[''] + for action in self._actions: + config_file_keys = self.get_possible_config_keys(action) + if config_file_keys and not action.is_positional_arg and \ + already_on_command_line(existing_command_line_args, + action.option_strings): + value = getattr(parsed_namespace, action.dest, None) + if value is not None: + if type(value) is bool: + value = str(value).lower() + if ':' in action.dest: + section, key = action.dest.split(':', 2) + key = key.replace('_', '-') + if section not in config_file_items: + config_file_items[section] = OrderedDict() + config_file_items[section][key] = (value, action.help) + else: + config_file_items[config_file_keys[0]] = (value, action.help) + elif source.startswith(_CONFIG_FILE_SOURCE_KEY): + for key, (action, value) in settings.items(): + if ':' in action.dest: + section, key = action.dest.split(':', 2) + key = key.replace('_', '-') + if section not in config_file_items: + config_file_items[section] = OrderedDict() + config_file_items[section][key] = (value, action.help) + else: + config_file_items[key] = (value, action.help) + return config_file_items + + +def 
create_parser(): + return ArgsParser( + description='Gixy - a Nginx configuration [sec]analyzer\n\n', + formatter_class=GixyHelpFormatter, + config_file_parser_class=GixyConfigParser, + auto_env_var_prefix='GIXY_', + add_env_var_help=False, + default_config_files=['/etc/gixy/gixy.cfg', '~/.config/gixy/gixy.conf'], + args_for_setting_config_path=['-c', '--config'], + args_for_writing_out_config_file=['--write-config'], + add_config_file_help=False + ) \ No newline at end of file diff --git a/gixy/cli/main.py b/gixy/cli/main.py new file mode 100644 index 0000000..09b5fd2 --- /dev/null +++ b/gixy/cli/main.py @@ -0,0 +1,173 @@ +import os +import sys +import logging +import copy + +import gixy +from gixy.core.manager import Manager as Gixy +from gixy.formatters import get_all as formatters +from gixy.core.plugins_manager import PluginsManager +from gixy.core.config import Config +from gixy.cli.argparser import create_parser + +LOG = logging.getLogger() + + +def _init_logger(debug=False): + LOG.handlers = [] + log_level = logging.DEBUG if debug else logging.INFO + logging.captureWarnings(True) + + LOG.setLevel(log_level) + handler = logging.StreamHandler(sys.stderr) + handler.setFormatter(logging.Formatter('[%(module)s]\t%(levelname)s\t%(message)s')) + LOG.addHandler(handler) + LOG.debug("logging initialized") + + +def _create_plugin_help(option): + if isinstance(option, (tuple, list, set)): + default = ','.join(list(option)) + else: + default = str(option) + + return 'Default: {}'.format(default) + + +def _get_cli_parser(): + parser = create_parser() + parser.add_argument('nginx_file', nargs='?', type=str, default='/etc/nginx/nginx.conf', metavar='nginx.conf', + help='Path to nginx.conf, e.g. 
/etc/nginx/nginx.conf') + + parser.add_argument( + '-v', '--version', action='version', + version='Gixy v{}'.format(gixy.version)) + + parser.add_argument( + '-l', '--level', dest='level', action='count', default=0, + help='Report issues of a given severity level or higher (-l for LOW, -ll for MEDIUM, -lll for HIGH)') + + default_formatter = 'console' if sys.stdout.isatty() else 'text' + available_formatters = formatters().keys() + parser.add_argument( + '-f', '--format', dest='output_format', choices=available_formatters, default=default_formatter, + type=str, help='Specify output format') + + parser.add_argument( + '-o', '--output', dest='output_file', type=str, + help='Write report to file') + + parser.add_argument( + '-d', '--debug', dest='debug', action='store_true', default=False, + help='Turn on debug mode') + + parser.add_argument( + '--tests', dest='tests', type=str, + help='Comma-separated list of tests to run') + + parser.add_argument( + '--skips', dest='skips', type=str, + help='Comma-separated list of tests to skip') + + parser.add_argument( + '--disable-includes', dest='disable_includes', action='store_true', default=False, + help='Disable "include" directive processing') + + group = parser.add_argument_group('plugins options') + for plugin_cls in PluginsManager().plugins_classes: + name = plugin_cls.__name__ + if not plugin_cls.options: + continue + + options = copy.deepcopy(plugin_cls.options) + for opt_key, opt_val in options.items(): + option_name = '--{plugin}-{key}'.format(plugin=name, key=opt_key).replace('_', '-') + dst_name = '{plugin}:{key}'.format(plugin=name, key=opt_key) + opt_type = str if isinstance(opt_val, (tuple, list, set)) else type(opt_val) + group.add_argument( + option_name, metavar=opt_key, dest=dst_name, type=opt_type, + help=_create_plugin_help(opt_val) + ) + + return parser + + +def _is_nginx_file(file_path): + s = open(file_path).read() + return 'server {' in s or 'http {' in s + + +def main(): + parser = _get_cli_parser() 
+ args = parser.parse_args() + _init_logger(args.debug) + + path = os.path.expanduser(args.nginx_file) + if not os.path.isfile(path): + sys.stderr.write('Please specify path to Nginx configuration.\n\n') + parser.print_help() + sys.exit(1) + + if not _is_nginx_file(path): + sys.stderr.write('This is nginx config? Rly?\n') + sys.exit(1) + + try: + severity = gixy.severity.ALL[args.level] + except IndexError: + sys.stderr.write('Too high level filtering. Maximum level: -{}\n'.format('l' * (len(gixy.severity.ALL) - 1))) + sys.exit(1) + + if args.tests: + tests = [x.strip() for x in args.tests.split(',')] + else: + tests = None + + if args.skips: + skips = [x.strip() for x in args.skips.split(',')] + else: + skips = None + + config = Config( + severity=severity, + output_format=args.output_format, + output_file=args.output_file, + plugins=tests, + skips=skips, + allow_includes=not args.disable_includes + ) + + for plugin_cls in PluginsManager().plugins_classes: + name = plugin_cls.__name__ + options = copy.deepcopy(plugin_cls.options) + for opt_key, opt_val in options.items(): + option_name = '{}:{}'.format(name, opt_key) + if option_name not in args: + continue + + val = getattr(args, option_name) + if val is None: + continue + + if isinstance(opt_val, tuple): + val = tuple([x.strip() for x in val.split(',')]) + elif isinstance(opt_val, set): + val = set([x.strip() for x in val.split(',')]) + elif isinstance(opt_val, list): + val = [x.strip() for x in val.split(',')] + options[opt_key] = val + config.set_for(name, options) + + with Gixy(config=config) as yoda: + yoda.audit(path) + formatted = formatters()[config.output_format]().format(yoda) + if args.output_file: + with open(config.output_file, 'w') as f: + f.write(formatted) + else: + print(formatted) + + if sum(yoda.stats.values()) > 0: + # If something found - exit code must be 1, otherwise 0 + sys.exit(1) + sys.exit(0) diff --git a/gixy/core/__init__.py b/gixy/core/__init__.py new file mode 100644 index 
0000000..e69de29 diff --git a/gixy/core/builtin_variables.py b/gixy/core/builtin_variables.py new file mode 100644 index 0000000..1e98b54 --- /dev/null +++ b/gixy/core/builtin_variables.py @@ -0,0 +1,266 @@ +from gixy.core.regexp import Regexp +from gixy.core.variable import Variable + + +BUILTIN_VARIABLES = { + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_uri + 'uri': '/[^\x20\t]*', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_document_uri + 'document_uri': '/[^\x20\t]*', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_arg_ + 'arg_': '[^\s&]+', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_args + 'args': '[^\s]+', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_query_string + 'query_string': '[^\s]+', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_uri + 'request_uri': '/[^\s]*', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_http_ + 'http_': '[\x21-\x7e]', + + # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_http_ + 'upstream_http_': '', + # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_cookie_ + 'upstream_cookie_': '', + # http://nginx.org/en/docs/http/ngx_http_proxy_module.html#var_proxy_add_x_forwarded_for + 'proxy_add_x_forwarded_for': '', + # http://nginx.org/en/docs/http/ngx_http_proxy_module.html#var_proxy_host + 'proxy_host': '', + # http://nginx.org/en/docs/http/ngx_http_proxy_module.html#var_proxy_port + 'proxy_port': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_proxy_protocol_addr + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_proxy_protocol_addr + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_proxy_protocol_port + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_proxy_protocol_port + 'proxy_protocol_port': '', + # http://nginx.org/en/docs/http/ngx_http_fastcgi_module.html#var_fastcgi_path_info + 
'fastcgi_path_info': '', + # http://nginx.org/en/docs/http/ngx_http_fastcgi_module.html#var_fastcgi_script_name + 'fastcgi_script_name': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_content_type + 'content_type': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_cookie_ + 'cookie_': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_host + 'host': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_hostname + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_hostname + 'hostname': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_limit_rate + 'limit_rate': '', + # http://nginx.org/en/docs/http/ngx_http_memcached_module.html#var_memcached_key + 'memcached_key': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_realpath_root + 'realpath_root': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_remote_user + 'remote_user': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request + 'request': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_body + 'request_body': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_completion + 'request_completion': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_filename + 'request_filename': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_id + 'request_id': '', + # http://nginx.org/en/docs/http/ngx_http_slice_module.html#var_slice_range + 'slice_range': '', + # http://nginx.org/en/docs/http/ngx_http_secure_link_module.html#var_secure_link + 'secure_link': '', + # http://nginx.org/en/docs/http/ngx_http_secure_link_module.html#var_secure_link_expires + 'secure_link_expires': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_sent_http_ + 'sent_http_': '', + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_server_name + 'server_name': '', + + # 
"Secure" variables that can't content or strictly limited user input + + # http://nginx.org/en/docs/http/ngx_http_browser_module.html#var_ancient_browser + 'ancient_browser': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_binary_remote_addr + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_binary_remote_addr + 'binary_remote_addr': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_body_bytes_sent + 'body_bytes_sent': None, + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_bytes_received + 'bytes_received': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_bytes_sent + # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_bytes_sent + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_bytes_sent + 'bytes_sent': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_connection + # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_connection + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_connection + 'connection': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_connection_requests + # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_connection_requests + 'connection_requests': None, + # http://nginx.org/en/docs/http/ngx_http_stub_status_module.html#var_connections_active + 'connections_active': None, + # http://nginx.org/en/docs/http/ngx_http_stub_status_module.html#var_connections_reading + 'connections_reading': None, + # http://nginx.org/en/docs/http/ngx_http_stub_status_module.html#var_connections_waiting + 'connections_waiting': None, + # http://nginx.org/en/docs/http/ngx_http_stub_status_module.html#var_connections_writing + 'connections_writing': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_content_length + 'content_length': None, + # http://nginx.org/en/docs/http/ngx_http_ssi_module.html#var_date_gmt + 'date_gmt': None, + # 
http://nginx.org/en/docs/http/ngx_http_ssi_module.html#var_date_local + 'date_local': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_document_root + 'document_root': '/etc/nginx', + # http://nginx.org/en/docs/http/ngx_http_geoip_module.html + # http://nginx.org/en/docs/stream/ngx_stream_geoip_module.html + 'geoip_': None, + # http://nginx.org/en/docs/http/ngx_http_gzip_module.html#var_gzip_ratio + 'gzip_ratio': None, + # http://nginx.org/en/docs/http/ngx_http_v2_module.html#var_http2 + 'http2': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_https + 'https': None, + # http://nginx.org/en/docs/http/ngx_http_referer_module.html#var_invalid_referer + 'invalid_referer': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_is_args + 'is_args': None, + # http://nginx.org/en/docs/http/ngx_http_auth_jwt_module.html + 'jwt_': None, + # http://nginx.org/en/docs/http/ngx_http_browser_module.html#var_modern_browser + 'modern_browser': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_msec + # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_msec + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_msec + 'msec': None, + # http://nginx.org/en/docs/http/ngx_http_browser_module.html#var_msie + 'msie': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_nginx_version + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_nginx_version + 'nginx_version': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_pid + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_pid + 'pid': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_pipe + # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_pipe + 'pipe': None, + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_protocol + 'protocol': None, + # http://nginx.org/en/docs/http/ngx_http_realip_module.html#var_realip_remote_addr + # 
http://nginx.org/en/docs/stream/ngx_stream_realip_module.html#var_realip_remote_addr + # http://nginx.org/en/docs/http/ngx_http_realip_module.html#var_realip_remote_port + # http://nginx.org/en/docs/stream/ngx_stream_realip_module.html#var_realip_remote_port + 'realip_remote_port': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_remote_addr + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_remote_addr + 'remote_addr': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_remote_port + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_remote_port + 'remote_port': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_body_file + 'request_body_file': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_length + # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_request_length + 'request_length': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_method + 'request_method': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_time + # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_request_time + 'request_time': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_scheme + 'scheme': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_server_addr + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_server_addr + 'server_addr': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_server_port + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_server_port + 'server_port': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_server_protocol + 'server_protocol': None, + # http://nginx.org/en/docs/http/ngx_http_session_log_module.html#var_session_log_binary_id + 'session_log_binary_id': None, + # http://nginx.org/en/docs/http/ngx_http_session_log_module.html#var_session_log_id + 
'session_log_id': None, + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_session_time + 'session_time': None, + # http://nginx.org/en/docs/http/ngx_http_spdy_module.html#var_spdy + 'spdy': None, + # http://nginx.org/en/docs/http/ngx_http_spdy_module.html#var_spdy_request_priority + 'spdy_request_priority': None, + # http://nginx.org/en/docs/http/ngx_http_ssl_module.html + # http://nginx.org/en/docs/stream/ngx_stream_ssl_module.html + 'ssl_': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_status + # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_status + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_status + 'status': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html + 'tcpinfo_': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html + # http://nginx.org/en/docs/http/ngx_http_log_module.html + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html + 'time_iso8601': None, + # http://nginx.org/en/docs/http/ngx_http_core_module.html + # http://nginx.org/en/docs/http/ngx_http_log_module.html + # http://nginx.org/en/docs/stream/ngx_stream_core_module.html + 'time_local': None, + # http://nginx.org/en/docs/http/ngx_http_userid_module.html#var_uid_got + 'uid_got': None, + # http://nginx.org/en/docs/http/ngx_http_userid_module.html#var_uid_reset + 'uid_reset': None, + # http://nginx.org/en/docs/http/ngx_http_userid_module.html#var_uid_set + 'uid_set': None, + # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_addr + # http://nginx.org/en/docs/stream/ngx_stream_upstream_module.html#var_upstream_addr + 'upstream_addr': None, + # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_bytes_received + # http://nginx.org/en/docs/stream/ngx_stream_upstream_module.html#var_upstream_bytes_received + 'upstream_bytes_received': None, + # http://nginx.org/en/docs/stream/ngx_stream_upstream_module.html#var_upstream_bytes_sent + 
'upstream_bytes_sent': None, + # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_cache_status + 'upstream_cache_status': None, + # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_connect_time + # http://nginx.org/en/docs/stream/ngx_stream_upstream_module.html#var_upstream_connect_time + 'upstream_connect_time': None, + # http://nginx.org/en/docs/stream/ngx_stream_upstream_module.html#var_upstream_first_byte_time + 'upstream_first_byte_time': None, + # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_header_time + 'upstream_header_time': None, + # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_response_length + 'upstream_response_length': None, + # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_response_time + 'upstream_response_time': None, + # http://nginx.org/en/docs/stream/ngx_stream_upstream_module.html#var_upstream_session_time + 'upstream_session_time': None, + # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_status + 'upstream_status': None +} + + +def is_builtin(name): + if isinstance(name, int): + # Indexed variables can't be builtin + return False + for builtin in BUILTIN_VARIABLES: + if builtin.endswith('_'): + if name.startswith(builtin): + return True + elif name == builtin: + return True + return False + + +def builtin_var(name): + for builtin, regexp in BUILTIN_VARIABLES.items(): + if builtin.endswith('_'): + if not name.startswith(builtin): + continue + elif name != builtin: + continue + + if regexp: + return Variable(name=name, value=Regexp(regexp, strict=True, case_sensitive=False)) + return Variable(name=name, value='builtin', have_script=False) + return None diff --git a/gixy/core/config.py b/gixy/core/config.py new file mode 100644 index 0000000..8237ef4 --- /dev/null +++ b/gixy/core/config.py @@ -0,0 +1,30 @@ +import gixy + + +class Config(object): + def __init__(self, + plugins=None, + 
skips=None, + severity=gixy.severity.UNSPECIFIED, + output_format=None, + output_file=None, + allow_includes=True): + + self.severity = severity + self.output_format = output_format + self.output_file = output_file + self.plugins = plugins + self.skips = skips + self.allow_includes = allow_includes + self.plugins_options = {} + + def set_for(self, name, options): + self.plugins_options[name] = options + + def get_for(self, name): + if self.has_for(name): + return self.plugins_options[name] + return {} + + def has_for(self, name): + return name in self.plugins_options diff --git a/gixy/core/context.py b/gixy/core/context.py new file mode 100644 index 0000000..0a938e8 --- /dev/null +++ b/gixy/core/context.py @@ -0,0 +1,91 @@ +import logging +import copy + +from gixy.core.utils import is_indexed_name + + +LOG = logging.getLogger(__name__) + +CONTEXTS = [] + + +def get_context(): + return CONTEXTS[-1] + + +def purge_context(): + del CONTEXTS[:] + + +def push_context(block): + if len(CONTEXTS): + context = copy.deepcopy(get_context()) + else: + context = Context() + context.set_block(block) + CONTEXTS.append(context) + return context + + +def pop_context(): + return CONTEXTS.pop() + + +class Context(object): + def __init__(self): + self.block = None + self.variables = { + 'index': {}, + 'name': {} + } + + def set_block(self, directive): + self.block = directive + return self + + def clear_index_vars(self): + self.variables['index'] = {} + return self + + def add_var(self, name, var): + if is_indexed_name(name): + var_type = 'index' + name = int(name) + else: + var_type = 'name' + + self.variables[var_type][name] = var + return self + + def get_var(self, name): + if is_indexed_name(name): + var_type = 'index' + name = int(name) + else: + var_type = 'name' + + result = None + try: + result = self.variables[var_type][name] + except KeyError: + if var_type == 'name': + # Only named variables can be builtins + import gixy.core.builtin_variables as builtins + + if 
builtins.is_builtin(name): + result = builtins.builtin_var(name) + + if not result: + LOG.info("Can't find variable '{}'".format(name)) + return result + + def __deepcopy__(self, memo): + cls = self.__class__ + result = cls.__new__(cls) + memo[id(self)] = result + result.block = copy.copy(self.block) + result.variables = { + 'index': copy.copy(self.variables['index']), + 'name': copy.copy(self.variables['name']) + } + return result diff --git a/gixy/core/issue.py b/gixy/core/issue.py new file mode 100644 index 0000000..fe86cfe --- /dev/null +++ b/gixy/core/issue.py @@ -0,0 +1,16 @@ +class Issue(object): + + def __init__(self, plugin, summary=None, description=None, + severity=None, reason=None, help_url=None, directives=None): + self.plugin = plugin + self.summary = summary + self.description = description + self.severity = severity + self.reason = reason + self.help_url = help_url + if not directives: + self.directives = [] + elif not hasattr(directives, '__iter__'): + self.directives = [directives] + else: + self.directives = directives diff --git a/gixy/core/manager.py b/gixy/core/manager.py new file mode 100644 index 0000000..2017469 --- /dev/null +++ b/gixy/core/manager.py @@ -0,0 +1,59 @@ +import gixy +from gixy.core.plugins_manager import PluginsManager +from gixy.core.context import get_context, pop_context, push_context, purge_context +from gixy.parser.nginx_parser import NginxParser +from gixy.core.config import Config + + +class Manager(object): + def __init__(self, config=None): + self.root = None + self.parser = None + self.auditor = None + self.config = config or Config() + self.stats = {gixy.severity.UNSPECIFIED: 0, + gixy.severity.LOW: 0, + gixy.severity.MEDIUM: 0, + gixy.severity.HIGH: 0} + + def audit(self, file_path): + self.auditor = PluginsManager(config=self.config) + self.parser = NginxParser(file_path, allow_includes=self.config.allow_includes) + self.root = self.parser.parse(file_path) + push_context(self.root) + 
self._audit_recursive(self.root.children) + + def get_results(self): + for plugin in self.auditor.plugins: + if plugin.issues: + self.stats[plugin.severity] += len(plugin.issues) + yield plugin + + def _audit_recursive(self, tree): + for directive in tree: + self._update_variables(directive) + self.auditor.audit(directive) + if directive.is_block: + if directive.self_context: + push_context(directive) + self._audit_recursive(directive.children) + if directive.self_context: + pop_context() + + def _update_variables(self, directive): + # TODO(buglloc): finish him! + if not directive.provide_variables: + return + + context = get_context() + for var in directive.variables: + if var.name == 0: + # All regexps must clean indexed variables + context.clear_index_vars() + context.add_var(var.name, var) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + purge_context() diff --git a/gixy/core/plugins_manager.py b/gixy/core/plugins_manager.py new file mode 100644 index 0000000..98c0eae --- /dev/null +++ b/gixy/core/plugins_manager.py @@ -0,0 +1,75 @@ +import os + +import gixy +from gixy.plugins.plugin import Plugin + + +class PluginsManager(object): + + def __init__(self, config=None): + self.imported = False + self.config = config + self._plugins = [] + + def import_plugins(self): + if self.imported: + return + + files_list = os.listdir(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'plugins')) + for plugin_file in files_list: + if not plugin_file.endswith('.py') or plugin_file.startswith('_'): + continue + __import__('gixy.plugins.'+os.path.splitext(plugin_file)[0], None, None, ['']) + + self.imported = True + + def init_plugins(self): + self.import_plugins() + + exclude = self.config.skips if self.config else None + include = self.config.plugins if self.config else None + severity = self.config.severity if self.config else None + for plugin_cls in Plugin.__subclasses__(): + name = plugin_cls.__name__ + if include and 
name not in include: + # Skip not needed plugins + continue + if exclude and name in exclude: + # Skipped plugins + continue + if severity and not gixy.severity.is_acceptable(plugin_cls.severity, severity): + # Skip plugin by severity level + continue + if self.config and self.config.has_for(name): + options = self.config.get_for(name) + else: + options = plugin_cls.options + self._plugins.append(plugin_cls(options)) + + @property + def plugins(self): + if not self._plugins: + self.init_plugins() + return self._plugins + + @property + def plugins_classes(self): + self.import_plugins() + return Plugin.__subclasses__() + + def get_plugins_descriptions(self): + return map(lambda a: a.name, self.plugins) + + def audit(self, directive): + for plugin in self.plugins: + if plugin.directives and directive.name not in plugin.directives: + continue + plugin.audit(directive) + + def issues(self): + result = [] + for plugin in self.plugins: + if not plugin.issues: + continue + result.extend(plugin.issues) + return result diff --git a/gixy/core/regexp.py b/gixy/core/regexp.py new file mode 100644 index 0000000..30b66d9 --- /dev/null +++ b/gixy/core/regexp.py @@ -0,0 +1,1021 @@ +import six +import logging +import re +import random +import itertools +from cached_property import cached_property + +import gixy.core.sre_parse.sre_parse as sre_parse + +LOG = logging.getLogger(__name__) + + +def _build_reverse_list(original): + result = [] + for c in range(1, 126): + c = six.unichr(c) + if c not in original: + result.append(c) + return frozenset(result) + + +FIX_NAMED_GROUPS_RE = re.compile(r"(?|')") + +CATEGORIES = { + # TODO(buglloc): unicode? 
+ sre_parse.CATEGORY_SPACE: sre_parse.WHITESPACE, + sre_parse.CATEGORY_NOT_SPACE: _build_reverse_list(sre_parse.WHITESPACE), + sre_parse.CATEGORY_DIGIT: sre_parse.DIGITS, + sre_parse.CATEGORY_NOT_DIGIT: _build_reverse_list(sre_parse.DIGITS), + sre_parse.CATEGORY_WORD: frozenset('abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '0123456789_'), + sre_parse.CATEGORY_NOT_WORD: _build_reverse_list(frozenset('abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '0123456789_')), + sre_parse.CATEGORY_LINEBREAK: frozenset('\n'), + sre_parse.CATEGORY_NOT_LINEBREAK: _build_reverse_list(frozenset('\n')), + 'ANY': [six.unichr(x) for x in range(1, 127) if x != 10] +} + +CATEGORIES_NAMES = { + sre_parse.CATEGORY_DIGIT: r'\d', + sre_parse.CATEGORY_NOT_DIGIT: r'\D', + sre_parse.CATEGORY_SPACE: r'\s', + sre_parse.CATEGORY_NOT_SPACE: r'\S', + sre_parse.CATEGORY_WORD: r'\w', + sre_parse.CATEGORY_NOT_WORD: r'\W', +} + + +def extract_groups(parsed, top=True): + result = {} + if top: + result[0] = parsed + for token in parsed: + if not token: + # Skip empty tokens + pass + elif token[0] == sre_parse.SUBPATTERN: + if isinstance(token[1][0], int): + # Captured group index can't be a string. E.g. 
for pattern "(?:la)" group name is "None" + result[token[1][0]] = token[1][1] + result.update(extract_groups(token[1][1], False)) + elif token[0] == sre_parse.MIN_REPEAT: + result.update(extract_groups(token[1][2], False)) + elif token[0] == sre_parse.MAX_REPEAT: + result.update(extract_groups(token[1][2], False)) + elif token[0] == sre_parse.BRANCH: + result.update(extract_groups(token[1][1], False)) + elif token[0] == sre_parse.SUBPATTERN: + result.update(extract_groups(token[1][1], False)) + elif token[0] == sre_parse.IN: + result.update(extract_groups(token[1], False)) + elif isinstance(token, sre_parse.SubPattern): + result.update(extract_groups(token, False)) + return result + + +def _gen_combinator(variants, _merge=True): + if not hasattr(variants, '__iter__'): + return [variants] if variants is not None else [] + + res = [] + need_product = False + for var in variants: + if isinstance(var, list): + sol = _gen_combinator(var, _merge=False) + res.append(sol) + need_product = True + elif var is not None: + res.append(var) + + if need_product: + producted = itertools.product(*res) + if _merge: + # TODO(buglloc): ??! 
+ return list(six.moves.map(_merge_variants, producted)) + return producted + elif _merge: + return list(six.moves.map(_merge_variants, [res])) + return res + + +def _merge_variants(variants): + result = [] + for var in variants: + if isinstance(var, tuple): + result.append(_merge_variants(var)) + else: + result.append(var) + return ''.join(result) + + +class Token(object): + type = None + + def __init__(self, token, parent, regexp): + self.token = token + self.childs = None + self.parent = parent + self.regexp = regexp + self._parse() + + def parse(self): + pass + + def _parse(self): + pass + + def _parse_childs(self, childs): + self.childs = parse(childs, self, regexp=self.regexp) + + def _get_group(self, gid): + return self.regexp.group(gid) + + def _reg_group(self, gid): + self.regexp.reg_group(gid, self) + + def can_contain(self, char, skip_literal=True): + raise NotImplementedError('can_contain must be implemented') + + def can_startswith(self, char, strict=False): + return self.can_contain(char, skip_literal=False) + + def must_contain(self, char): + raise NotImplementedError('must_contain must be implemented') + + def must_startswith(self, char, strict=False): + return self.must_contain(char) + + def generate(self, context): + raise NotImplementedError('generate must be implemented') + + def __str__(self): + raise NotImplementedError('__str__ must be implemented') + + +class AnyToken(Token): + type = sre_parse.ANY + + def can_contain(self, char, skip_literal=True): + return char in CATEGORIES['ANY'] + + def must_contain(self, char, skip_literal=True): + # Char may not be present in ANY token + return False + + def generate(self, context): + if context.char in CATEGORIES['ANY']: + return context.char + return 'a' + + def __str__(self): + return '.' 
+ + +class LiteralToken(Token): + type = sre_parse.LITERAL + + def _parse(self): + self.char = six.unichr(self.token[1]) + + def can_contain(self, char, skip_literal=True): + if skip_literal: + return False + return self.char == char + + def must_contain(self, char, skip_literal=True): + return self.char == char + + def generate(self, context): + return self.char + + def __str__(self): + return re.escape(self.char) + + +class NotLiteralToken(Token): + type = sre_parse.NOT_LITERAL + + def _parse(self): + self.char = six.unichr(self.token[1]) + self.gen_char_list = list(_build_reverse_list(frozenset(self.char))) + + def can_contain(self, char, skip_literal=True): + return self.char != char + + def must_contain(self, char): + # Any char MAY not be present in NotLiteral, e.g.: "a" not present in "[^b]" + return False + + def generate(self, context): + if self.can_contain(context.char): + return context.char + + return random.choice(self.gen_char_list) + + def __str__(self): + return '[^{char}]'.format(char=self.char) + + +class RangeToken(Token): + type = sre_parse.RANGE + + def _parse(self): + self.left_code = self.token[1][0] + self.right_code = self.token[1][1] + self.left = six.unichr(self.left_code) + self.right = six.unichr(self.right_code) + + def can_contain(self, char, skip_literal=True): + return self.left <= char <= self.right + + def must_contain(self, char, skip_literal=True): + return self.left == char == self.right + + def generate(self, context): + if self.can_contain(context.char): + return context.char + + return six.unichr(random.randint(self.token[1][0], self.token[1][1])) + + def __str__(self): + return '{left}-{right}'.format(left=self.left, right=self.right) + + +class CategoryToken(Token): + type = sre_parse.CATEGORY + + def _parse(self): + self.char_list = CATEGORIES.get(self.token[1], ['']) + + def can_contain(self, char, skip_literal=True): + return char in self.char_list + + def must_contain(self, char, skip_literal=True): + return 
frozenset([char]) == self.char_list + + def generate(self, context): + if self.can_contain(context.char): + return context.char + + for c in self.char_list: + return c + + def __str__(self): + return CATEGORIES_NAMES.get(self.token[1], '\\C') + + +class MinRepeatToken(Token): + type = sre_parse.MIN_REPEAT + + def _parse(self): + self._parse_childs(self.token[1][2]) + self.min = self.token[1][0] + self.max = self.token[1][1] + + def can_contain(self, char, skip_literal=True): + if self.max == 0: + # [a-z]{0} + return False + for child in self.childs: + if child.can_contain(char, skip_literal=skip_literal): + return True + return False + + def must_contain(self, char): + if self.max == 0: + # [a-z]{0} + return False + if self.min == 0: + # [a-z]*? + return False + for child in self.childs: + if child.must_contain(char): + return True + return False + + def can_startswith(self, char, strict=False): + if self.max == 0: + # [a-z]{0} + if self.childs[0].can_startswith(char, strict): + return False + return None + return self.childs[0].can_startswith(char, strict) + + def must_startswith(self, char, strict=False): + if self.min == 0: + # [a-z]*? 
+ return None + if self.max == 0: + # [a-z]{0} + return None + return self.childs[0].must_startswith(char, strict=strict) + + def generate(self, context): + res = [] + if self.min == 0: + # [a-z]* + res.append('') + if self.max == 0: + # [a-z]{0} + return res + + for child in self.childs: + res.extend(child.generate(context)) + + result = [] + repeat = self.max if self.max <= context.max_repeat else context.max_repeat + for val in _gen_combinator([res]): + result.append(val * repeat) + return result + + def __str__(self): + childs = ''.join(str(x) for x in self.childs) + if self.min == self.max: + return '{childs}{{{count}}}?'.format(childs=childs, count=self.min) + if self.min == 0 and self.max == 1: + return '{childs}?'.format(childs=childs) + if self.min == 0 and self.max == sre_parse.MAXREPEAT: + return '{childs}*?'.format(childs=childs) + if self.min == 1 and self.max == sre_parse.MAXREPEAT: + return '{childs}+?'.format(childs=childs) + return '{childs}{{{min},{max}}}?'.format(childs=childs, min=self.min, max=self.max) + + +class MaxRepeatToken(Token): + type = sre_parse.MAX_REPEAT + + def _parse(self): + self._parse_childs(self.token[1][2]) + self.min = self.token[1][0] + self.max = self.token[1][1] + + def can_contain(self, char, skip_literal=True): + if self.max == 0: + # [a-z]{0} + return False + for child in self.childs: + if child.can_contain(char, skip_literal=skip_literal): + return True + return False + + def must_contain(self, char): + if self.max == 0: + # [a-z]{0} + return False + if self.min == 0: + # [a-z]? 
+ return False + for child in self.childs: + if child.must_contain(char): + return True + return False + + def can_startswith(self, char, strict=False): + if self.max == 0: + # [a-z]{0} + if self.childs[0].can_startswith(char, strict): + return False + return None + return self.childs[0].can_startswith(char, strict) + + def must_startswith(self, char, strict=False): + if self.min == 0: + # [a-z]* + return None + if self.max == 0: + # [a-z]{0} + return None + return self.childs[0].must_startswith(char, strict=strict) + + def generate(self, context): + res = [] + if self.min == 0: + # [a-z]* + res.append('') + if self.max == 0: + # [a-z]{0} + return res + + for child in self.childs: + res.extend(child.generate(context)) + + result = [] + repeat = self.max if self.max <= context.max_repeat else context.max_repeat + for val in _gen_combinator([res]): + result.append(val * repeat) + return result + + def __str__(self): + childs = ''.join(str(x) for x in self.childs) + if self.min == self.max: + return '{childs}{{{count}}}'.format(childs=childs, count=self.min) + if self.min == 0 and self.max == 1: + return '{childs}?'.format(childs=childs) + if self.min == 0 and self.max == sre_parse.MAXREPEAT: + return '{childs}*'.format(childs=childs) + if self.min == 1 and self.max == sre_parse.MAXREPEAT: + return '{childs}+'.format(childs=childs) + return '{childs}{{{min},{max}}}'.format(childs=childs, min=self.min, max=self.max) + + +class BranchToken(Token): + type = sre_parse.BRANCH + + def _parse(self): + self.childs = [] + for token in self.token[1][1]: + if not token: + self.childs.append(EmptyToken(token=token, parent=self.parent, regexp=self.regexp)) + elif isinstance(token, sre_parse.SubPattern): + self.childs.append(InternalSubpatternToken(token=token, parent=self.parent, regexp=self.regexp)) + else: + raise RuntimeError('Unexpected token {} in branch'.format(token)) + + def can_contain(self, char, skip_literal=True): + for child in self.childs: + if 
child.can_contain(char, skip_literal=skip_literal): + return True + return False + + def must_contain(self, char): + return all(child.must_contain(char) for child in self.childs) + + def can_startswith(self, char, strict=False): + return any(x.can_startswith(char, strict) for x in self.childs) + + def must_startswith(self, char, strict=False): + return all(x.must_startswith(char, strict) for x in self.childs) + + def generate(self, context): + res = [] + for child in self.childs: + values = child.generate(context) + if isinstance(values, list): + res.extend(child.generate(context)) + else: + res.append(values) + + return res + + def __str__(self): + return '(?:{})'.format('|'.join(str(x) for x in self.childs)) + + +class SubpatternToken(Token): + type = sre_parse.SUBPATTERN + + def _parse(self): + self._parse_childs(self.token[1][1]) + self.group = self.token[1][0] + if isinstance(self.group, int): + # Captured group index can't be a string. E.g. for pattern "(?:la)" group name is "None" + self._reg_group(self.group) + + def can_contain(self, char, skip_literal=True): + for child in self.childs: + if child.can_contain(char, skip_literal=skip_literal): + return True + return False + + def must_contain(self, char): + for child in self.childs: + if child.must_contain(char): + return True + return False + + def can_startswith(self, char, strict=False): + if isinstance(self.childs[0], AtToken): + if len(self.childs) > 1: + for child in self.childs[1:]: + can = child.can_startswith(char, strict) + if can is None: + continue + return can + return False + elif not strict and not isinstance(self.childs[0], (SubpatternToken, InternalSubpatternToken)): + # Not strict regexp w/o ^ can starts with any character + return char in CATEGORIES['ANY'] + + for child in self.childs: + can = child.can_startswith(char, strict) + if can is None: + continue + return can + return None + + def must_startswith(self, char, strict=False): + if isinstance(self.childs[0], AtToken): + if 
len(self.childs) > 1: + for child in self.childs[1:]: + must = child.must_startswith(char, strict=True) + if must is None: + continue + return must + return False + elif not strict and not isinstance(self.childs[0], (SubpatternToken, InternalSubpatternToken)): + # Not strict regexp w/o ^ MAY NOT starts with any character + return False + + for child in self.childs: + must = child.must_startswith(char, strict=strict) + if must is None: + continue + return must + return None + + def generate(self, context): + res = [] + for child in self.childs: + res.append(child.generate(context)) + + return _gen_combinator(res) + + def __str__(self): + childs = ''.join(str(x) for x in self.childs) + if self.group is None: + return '(?:{childs})'.format(childs=childs) + return '({childs})'.format(childs=childs) + + +class InternalSubpatternToken(Token): + type = sre_parse.SUBPATTERN + + def _parse(self): + self._parse_childs(self.token) + self.group = None + + def can_contain(self, char, skip_literal=True): + for child in self.childs: + if child.can_contain(char, skip_literal=skip_literal): + return True + return False + + def must_contain(self, char): + for child in self.childs: + if child.must_contain(char): + return True + return False + + def can_startswith(self, char, strict=False): + if isinstance(self.childs[0], AtToken): + if len(self.childs) > 1: + for child in self.childs[1:]: + can = child.can_startswith(char, strict) + if can is None: + continue + return can + return False + elif not strict and not isinstance(self.childs[0], (SubpatternToken, InternalSubpatternToken)): + # Not strict regexp w/o ^ can starts with any character + return char in CATEGORIES['ANY'] + + for child in self.childs: + can = child.can_startswith(char, strict) + if can is None: + continue + return can + return None + + def must_startswith(self, char, strict=False): + if isinstance(self.childs[0], AtToken): + if len(self.childs) > 1: + for child in self.childs[1:]: + must = 
child.must_startswith(char, strict=True) + if must is None: + continue + return must + return False + elif not strict and not isinstance(self.childs[0], (SubpatternToken, InternalSubpatternToken)): + # Not strict regexp w/o ^ MAY NOT starts with any character + return False + + for child in self.childs: + must = child.must_startswith(char, strict=strict) + if must is None: + continue + return must + return None + + def generate(self, context): + res = [] + for child in self.childs: + res.append(child.generate(context)) + + return _gen_combinator(res) + + def __str__(self): + return ''.join(str(x) for x in self.childs) + + +class InToken(Token): + type = sre_parse.IN + + def _parse(self): + self.childs = parse(self.token[1], self) + + def can_contain(self, char, skip_literal=True): + can = False + negative = False + for child in self.childs: + if isinstance(child, NegateToken): + negative = True + else: + can = child.can_contain(char, skip_literal=False) + + if can: + break + if can and not negative: + # a in [a-z] + return True + if not can and negative: + # a in [^b-z] + return True + return False + + def must_contain(self, char): + # Any character MAY not be present in IN + return False + + def _generate_positive(self, context): + result = [] + for child in self.childs: + if isinstance(child, (NegateToken, EmptyToken)): + pass + else: + result.append(child.generate(context=context)) + return result + + def _generate_negative(self, context): + blacklisted = set() + # TODO(buglloc): move chars list into the tokens? 
+ for child in self.childs: + if isinstance(child, (NegateToken, EmptyToken)): + pass + elif isinstance(child, LiteralToken): + blacklisted.add(child.char) + elif isinstance(child, RangeToken): + blacklisted.update(six.unichr(c) for c in six.moves.range(child.left_code, child.right_code + 1)) + elif isinstance(child, CategoryToken): + blacklisted.update(child.char_list) + else: + LOG.info('Unexpected child "{!r}"'.format(child)) + + for char in _build_reverse_list(set()): + if char not in blacklisted: + return char + + def generate(self, context): + if self.can_contain(context.char, skip_literal=False): + return context.char + + is_negative = self.childs and isinstance(self.childs[0], NegateToken) + if is_negative: + # [^a-z] + return self._generate_negative(context) + # [a-z] + return self._generate_positive(context) + + def __str__(self): + return '[{childs}]'.format(childs=''.join(str(x) for x in self.childs)) + + +class AtToken(Token): + type = sre_parse.AT + + def _parse(self): + self.begin = self.token[1] == sre_parse.AT_BEGINNING + self.end = self.token[1] == sre_parse.AT_END + + def can_contain(self, char, skip_literal=True): + return False + + def must_contain(self, char): + return False + + def generate(self, context): + if context.anchored: + if self.begin: + return '^' + if self.end: + return '$' + return None + + def __str__(self): + if self.begin: + return '^' + if self.end: + return '$' + LOG.warn('unexpected AT token: %s', self.token) + + +class NegateToken(Token): + type = sre_parse.NEGATE + + def can_contain(self, char, skip_literal=True): + return False + + def must_contain(self, char): + return False + + def can_startswith(self, char, strict=False): + return None + + def must_startswith(self, char, strict=False): + return None + + def generate(self, context): + return None + + def __str__(self): + return '^' + + +class GroupRefToken(Token): + type = sre_parse.GROUPREF + + def _parse(self): + self.id = self.token[1] + self.group = 
self._get_group(self.id) + + def can_contain(self, char, skip_literal=True): + return self.group.can_contain(char, skip_literal=skip_literal) + + def must_contain(self, char): + return self.group.must_contain(char) + + def can_startswith(self, char, strict=False): + return self.group.can_startswith(char, strict=strict) + + def must_startswith(self, char, strict=False): + return self.group.must_startswith(char, strict=strict) + + def generate(self, context): + return self.group.generate(context) + + def __str__(self): + return '\\\\{}'.format(self.id) + + +class AssertToken(Token): + type = sre_parse.ASSERT + + def can_contain(self, char, skip_literal=True): + # TODO(buglloc): Do it! + return False + + def must_contain(self, char): + # TODO(buglloc): Do it! + return False + + def can_startswith(self, char, strict=False): + return None + + def must_startswith(self, char, strict=False): + return None + + +class AssertNotToken(Token): + type = sre_parse.ASSERT_NOT + + def can_contain(self, char, skip_literal=True): + # TODO(buglloc): Do it! + return False + + def must_contain(self, char): + # TODO(buglloc): Do it! + return False + + def can_startswith(self, char, strict=False): + return None + + def must_startswith(self, char, strict=False): + return None + + +class EmptyToken(Token): + type = None + + def can_contain(self, char, skip_literal=True): + return False + + def must_contain(self, char): + # TODO(buglloc): Do it! 
+ return False + + def can_startswith(self, char, strict=False): + return None + + def must_startswith(self, char, strict=False): + return None + + def generate(self, context): + return '' + + def __str__(self): + return '' + + +def parse(sre_obj, parent=None, regexp=None): + result = [] + for token in sre_obj: + if not token: + result.append(EmptyToken(token=token, parent=parent, regexp=regexp)) + elif token[0] == sre_parse.ANY: + result.append(AnyToken(token=token, parent=parent, regexp=regexp)) + elif token[0] == sre_parse.LITERAL: + result.append(LiteralToken(token=token, parent=parent, regexp=regexp)) + elif token[0] == sre_parse.NOT_LITERAL: + result.append(NotLiteralToken(token=token, parent=parent, regexp=regexp)) + elif token[0] == sre_parse.RANGE: + result.append(RangeToken(token=token, parent=parent, regexp=regexp)) + elif token[0] == sre_parse.CATEGORY: + result.append(CategoryToken(token=token, parent=parent, regexp=regexp)) + elif token[0] == sre_parse.MIN_REPEAT: + result.append(MinRepeatToken(token=token, parent=parent, regexp=regexp)) + elif token[0] == sre_parse.MAX_REPEAT: + result.append(MaxRepeatToken(token=token, parent=parent, regexp=regexp)) + elif token[0] == sre_parse.BRANCH: + result.append(BranchToken(token=token, parent=parent, regexp=regexp)) + elif token[0] == sre_parse.SUBPATTERN: + result.append(SubpatternToken(token=token, parent=parent, regexp=regexp)) + elif token[0] == sre_parse.IN: + result.append(InToken(token=token, parent=parent, regexp=regexp)) + elif token[0] == sre_parse.NEGATE: + result.append(NegateToken(token=token, parent=parent, regexp=regexp)) + elif token[0] == sre_parse.AT: + result.append(AtToken(token=token, parent=parent, regexp=regexp)) + elif token[0] == sre_parse.GROUPREF: + result.append(GroupRefToken(token=token, parent=parent, regexp=regexp)) + elif token[0] == sre_parse.ASSERT: + pass # TODO(buglloc): Do it! + elif token[0] == sre_parse.ASSERT_NOT: + pass # TODO(buglloc): Do it! 
+ else: + LOG.info('Unexpected token "{}"'.format(token[0])) + + return result + + +class GenerationContext(object): + def __init__(self, char, max_repeat=5, strict=False, anchored=True): + self.char = char + self.max_repeat = max_repeat + self.strict = strict + self.anchored = anchored + + +class Regexp(object): + def __init__(self, source, strict=False, case_sensitive=True, root=None, parsed=None): + self.source = source + self.strict = strict + self.case_sensitive = case_sensitive + self._root = root + self._parsed = parsed + self._groups = {} + + def can_startswith(self, char): + """ + Checks if regex can starts with the specified char. + Example: + Regexp('[a-z][0-9]').can_startswith('s') -> True + Regexp('[a-z][0-9]').can_startswith('0') -> True + Regexp('^[a-z][0-9]').can_startswith('0') -> False + Regexp('[a-z][0-9]', strict=True).can_startswith('0') -> False + + :param str char: character to test. + :return bool: True if regex can starts with the specified char, False otherwise. + """ + + return self.root.can_startswith( + char=char if self.case_sensitive else char.lower(), + strict=self.strict + ) + + def can_contain(self, char, skip_literal=True): + """ + Checks if regex can contain the specified char. + Example: + Regexp('[a-z][0-9]').can_contain('s') -> True + Regexp('[a-z][0-9]').can_contain('0') -> True + Regexp('[a-z][0-9]').can_contain('/') -> False + Regexp('[a-z][0-9]/').can_contain('/') -> False + Regexp('[a-z][0-9]/').can_contain('/', skip_literal=False) -> True + + :param str char: character to test. + :param bool skip_literal: skip literal tokens. + :return bool: True if regex can contain the specified char, False otherwise. + """ + + return self.root.can_contain( + char=char if self.case_sensitive else char.lower(), + skip_literal=skip_literal + ) + + def must_startswith(self, char): + """ + Checks if regex MUST starts with the specified char. 
+ Example: + Regexp('[a-z][0-9]').must_startswith('s') -> False + Regexp('s[a-z]').must_startswith('s') -> False + Regexp('^s[a-z]').must_startswith('s') -> True + Regexp('s[a-z]', strict=True).must_startswith('s') -> True + + :param str char: character to test. + :return bool: True if regex must starts with the specified char, False otherwise. + """ + + return self.root.must_startswith( + char=char if self.case_sensitive else char.lower(), + strict=self.strict + ) + + def must_contain(self, char): + """ + Checks if regex MUST contain the specified char. + Example: + Regexp('[a-z][0-9]').must_contain('s') -> False + Regexp('[a-z][0-9]s').must_contain('s') -> True + + :param str char: character to test. + :return bool: True if regex can contain the specified char, False otherwise. + """ + + return self.root.must_contain( + char=char if self.case_sensitive else char.lower() + ) + + def generate(self, char, anchored=False, max_repeat=5): + """ + Generate values that match regex. + Example: + Regexp('.a?').generate('s') -> ['s', 'sa'] + Regexp('(?:^http|https)://.').generate('s') -> ['http://s', 'https://s'] + Regexp('(?:^http|https)://.').generate('s', anchored=True) -> ['^http://s', 'https://s'] + + + :param str char: "dangerous" character, generator try to place it wherever possible. + :param bool anchored: place anchors in generated values. + :param int max_repeat: maximum count of repeated group (e.g. "a+" provides "aaaaa"). + :return list of str: True if regex can contain the specified char, False otherwise. + """ + + context = GenerationContext(char, anchored=anchored, max_repeat=max_repeat) + for val in self.root.generate(context=context): + if anchored and self.strict and not val.startswith('^'): + yield '^' + val + else: + yield val + + def group(self, name): + """ + Returns group by specified name. + + :param name: name of the group. + :return Regexp: Regexp object for this group. 
+ """ + + if name in self.groups: + return self.groups[name] + return Regexp('') + + def reg_group(self, gid, token): + self._groups[gid] = token + + def get_group(self, gid): + return self._groups[gid] + + @cached_property + def groups(self): + # self.root.parse() + result = {} + # for name, token in self._groups.items(): + # result[name] = Regexp(str(self), root=token, strict=True, case_sensitive=self.case_sensitive) + for name, parsed in extract_groups(self.parsed).items(): + result[name] = Regexp('compiled', parsed=parsed, strict=True, case_sensitive=self.case_sensitive) + for name, group in self.parsed.pattern.groupdict.items(): + result[name] = result[group] + return result + + @property + def root(self): + if self._root: + return self._root + + self._root = InternalSubpatternToken(self.parsed, parent=None, regexp=self) + self._groups[0] = self._root + return self._root + + @property + def parsed(self): + # TODO(buglloc): Ugly hack! + if self._parsed: + return self._parsed + + self._parsed = sre_parse.parse(FIX_NAMED_GROUPS_RE.sub('(?P<\\1>', self.source)) + return self._parsed + + def __str__(self): + return str(self.root) diff --git a/gixy/core/severity.py b/gixy/core/severity.py new file mode 100644 index 0000000..5d60832 --- /dev/null +++ b/gixy/core/severity.py @@ -0,0 +1,9 @@ +UNSPECIFIED = 'UNSPECIFIED' +LOW = 'LOW' +MEDIUM = 'MEDIUM' +HIGH = 'HIGH' +ALL = [UNSPECIFIED, LOW, MEDIUM, HIGH] + + +def is_acceptable(current_severity, min_severity): + return ALL.index(current_severity) >= ALL.index(min_severity) diff --git a/gixy/core/sre_parse/__init__.py b/gixy/core/sre_parse/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gixy/core/sre_parse/sre_constants.py b/gixy/core/sre_parse/sre_constants.py new file mode 100644 index 0000000..51f2e71 --- /dev/null +++ b/gixy/core/sre_parse/sre_constants.py @@ -0,0 +1,222 @@ +# flake8: noqa + +# +# Secret Labs' Regular Expression Engine +# +# various symbols used by the regular expression engine. 
+# run this script to update the _sre include files! +# +# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. +# +# See the sre.py file for information on usage and redistribution. +# + +"""Internal support module for sre""" + +# update when constants are added or removed + +MAGIC = 20031017 + +try: + from _sre import MAXREPEAT +except ImportError: + import _sre + MAXREPEAT = _sre.MAXREPEAT = 65535 + +# SRE standard exception (access as sre.error) +# should this really be here? + +class error(Exception): + pass + +# operators + +FAILURE = "failure" +SUCCESS = "success" + +ANY = "any" +ANY_ALL = "any_all" +ASSERT = "assert" +ASSERT_NOT = "assert_not" +AT = "at" +BIGCHARSET = "bigcharset" +BRANCH = "branch" +CALL = "call" +CATEGORY = "category" +CHARSET = "charset" +GROUPREF = "groupref" +GROUPREF_IGNORE = "groupref_ignore" +GROUPREF_EXISTS = "groupref_exists" +IN = "in" +IN_IGNORE = "in_ignore" +INFO = "info" +JUMP = "jump" +LITERAL = "literal" +LITERAL_IGNORE = "literal_ignore" +MARK = "mark" +MAX_REPEAT = "max_repeat" +MAX_UNTIL = "max_until" +MIN_REPEAT = "min_repeat" +MIN_UNTIL = "min_until" +NEGATE = "negate" +NOT_LITERAL = "not_literal" +NOT_LITERAL_IGNORE = "not_literal_ignore" +RANGE = "range" +REPEAT = "repeat" +REPEAT_ONE = "repeat_one" +SUBPATTERN = "subpattern" +MIN_REPEAT_ONE = "min_repeat_one" + +# positions +AT_BEGINNING = "at_beginning" +AT_BEGINNING_LINE = "at_beginning_line" +AT_BEGINNING_STRING = "at_beginning_string" +AT_BOUNDARY = "at_boundary" +AT_NON_BOUNDARY = "at_non_boundary" +AT_END = "at_end" +AT_END_LINE = "at_end_line" +AT_END_STRING = "at_end_string" +AT_LOC_BOUNDARY = "at_loc_boundary" +AT_LOC_NON_BOUNDARY = "at_loc_non_boundary" +AT_UNI_BOUNDARY = "at_uni_boundary" +AT_UNI_NON_BOUNDARY = "at_uni_non_boundary" + +# categories +CATEGORY_DIGIT = "category_digit" +CATEGORY_NOT_DIGIT = "category_not_digit" +CATEGORY_SPACE = "category_space" +CATEGORY_NOT_SPACE = "category_not_space" +CATEGORY_WORD = "category_word" 
+CATEGORY_NOT_WORD = "category_not_word" +CATEGORY_LINEBREAK = "category_linebreak" +CATEGORY_NOT_LINEBREAK = "category_not_linebreak" +CATEGORY_LOC_WORD = "category_loc_word" +CATEGORY_LOC_NOT_WORD = "category_loc_not_word" +CATEGORY_UNI_DIGIT = "category_uni_digit" +CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit" +CATEGORY_UNI_SPACE = "category_uni_space" +CATEGORY_UNI_NOT_SPACE = "category_uni_not_space" +CATEGORY_UNI_WORD = "category_uni_word" +CATEGORY_UNI_NOT_WORD = "category_uni_not_word" +CATEGORY_UNI_LINEBREAK = "category_uni_linebreak" +CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak" + +OPCODES = [ + + # failure=0 success=1 (just because it looks better that way :-) + FAILURE, SUCCESS, + + ANY, ANY_ALL, + ASSERT, ASSERT_NOT, + AT, + BRANCH, + CALL, + CATEGORY, + CHARSET, BIGCHARSET, + GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE, + IN, IN_IGNORE, + INFO, + JUMP, + LITERAL, LITERAL_IGNORE, + MARK, + MAX_UNTIL, + MIN_UNTIL, + NOT_LITERAL, NOT_LITERAL_IGNORE, + NEGATE, + RANGE, + REPEAT, + REPEAT_ONE, + SUBPATTERN, + MIN_REPEAT_ONE + +] + +ATCODES = [ + AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY, + AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING, + AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY, + AT_UNI_NON_BOUNDARY +] + +CHCODES = [ + CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE, + CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD, + CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD, + CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT, + CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD, + CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK, + CATEGORY_UNI_NOT_LINEBREAK +] + +def makedict(list): + d = {} + i = 0 + for item in list: + d[item] = i + i = i + 1 + return d + +OPCODES = makedict(OPCODES) +ATCODES = makedict(ATCODES) +CHCODES = makedict(CHCODES) + +# replacement operations for "ignore case" mode +OP_IGNORE = { + GROUPREF: GROUPREF_IGNORE, + IN: IN_IGNORE, + 
LITERAL: LITERAL_IGNORE, + NOT_LITERAL: NOT_LITERAL_IGNORE +} + +AT_MULTILINE = { + AT_BEGINNING: AT_BEGINNING_LINE, + AT_END: AT_END_LINE +} + +AT_LOCALE = { + AT_BOUNDARY: AT_LOC_BOUNDARY, + AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY +} + +AT_UNICODE = { + AT_BOUNDARY: AT_UNI_BOUNDARY, + AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY +} + +CH_LOCALE = { + CATEGORY_DIGIT: CATEGORY_DIGIT, + CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT, + CATEGORY_SPACE: CATEGORY_SPACE, + CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE, + CATEGORY_WORD: CATEGORY_LOC_WORD, + CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD, + CATEGORY_LINEBREAK: CATEGORY_LINEBREAK, + CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK +} + +CH_UNICODE = { + CATEGORY_DIGIT: CATEGORY_UNI_DIGIT, + CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT, + CATEGORY_SPACE: CATEGORY_UNI_SPACE, + CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE, + CATEGORY_WORD: CATEGORY_UNI_WORD, + CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD, + CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK, + CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK +} + +# flags +SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking) +SRE_FLAG_IGNORECASE = 2 # case insensitive +SRE_FLAG_LOCALE = 4 # honour system locale +SRE_FLAG_MULTILINE = 8 # treat target as multiline string +SRE_FLAG_DOTALL = 16 # treat target as a single string +SRE_FLAG_UNICODE = 32 # use unicode locale +SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments +SRE_FLAG_DEBUG = 128 # debugging + +# flags for INFO primitive +SRE_INFO_PREFIX = 1 # has prefix +SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix) +SRE_INFO_CHARSET = 4 # pattern starts with character from given set + diff --git a/gixy/core/sre_parse/sre_parse.py b/gixy/core/sre_parse/sre_parse.py new file mode 100644 index 0000000..df69044 --- /dev/null +++ b/gixy/core/sre_parse/sre_parse.py @@ -0,0 +1,829 @@ +# flake8: noqa + +# +# Secret Labs' Regular Expression Engine +# +# convert re-style regular expression to sre pattern +# +# Copyright (c) 1998-2001 by 
Secret Labs AB. All rights reserved. +# +# See the sre.py file for information on usage and redistribution. +# + +from __future__ import print_function + +"""Internal support module for sre""" + +from sre_constants import * + +SPECIAL_CHARS = ".\\[{()*+?^$|" +REPEAT_CHARS = "*+?{" + +DIGITS = set("0123456789") + +OCTDIGITS = set("01234567") +HEXDIGITS = set("0123456789abcdefABCDEF") + +WHITESPACE = set(" \t\n\r\v\f") + +ESCAPES = { + r"\a": (LITERAL, ord("\a")), + r"\b": (LITERAL, ord("\b")), + r"\f": (LITERAL, ord("\f")), + r"\n": (LITERAL, ord("\n")), + r"\r": (LITERAL, ord("\r")), + r"\t": (LITERAL, ord("\t")), + r"\v": (LITERAL, ord("\v")), + r"\\": (LITERAL, ord("\\")) +} + +CATEGORIES = { + r"\A": (AT, AT_BEGINNING_STRING), # start of string + r"\b": (AT, AT_BOUNDARY), + r"\B": (AT, AT_NON_BOUNDARY), + r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]), + r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]), + r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]), + r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]), + r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]), + r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]), + r"\Z": (AT, AT_END_STRING), # end of string +} + +FLAGS = { + # standard flags + "i": SRE_FLAG_IGNORECASE, + "L": SRE_FLAG_LOCALE, + "m": SRE_FLAG_MULTILINE, + "s": SRE_FLAG_DOTALL, + "x": SRE_FLAG_VERBOSE, + # extensions + "t": SRE_FLAG_TEMPLATE, + "u": SRE_FLAG_UNICODE, +} + + +class Pattern: + # master pattern object. 
keeps track of global attributes + def __init__(self): + self.flags = 0 + self.open = [] + self.groups = 1 + self.groupdict = {} + self.lookbehind = 0 + + def opengroup(self, name=None): + gid = self.groups + self.groups = gid + 1 + if name is not None: + ogid = self.groupdict.get(name, None) + if ogid is not None: + raise error(("redefinition of group name %s as group %d; " + "was group %d" % (repr(name), gid, ogid))) + self.groupdict[name] = gid + self.open.append(gid) + return gid + + def closegroup(self, gid): + self.open.remove(gid) + + def checkgroup(self, gid): + return gid < self.groups and gid not in self.open + + +class SubPattern: + # a subpattern, in intermediate form + def __init__(self, pattern, data=None): + self.pattern = pattern + if data is None: + data = [] + self.data = data + self.width = None + + def __repr__(self): + return repr(self.data) + + def __len__(self): + return len(self.data) + + def __delitem__(self, index): + del self.data[index] + + def __getitem__(self, index): + if isinstance(index, slice): + return SubPattern(self.pattern, self.data[index]) + return self.data[index] + + def __setitem__(self, index, code): + self.data[index] = code + + def insert(self, index, code): + self.data.insert(index, code) + + def append(self, code): + self.data.append(code) + + def getwidth(self): + # determine the width (min, max) for this subpattern + if self.width: + return self.width + lo = hi = 0 + UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY) + REPEATCODES = (MIN_REPEAT, MAX_REPEAT) + for op, av in self.data: + if op is BRANCH: + i = MAXREPEAT - 1 + j = 0 + for av in av[1]: + l, h = av.getwidth() + i = min(i, l) + j = max(j, h) + lo = lo + i + hi = hi + j + elif op is CALL: + i, j = av.getwidth() + lo = lo + i + hi = hi + j + elif op is SUBPATTERN: + i, j = av[1].getwidth() + lo = lo + i + hi = hi + j + elif op in REPEATCODES: + i, j = av[2].getwidth() + lo = lo + i * av[0] + hi = hi + j * av[1] + elif op in UNITCODES: + lo = lo + 
1 + hi = hi + 1 + elif op == SUCCESS: + break + self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT) + return self.width + + +class Tokenizer: + def __init__(self, string): + self.string = string + self.index = 0 + self.__next() + + def __next(self): + if self.index >= len(self.string): + self.next = None + return + char = self.string[self.index] + if char[0] == "\\": + try: + c = self.string[self.index + 1] + except IndexError: + raise error("bogus escape (end of line)") + char = char + c + self.index = self.index + len(char) + self.next = char + + def match(self, char, skip=1): + if char == self.next: + if skip: + self.__next() + return 1 + return 0 + + def get(self): + this = self.next + self.__next() + return this + + def tell(self): + return self.index, self.next + + def seek(self, index): + self.index, self.next = index + + +def isident(char): + return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_" + + +def isdigit(char): + return "0" <= char <= "9" + + +def isname(name): + # check that group name is a valid string + if not isident(name[0]): + return False + for char in name[1:]: + if not isident(char) and not isdigit(char): + return False + return True + + +def _class_escape(source, escape): + # handle escape code inside character class + code = ESCAPES.get(escape) + if code: + return code + code = CATEGORIES.get(escape) + if code and code[0] == IN: + return code + try: + c = escape[1:2] + if c == "x": + # hexadecimal escape (exactly two digits) + while source.next in HEXDIGITS and len(escape) < 4: + escape = escape + source.get() + escape = escape[2:] + if len(escape) != 2: + raise error("bogus escape: %s" % repr("\\" + escape)) + return LITERAL, int(escape, 16) & 0xff + elif c in OCTDIGITS: + # octal escape (up to three digits) + while source.next in OCTDIGITS and len(escape) < 4: + escape = escape + source.get() + escape = escape[1:] + return LITERAL, int(escape, 8) & 0xff + elif c in DIGITS: + raise error("bogus escape: %s" % repr(escape)) + 
if len(escape) == 2: + return LITERAL, ord(escape[1]) + except ValueError: + pass + raise error("bogus escape: %s" % repr(escape)) + + +def _escape(source, escape, state): + # handle escape code in expression + code = CATEGORIES.get(escape) + if code: + return code + code = ESCAPES.get(escape) + if code: + return code + try: + c = escape[1:2] + if c == "x": + # hexadecimal escape + while source.next in HEXDIGITS and len(escape) < 4: + escape = escape + source.get() + if len(escape) != 4: + raise ValueError + return LITERAL, int(escape[2:], 16) & 0xff + elif c == "0": + # octal escape + while source.next in OCTDIGITS and len(escape) < 4: + escape = escape + source.get() + return LITERAL, int(escape[1:], 8) & 0xff + elif c in DIGITS: + # octal escape *or* decimal group reference (sigh) + if source.next in DIGITS: + escape = escape + source.get() + if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and + source.next in OCTDIGITS): + # got three octal digits; this is an octal escape + escape = escape + source.get() + return LITERAL, int(escape[1:], 8) & 0xff + # not an octal escape, so this is a group reference + group = int(escape[1:]) + if group < state.groups: + if not state.checkgroup(group): + raise error("cannot refer to open group") + if state.lookbehind: + import warnings + warnings.warn('group references in lookbehind ' + 'assertions are not supported', + RuntimeWarning) + return GROUPREF, group + raise ValueError + if len(escape) == 2: + return LITERAL, ord(escape[1]) + except ValueError: + pass + raise error("bogus escape: %s" % repr(escape)) + + +def _parse_sub(source, state, nested=1): + # parse an alternation: a|b|c + + items = [] + itemsappend = items.append + sourcematch = source.match + while 1: + itemsappend(_parse(source, state)) + if sourcematch("|"): + continue + if not nested: + break + if not source.next or sourcematch(")", 0): + break + else: + raise error("pattern not properly closed") + + if len(items) == 1: + return items[0] + + subpattern 
= SubPattern(state) + subpatternappend = subpattern.append + + # check if all items share a common prefix + while 1: + prefix = None + for item in items: + if not item: + break + if prefix is None: + prefix = item[0] + elif item[0] != prefix: + break + else: + # all subitems start with a common "prefix". + # move it out of the branch + for item in items: + del item[0] + subpatternappend(prefix) + continue # check next one + break + + # check if the branch can be replaced by a character set + for item in items: + if len(item) != 1 or item[0][0] != LITERAL: + break + else: + # we can store this as a character set instead of a + # branch (the compiler may optimize this even more) + set = [] + setappend = set.append + for item in items: + setappend(item[0]) + subpatternappend((IN, set)) + return subpattern + + subpattern.append((BRANCH, (None, items))) + return subpattern + + +def _parse_sub_cond(source, state, condgroup): + item_yes = _parse(source, state) + if source.match("|"): + item_no = _parse(source, state) + if source.match("|"): + raise error("conditional backref with more than two branches") + else: + item_no = None + if source.next and not source.match(")", 0): + raise error("pattern not properly closed") + subpattern = SubPattern(state) + subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) + return subpattern + + +_PATTERNENDERS = set("|)") +_ASSERTCHARS = set("=!<") +_LOOKBEHINDASSERTCHARS = set("=!") +_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT]) + + +def _parse(source, state): + # parse a simple pattern + subpattern = SubPattern(state) + + # precompute constants into local variables + subpatternappend = subpattern.append + sourceget = source.get + sourcematch = source.match + _len = len + PATTERNENDERS = _PATTERNENDERS + ASSERTCHARS = _ASSERTCHARS + LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS + REPEATCODES = _REPEATCODES + + while 1: + + if source.next in PATTERNENDERS: + break # end of subpattern + this = sourceget() + if this is 
None: + break # end of pattern + + if state.flags & SRE_FLAG_VERBOSE: + # skip whitespace and comments + if this in WHITESPACE: + continue + if this == "#": + while 1: + this = sourceget() + if this in (None, "\n"): + break + continue + + if this and this[0] not in SPECIAL_CHARS: + subpatternappend((LITERAL, ord(this))) + + elif this == "[": + # character set + set = [] + setappend = set.append + ## if sourcematch(":"): + ## pass # handle character classes + if sourcematch("^"): + setappend((NEGATE, None)) + # check remaining characters + start = set[:] + while 1: + this = sourceget() + if this == "]" and set != start: + break + elif this and this[0] == "\\": + code1 = _class_escape(source, this) + elif this: + code1 = LITERAL, ord(this) + else: + raise error("unexpected end of regular expression") + if sourcematch("-"): + # potential range + this = sourceget() + if this == "]": + if code1[0] is IN: + code1 = code1[1][0] + setappend(code1) + setappend((LITERAL, ord("-"))) + break + elif this: + if this[0] == "\\": + code2 = _class_escape(source, this) + else: + code2 = LITERAL, ord(this) + if code1[0] != LITERAL or code2[0] != LITERAL: + raise error("bad character range") + lo = code1[1] + hi = code2[1] + if hi < lo: + raise error("bad character range") + setappend((RANGE, (lo, hi))) + else: + raise error("unexpected end of regular expression") + else: + if code1[0] is IN: + code1 = code1[1][0] + setappend(code1) + + # XXX: should move set optimization to compiler! 
+ if _len(set) == 1 and set[0][0] is LITERAL: + subpatternappend(set[0]) # optimization + elif _len(set) == 2 and set[0][0] is NEGATE and set[1][0] is LITERAL: + subpatternappend((NOT_LITERAL, set[1][1])) # optimization + else: + # XXX: should add charmap optimization here + subpatternappend((IN, set)) + + elif this and this[0] in REPEAT_CHARS: + # repeat previous item + if this == "?": + min, max = 0, 1 + elif this == "*": + min, max = 0, MAXREPEAT + + elif this == "+": + min, max = 1, MAXREPEAT + elif this == "{": + if source.next == "}": + subpatternappend((LITERAL, ord(this))) + continue + here = source.tell() + min, max = 0, MAXREPEAT + lo = hi = "" + while source.next in DIGITS: + lo = lo + source.get() + if sourcematch(","): + while source.next in DIGITS: + hi = hi + sourceget() + else: + hi = lo + if not sourcematch("}"): + subpatternappend((LITERAL, ord(this))) + source.seek(here) + continue + if lo: + min = int(lo) + if min >= MAXREPEAT: + raise OverflowError("the repetition number is too large") + if hi: + max = int(hi) + if max >= MAXREPEAT: + raise OverflowError("the repetition number is too large") + if max < min: + raise error("bad repeat interval") + else: + raise error("not supported") + # figure out which item to repeat + if subpattern: + item = subpattern[-1:] + else: + item = None + if not item or (_len(item) == 1 and item[0][0] == AT): + raise error("nothing to repeat") + if item[0][0] in REPEATCODES: + raise error("multiple repeat") + if sourcematch("?"): + subpattern[-1] = (MIN_REPEAT, (min, max, item)) + else: + subpattern[-1] = (MAX_REPEAT, (min, max, item)) + + elif this == ".": + subpatternappend((ANY, None)) + + elif this == "(": + group = 1 + name = None + condgroup = None + if sourcematch("?"): + group = 0 + # options + if sourcematch("P"): + # python extensions + if sourcematch("<"): + # named group: skip forward to end of name + name = "" + while 1: + char = sourceget() + if char is None: + raise error("unterminated name") + if char 
== ">": + break + name = name + char + group = 1 + if not name: + raise error("missing group name") + if not isname(name): + raise error("bad character in group name %r" % + name) + elif sourcematch("="): + # named backreference + name = "" + while 1: + char = sourceget() + if char is None: + raise error("unterminated name") + if char == ")": + break + name = name + char + if not name: + raise error("missing group name") + if not isname(name): + raise error("bad character in backref group name " + "%r" % name) + gid = state.groupdict.get(name) + if gid is None: + msg = "unknown group name: {0!r}".format(name) + raise error(msg) + if state.lookbehind: + import warnings + warnings.warn('group references in lookbehind ' + 'assertions are not supported', + RuntimeWarning) + subpatternappend((GROUPREF, gid)) + continue + else: + char = sourceget() + if char is None: + raise error("unexpected end of pattern") + raise error("unknown specifier: ?P%s" % char) + elif sourcematch(":"): + # non-capturing group + group = 2 + elif sourcematch("#"): + # comment + while 1: + if source.next is None or source.next == ")": + break + sourceget() + if not sourcematch(")"): + raise error("unbalanced parenthesis") + continue + elif source.next in ASSERTCHARS: + # lookahead assertions + char = sourceget() + dir = 1 + if char == "<": + if source.next not in LOOKBEHINDASSERTCHARS: + raise error("syntax error") + dir = -1 # lookbehind + char = sourceget() + state.lookbehind += 1 + p = _parse_sub(source, state) + if dir < 0: + state.lookbehind -= 1 + if not sourcematch(")"): + raise error("unbalanced parenthesis") + if char == "=": + subpatternappend((ASSERT, (dir, p))) + else: + subpatternappend((ASSERT_NOT, (dir, p))) + continue + elif sourcematch("("): + # conditional backreference group + condname = "" + while 1: + char = sourceget() + if char is None: + raise error("unterminated name") + if char == ")": + break + condname = condname + char + group = 2 + if not condname: + raise 
error("missing group name") + if isname(condname): + condgroup = state.groupdict.get(condname) + if condgroup is None: + msg = "unknown group name: {0!r}".format(condname) + raise error(msg) + else: + try: + condgroup = int(condname) + except ValueError: + raise error("bad character in group name") + if state.lookbehind: + import warnings + warnings.warn('group references in lookbehind ' + 'assertions are not supported', + RuntimeWarning) + else: + # flags + if not source.next in FLAGS: + raise error("unexpected end of pattern") + while source.next in FLAGS: + state.flags = state.flags | FLAGS[sourceget()] + if group: + # parse group contents + if group == 2: + # anonymous group + group = None + else: + group = state.opengroup(name) + if condgroup: + p = _parse_sub_cond(source, state, condgroup) + else: + p = _parse_sub(source, state) + if not sourcematch(")"): + raise error("unbalanced parenthesis") + if group is not None: + state.closegroup(group) + subpatternappend((SUBPATTERN, (group, p))) + else: + while 1: + char = sourceget() + if char is None: + raise error("unexpected end of pattern") + if char == ")": + break + raise error("unknown extension") + + elif this == "^": + subpatternappend((AT, AT_BEGINNING)) + + elif this == "$": + subpattern.append((AT, AT_END)) + + elif this and this[0] == "\\": + code = _escape(source, this, state) + subpatternappend(code) + + else: + raise error("parser error") + + return subpattern + + +def parse(str, flags=0, pattern=None): + # parse 're' pattern into list of (opcode, argument) tuples + + source = Tokenizer(str) + + if pattern is None: + pattern = Pattern() + pattern.flags = flags + pattern.str = str + + p = _parse_sub(source, pattern, 0) + + tail = source.get() + if tail == ")": + raise error("unbalanced parenthesis") + elif tail: + raise error("bogus characters at end of regular expression") + + if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE: + # the VERBOSE flag was switched on inside the 
pattern. to be + # on the safe side, we'll parse the whole thing again... + return parse(str, p.pattern.flags) + + if flags & SRE_FLAG_DEBUG: + p.dump() + + return p + + +def parse_template(source, pattern): + # parse 're' replacement string into list of literals and + # group references + s = Tokenizer(source) + sget = s.get + p = [] + a = p.append + + def literal(literal, p=p, pappend=a): + if p and p[-1][0] is LITERAL: + p[-1] = LITERAL, p[-1][1] + literal + else: + pappend((LITERAL, literal)) + + sep = source[:0] + if type(sep) is type(""): + makechar = chr + else: + makechar = unichr + while 1: + this = sget() + if this is None: + break # end of replacement string + if this and this[0] == "\\": + # group + c = this[1:2] + if c == "g": + name = "" + if s.match("<"): + while 1: + char = sget() + if char is None: + raise error("unterminated group name") + if char == ">": + break + name = name + char + if not name: + raise error("missing group name") + try: + index = int(name) + if index < 0: + raise error("negative group number") + except ValueError: + if not isname(name): + raise error("bad character in group name") + try: + index = pattern.groupindex[name] + except KeyError: + msg = "unknown group name: {0!r}".format(name) + raise IndexError(msg) + a((MARK, index)) + elif c == "0": + if s.next in OCTDIGITS: + this = this + sget() + if s.next in OCTDIGITS: + this = this + sget() + literal(makechar(int(this[1:], 8) & 0xff)) + elif c in DIGITS: + isoctal = False + if s.next in DIGITS: + this = this + sget() + if (c in OCTDIGITS and this[2] in OCTDIGITS and + s.next in OCTDIGITS): + this = this + sget() + isoctal = True + literal(makechar(int(this[1:], 8) & 0xff)) + if not isoctal: + a((MARK, int(this[1:]))) + else: + try: + this = makechar(ESCAPES[this][1]) + except KeyError: + pass + literal(this) + else: + literal(this) + # convert template to groups and literals lists + i = 0 + groups = [] + groupsappend = groups.append + literals = [None] * len(p) + for c, s 
in p: + if c is MARK: + groupsappend((i, s)) + # literal[i] is already None + else: + literals[i] = s + i = i + 1 + return groups, literals + + +def expand_template(template, match): + g = match.group + sep = match.string[:0] + groups, literals = template + literals = literals[:] + try: + for index, group in groups: + literals[index] = s = g(group) + if s is None: + raise error("unmatched group") + except IndexError: + raise error("invalid group reference") + return sep.join(literals) diff --git a/gixy/core/utils.py b/gixy/core/utils.py new file mode 100644 index 0000000..08fdc57 --- /dev/null +++ b/gixy/core/utils.py @@ -0,0 +1,2 @@ +def is_indexed_name(name): + return isinstance(name, int) or (len(name) == 1 and '1' <= name <= '9') diff --git a/gixy/core/variable.py b/gixy/core/variable.py new file mode 100644 index 0000000..f41cef8 --- /dev/null +++ b/gixy/core/variable.py @@ -0,0 +1,114 @@ +import re +import logging + +from gixy.core.regexp import Regexp +from gixy.core.context import get_context + + +LOG = logging.getLogger(__name__) +# See ngx_http_script_compile in http/ngx_http_script.c +EXTRACT_RE = re.compile(r'\$([1-9]|[a-z_][a-z0-9_]*|\{[a-z0-9_]+\})', re.IGNORECASE) + + +def compile_script(script): + depends = [] + context = get_context() + for i, var in enumerate(EXTRACT_RE.split(str(script))): + if i % 2: + # Variable + var = var.strip('{}\x20') + var = context.get_var(var) + if var: + depends.append(var) + elif var: + # Literal + depends.append(Variable(name=None, value=var, have_script=False)) + return depends + + +class Variable(object): + def __init__(self, name, value=None, boundary=None, provider=None, have_script=True): + self.name = name + self.value = value + self.regexp = None + self.depends = None + self.boundary = boundary + self.provider = provider + if isinstance(value, Regexp): + self.regexp = value + elif have_script: + self.depends = compile_script(value) + + def can_contain(self, char): + # First of all check boundary set + if 
self.boundary and not self.boundary.can_contain(char): + return False + + # Then regexp + if self.regexp: + return self.regexp.can_contain(char, skip_literal=True) + + # Then dependencies + if self.depends: + return any(dep.can_contain(char) for dep in self.depends) + + # Otherwise user can't control value of this variable + return False + + def can_startswith(self, char): + # First of all check boundary set + if self.boundary and not self.boundary.can_startswith(char): + return False + + # Then regexp + if self.regexp: + return self.regexp.can_startswith(char) + + # Then dependencies + if self.depends: + return self.depends[0].can_startswith(char) + + # Otherwise user can't control value of this variable + return False + + def must_contain(self, char): + # First of all check boundary set + if self.boundary and self.boundary.must_contain(char): + return True + + # Then regexp + if self.regexp: + return self.regexp.must_contain(char) + + # Then dependencies + if self.depends: + return any(dep.must_contain(char) for dep in self.depends) + + # Otherwise checks literal + return self.value and char in self.value + + def must_startswith(self, char): + # First of all check boundary set + if self.boundary and self.boundary.must_startswith(char): + return True + + # Then regexp + if self.regexp: + return self.regexp.must_startswith(char) + + # Then dependencies + if self.depends: + return self.depends[0].must_startswith(char) + + # Otherwise checks literal + return self.value and self.value[0] == char + + @property + def providers(self): + result = [] + if self.provider: + result.append(self.provider) + if self.depends: + for dep in self.depends: + result += dep.providers + return result diff --git a/gixy/directives/__init__.py b/gixy/directives/__init__.py new file mode 100644 index 0000000..ae08050 --- /dev/null +++ b/gixy/directives/__init__.py @@ -0,0 +1,26 @@ +import os +from gixy.directives.directive import Directive + + +DIRECTIVES = {} + + +def import_directives(): 
+ files_list = os.listdir(os.path.dirname(__file__)) + for directive_file in files_list: + if not directive_file.endswith(".py") or directive_file.startswith('_'): + continue + __import__('gixy.directives.' + os.path.splitext(directive_file)[0], None, None, ['']) + + +def get_all(): + if len(DIRECTIVES): + return DIRECTIVES + + import_directives() + for klass in Directive.__subclasses__(): + if not klass.nginx_name: + continue + DIRECTIVES[klass.nginx_name] = klass + + return DIRECTIVES diff --git a/gixy/directives/block.py b/gixy/directives/block.py new file mode 100644 index 0000000..e55b725 --- /dev/null +++ b/gixy/directives/block.py @@ -0,0 +1,175 @@ +from cached_property import cached_property + +from gixy.directives.directive import Directive +from gixy.core.variable import Variable +from gixy.core.regexp import Regexp + + +def get_overrides(): + result = {} + for klass in Block.__subclasses__(): + if not klass.nginx_name: + continue + + if not klass.__name__.endswith('Block'): + continue + + result[klass.nginx_name] = klass + return result + + +class Block(Directive): + nginx_name = None + is_block = True + self_context = True + + def __init__(self, name, args): + super(Block, self).__init__(name, args) + self.children = [] + + def some(self, name, flat=True): + for child in self.children: + if child.name == name: + return child + if flat and child.is_block and not child.self_context: + result = child.some(name, flat=flat) + if result: + return result + return None + + def find(self, name, flat=False): + result = [] + for child in self.children: + if child.name == name: + result.append(child) + if flat and child.is_block and not child.self_context: + result += child.find(name) + return result + + def find_recursive(self, name): + result = [] + for child in self.children: + if child.name == name: + result.append(child) + if child.is_block: + result += child.find_recursive(name) + return result + + def append(self, directive): + directive.set_parent(self) + 
self.children.append(directive) + + def __str__(self): + return '{} {} {}'.format(self.name, ' '.join(self.args), '{') + + +class Root(Block): + nginx_name = None + + def __init__(self): + super(Root, self).__init__(None, []) + + +class HttpBlock(Block): + nginx_name = 'http' + + def __init__(self, name, args): + super(HttpBlock, self).__init__(name, args) + + +class ServerBlock(Block): + nginx_name = 'server' + + def __init__(self, name, args): + super(ServerBlock, self).__init__(name, args) + + def get_names(self): + return self.find('server_name') + + def __str__(self): + server_names = [str(sn) for sn in self.find('server_name')] + if server_names: + return 'server {{\n{}'.format('\n'.join(server_names[:2])) + return 'server {' + + +class LocationBlock(Block): + nginx_name = 'location' + provide_variables = True + + def __init__(self, name, args): + super(LocationBlock, self).__init__(name, args) + if len(args) == 2: + self.modifier, self.path = args + else: + self.modifier = None + self.path = args[0] + + @property + def is_internal(self): + return self.some('internal') is not None + + @cached_property + def variables(self): + if not self.modifier or self.modifier not in ('~', '~*'): + return [] + + regexp = Regexp(self.path, case_sensitive=self.modifier == '~') + result = [] + for name, group in regexp.groups.items(): + result.append(Variable(name=name, value=group, boundary=None, provider=self)) + return result + + +class IfBlock(Block): + nginx_name = 'if' + self_context = False + + def __init__(self, name, args): + super(IfBlock, self).__init__(name, args) + self.operand = None + self.value = None + self.variable = None + + if len(args) == 1: + # if ($slow) + self.variable = args[0] + elif len(args) == 2: + # if (!-e $foo) + self.operand, self.value = args + elif len(args) == 3: + # if ($request_method = POST) + self.variable, self.operand, self.value = args + else: + raise Exception('Unknown "if" definition') + + def __str__(self): + return '{} ({}) 
{{'.format(self.name, ' '.join(self.args)) + + +class IncludeBlock(Block): + nginx_name = 'include' + self_context = False + + def __init__(self, name, args): + super(IncludeBlock, self).__init__(name, args) + self.file_path = args[0] + + def __str__(self): + return 'include {};'.format(self.file_path) + + +class MapBlock(Block): + nginx_name = 'map' + self_context = False + provide_variables = True + + def __init__(self, name, args): + super(MapBlock, self).__init__(name, args) + self.source = args[0] + self.variable = args[1].strip('$') + + @cached_property + def variables(self): + # TODO(buglloc): Finish him! + return [Variable(name=self.variable, value='', boundary=None, provider=self, have_script=False)] diff --git a/gixy/directives/directive.py b/gixy/directives/directive.py new file mode 100644 index 0000000..d9283bc --- /dev/null +++ b/gixy/directives/directive.py @@ -0,0 +1,119 @@ +from gixy.core.variable import Variable +from gixy.core.regexp import Regexp + + +def get_overrides(): + result = {} + for klass in Directive.__subclasses__(): + if not klass.nginx_name: + continue + + if not klass.__name__.endswith('Directive'): + continue + + result[klass.nginx_name] = klass + return result + + +class Directive(object): + nginx_name = None + is_block = False + provide_variables = False + + def __init__(self, name, args, raw=None): + self.name = name + self.parent = None + self.args = args + self._raw = raw + + def set_parent(self, parent): + self.parent = parent + + @property + def parents(self): + parent = self.parent + while parent: + yield parent + parent = parent.parent + + @property + def variables(self): + raise NotImplementedError() + + def __str__(self): + return '{} {};'.format(self.name, ' '.join(self.args)) + + +class AddHeaderDirective(Directive): + nginx_name = 'add_header' + + def __init__(self, name, args): + super(AddHeaderDirective, self).__init__(name, args) + self.header = args[0].lower() + self.value = args[1] + self.always = False + if 
len(args) > 2 and args[2] == 'always': + self.always = True + + +class SetDirective(Directive): + nginx_name = 'set' + provide_variables = True + + def __init__(self, name, args): + super(SetDirective, self).__init__(name, args) + self.variable = args[0].strip('$') + self.value = args[1] + + @property + def variables(self): + return [Variable(name=self.variable, value=self.value, provider=self)] + + +class SetByLuaDirective(Directive): + nginx_name = 'set_by_lua' + provide_variables = True + + def __init__(self, name, args): + super(SetByLuaDirective, self).__init__(name, args) + self.variable = args[0].strip('$') + self.value = args[1] + + @property + def variables(self): + return [Variable(name=self.variable, provider=self, have_script=False)] + + +class RewriteDirective(Directive): + nginx_name = 'rewrite' + provide_variables = True + boundary = Regexp('[^\s\r\n]') + + def __init__(self, name, args): + super(RewriteDirective, self).__init__(name, args) + self.pattern = args[0] + self.replace = args[1] + self.flag = None + if len(args) > 2: + self.flag = args[2] + + @property + def variables(self): + regexp = Regexp(self.pattern, case_sensitive=True) + result = [] + for name, group in regexp.groups.items(): + result.append(Variable(name=name, value=group, boundary=self.boundary, provider=self)) + return result + + +class RootDirective(Directive): + nginx_name = 'root' + provide_variables = True + + def __init__(self, name, args): + super(RootDirective, self).__init__(name, args) + self.path = args[0] + + @property + def variables(self): + return [Variable(name='document_root', value=self.path, provider=self)] diff --git a/gixy/formatters/__init__.py b/gixy/formatters/__init__.py new file mode 100644 index 0000000..1884b7b --- /dev/null +++ b/gixy/formatters/__init__.py @@ -0,0 +1,23 @@ +import os +from gixy.formatters.base import BaseFormatter + +FORMATTERS = {} + + +def import_formatters(): + files_list = os.listdir(os.path.dirname(__file__)) + for 
formatter_file in files_list: + if not formatter_file.endswith(".py") or formatter_file.startswith('_'): + continue + __import__('gixy.formatters.' + os.path.splitext(formatter_file)[0], None, None, ['']) + + +def get_all(): + if len(FORMATTERS): + return FORMATTERS + + import_formatters() + for klass in BaseFormatter.__subclasses__(): + FORMATTERS[klass.__name__.replace('Formatter', '').lower()] = klass + + return FORMATTERS diff --git a/gixy/formatters/base.py b/gixy/formatters/base.py new file mode 100644 index 0000000..6aeaa1d --- /dev/null +++ b/gixy/formatters/base.py @@ -0,0 +1,84 @@ +from __future__ import absolute_import + +from gixy.directives import block + + +class BaseFormatter(object): + skip_parents = {block.Root, block.HttpBlock} + + def format_reports(self, reports, stats): + raise NotImplementedError("Formatter must override format_reports function") + + def format(self, manager): + reports = [] + for result in manager.get_results(): + report = self._prepare_result(manager.root, + summary=result.summary, + severity=result.severity, + description=result.description, + issues=result.issues, + plugin=result.name, + help_url=result.help_url) + reports.extend(report) + + return self.format_reports(reports, manager.stats) + + def _prepare_result(self, root, issues, severity, summary, description, plugin, help_url): + result = {} + for issue in issues: + report = dict( + plugin=plugin, + summary=issue.summary or summary, + severity=issue.severity or severity, + description=issue.description or description, + help_url=issue.help_url or help_url, + reason=issue.reason or '', + ) + key = ''.join(report.values()) + report['directives'] = issue.directives + if key in result: + result[key]['directives'].extend(report['directives']) + else: + result[key] = report + + for report in result.values(): + if report['directives']: + config = self._resolve_config(root, report['directives']) + else: + config = '' + + del report['directives'] + report['config'] = config 
+ yield report + + def _resolve_config(self, root, directives): + points = set() + for directive in directives: + points.add(directive) + points.update(p for p in directive.parents) + + result = self._traverse_tree(root, points, 0) + return '\n'.join(result) + + def _traverse_tree(self, tree, points, level): + result = [] + for leap in tree.children: + if leap not in points: + continue + printable = type(leap) not in self.skip_parents + # Special hack for includes + # TODO(buglloc): fix me + have_parentheses = type(leap) != block.IncludeBlock + + if printable: + if leap.is_block: + result.append('') + directive = str(leap).replace('\n', '\n' + '\t' * (level + 1)) + result.append('{:s}{:s}'.format('\t' * level, directive)) + + if leap.is_block: + result.extend(self._traverse_tree(leap, points, level + 1 if printable else level)) + if printable and have_parentheses: + result.append('{:s}}}'.format('\t' * level)) + + return result diff --git a/gixy/formatters/console.py b/gixy/formatters/console.py new file mode 100644 index 0000000..560ab66 --- /dev/null +++ b/gixy/formatters/console.py @@ -0,0 +1,13 @@ +from __future__ import absolute_import +from jinja2 import Environment, PackageLoader + +from gixy.formatters.base import BaseFormatter + + +class ConsoleFormatter(BaseFormatter): + def __init__(self): + env = Environment(loader=PackageLoader('gixy', 'formatters/templates'), trim_blocks=True, lstrip_blocks=True) + self.template = env.get_template('console.j2') + + def format_reports(self, reports, stats): + return self.template.render(issues=reports, stats=stats) diff --git a/gixy/formatters/json.py b/gixy/formatters/json.py new file mode 100644 index 0000000..b80e3e2 --- /dev/null +++ b/gixy/formatters/json.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import + +import json + +from gixy.formatters.base import BaseFormatter + + +class JsonFormatter(BaseFormatter): + def format_reports(self, reports, stats): + return json.dumps(reports, sort_keys=True, 
indent=2, separators=(',', ': ')) diff --git a/gixy/formatters/templates/console.j2 b/gixy/formatters/templates/console.j2 new file mode 100644 index 0000000..caf341d --- /dev/null +++ b/gixy/formatters/templates/console.j2 @@ -0,0 +1,36 @@ +{% set colors = {'DEF': '\033[0m', 'TITLE': '\033[95m', 'UNSPECIFIED': '\033[0m', 'LOW': '\033[94m', 'MEDIUM': '\033[93m', 'HIGH': '\033[91m'} %} + + +{{ colors['TITLE'] }}==================== Results ==================={{ colors['DEF'] }} +{% if not issues %} +No issues found. +{% else %} + +{% for issue in issues|sort(attribute='severity') %} +{{ colors[issue.severity] }}Problem: [{{ issue.plugin }}] {{ issue.summary }} +{% if issue.description %} +Description: {{ issue.description }} +{% endif %} +{% if issue.help_url %} +Additional info: {{ issue.help_url }} +{% endif %} +{% if issue.reason %} +Reason: {{ issue.reason }} +{% endif %} +{{ colors['DEF'] }}Pseudo config:{{ issue.config }} + +{% if not loop.last %} +--------8<--------8<--------8<--------8<-------- + +{% endif %} +{% endfor %} +{% endif %} + +{% if stats %} +{{ colors['TITLE'] }}==================== Summary ==================={{ colors['DEF'] }} +Total issues: + Unspecified: {{ stats['UNSPECIFIED'] }} + Low: {{ stats['LOW'] }} + Medium: {{ stats['MEDIUM'] }} + High: {{ stats['HIGH'] }} +{% endif %} diff --git a/gixy/formatters/templates/text.j2 b/gixy/formatters/templates/text.j2 new file mode 100644 index 0000000..0f0362a --- /dev/null +++ b/gixy/formatters/templates/text.j2 @@ -0,0 +1,35 @@ + +==================== Results =================== +{% if not issues %} +No issues found. 
+{% else %} + +{% for issue in issues|sort(attribute='severity') %} +Problem: [{{ issue.plugin }}] {{ issue.summary }} +Severity: {{ issue.severity }} +{% if issue.description %} +Description: {{ issue.description }} +{% endif %} +{% if issue.help_url %} +Additional info: {{ issue.help_url }} +{% endif %} +{% if issue.reason %} +Reason: {{ issue.reason }} +{% endif %} +Pseudo config: {{ issue.config }} + +{% if not loop.last %} +--------8<--------8<--------8<--------8<-------- + +{% endif %} +{% endfor %} +{% endif %} + +{% if stats %} +==================== Summary =================== +Total issues: + Unspecified: {{ stats['UNSPECIFIED'] }} + Low: {{ stats['LOW'] }} + Medium: {{ stats['MEDIUM'] }} + High: {{ stats['HIGH'] }} +{% endif %} diff --git a/gixy/formatters/text.py b/gixy/formatters/text.py new file mode 100644 index 0000000..addfe07 --- /dev/null +++ b/gixy/formatters/text.py @@ -0,0 +1,13 @@ +from __future__ import absolute_import +from jinja2 import Environment, PackageLoader + +from gixy.formatters.base import BaseFormatter + + +class TextFormatter(BaseFormatter): + def __init__(self): + env = Environment(loader=PackageLoader('gixy', 'formatters/templates'), trim_blocks=True, lstrip_blocks=True) + self.template = env.get_template('text.j2') + + def format_reports(self, reports, stats): + return self.template.render(issues=reports, stats=stats) diff --git a/gixy/parser/__init__.py b/gixy/parser/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gixy/parser/nginx_parser.py b/gixy/parser/nginx_parser.py new file mode 100644 index 0000000..a3e28ba --- /dev/null +++ b/gixy/parser/nginx_parser.py @@ -0,0 +1,142 @@ +"""NginxParser is a member object of the NginxConfigurator class.""" +import os +import glob +import logging +import fnmatch + +from pyparsing import ParseException + +from gixy.parser import raw_parser +from gixy.directives import block, directive + +LOG = logging.getLogger(__name__) + + +class NginxParser(object): + + def 
__init__(self, file_path, allow_includes=True): + self.base_file_path = file_path + self.cwd = os.path.dirname(file_path) + self.configs = {} + self.is_dump = False + self.allow_includes = allow_includes + self.directives = {} + self._init_directives() + + def parse(self, file_path, root=None): + LOG.debug("Parse file: {}".format(file_path)) + + if not root: + root = block.Root() + parser = raw_parser.RawParser() + parsed = parser.parse(file_path) + if len(parsed) and parsed[0].getName() == 'file_delimiter': + # Were parse nginx dump + LOG.info('Switched to parse nginx configuration dump.') + root_filename = self._prepare_dump(parsed) + self.is_dump = True + self.cwd = os.path.dirname(root_filename) + parsed = self.configs[root_filename] + + self.parse_block(parsed, root) + return root + + def parse_block(self, parsed_block, parent): + for parsed in parsed_block: + parsed_type = parsed.getName() + parsed_name = parsed[0] + parsed_args = parsed[1:] + if parsed_type == 'include': + # TODO: WTF?! 
+ self._resolve_include(parsed_args, parent) + else: + directive_inst = self.directive_factory(parsed_type, parsed_name, parsed_args) + if directive_inst: + parent.append(directive_inst) + + def directive_factory(self, parsed_type, parsed_name, parsed_args): + klass = self._get_directive_class(parsed_type, parsed_name) + if not klass: + return None + + if klass.is_block: + args = [str(v).strip() for v in parsed_args[0]] + children = parsed_args[1] + + inst = klass(parsed_name, args) + self.parse_block(children, inst) + return inst + else: + args = [str(v).strip() for v in parsed_args] + return klass(parsed_name, args) + + def _get_directive_class(self, parsed_type, parsed_name): + if parsed_type in self.directives and parsed_name in self.directives[parsed_type]: + return self.directives[parsed_type][parsed_name] + elif parsed_type == 'block': + return block.Block + elif parsed_type == 'directive': + return directive.Directive + elif parsed_type == 'unparsed_block': + LOG.warning('Skip unparseable block: "%s"', parsed_name) + return None + else: + return None + + def _init_directives(self): + self.directives['block'] = block.get_overrides() + self.directives['directive'] = directive.get_overrides() + + def _resolve_include(self, args, parent): + pattern = args[0] + # TODO(buglloc): maybe file providers? 
+ if self.is_dump: + return self._resolve_dump_include(pattern=pattern, parent=parent) + if not self.allow_includes: + LOG.debug('Includes are disallowed, skip: {}'.format(pattern)) + return + + return self._resolve_file_include(pattern=pattern, parent=parent) + + def _resolve_file_include(self, pattern, parent): + path = os.path.join(self.cwd, pattern) + file_path = None + for file_path in glob.iglob(path): + include = block.IncludeBlock('include', [file_path]) + parent.append(include) + try: + self.parse(file_path, include) + except ParseException as e: + LOG.error('Failed to parse include "{file}": {error}'.format(file=file_path, error=str(e))) + + if not file_path: + LOG.warning("File not found: {}".format(path)) + + def _resolve_dump_include(self, pattern, parent): + path = os.path.join(self.cwd, pattern) + founded = False + for file_path, parsed in self.configs.items(): + if fnmatch.fnmatch(file_path, path): + founded = True + include = block.IncludeBlock('include', [file_path]) + parent.append(include) + try: + self.parse_block(parsed, include) + except ParseException as e: + LOG.error('Failed to parse include "{file}": {error}'.format(file=file_path, error=str(e))) + + if not founded: + LOG.warning("File not found: {}".format(path)) + + def _prepare_dump(self, parsed_block): + filename = '' + root_filename = '' + for parsed in parsed_block: + if parsed.getName() == 'file_delimiter': + if not filename: + root_filename = parsed[0] + filename = parsed[0] + self.configs[filename] = [] + continue + self.configs[filename].append(parsed) + return root_filename diff --git a/gixy/parser/raw_parser.py b/gixy/parser/raw_parser.py new file mode 100644 index 0000000..99946fd --- /dev/null +++ b/gixy/parser/raw_parser.py @@ -0,0 +1,164 @@ +"""Very low-level nginx config parser based on pyparsing.""" +import re +import logging +from cached_property import cached_property + +from pyparsing import ( + Literal, Suppress, White, Word, alphanums, Forward, Group, Optional, 
Combine, + Keyword, OneOrMore, ZeroOrMore, Regex, QuotedString, nestedExpr) + + +LOG = logging.getLogger(__name__) + + +class NginxQuotedString(QuotedString): + def __init__(self, quoteChar): + super(NginxQuotedString, self).__init__(quoteChar, escChar='\\', multiline=True) + # Nginx parse quoted values in special manner: + # '^https?:\/\/yandex\.ru\/\00\'\"' -> ^https?:\/\/yandex\.ru\/\00'" + # TODO(buglloc): research and find another special characters! + + self.escCharReplacePattern = '\\\\(\'|")' + + +class RawParser(object): + """ + A class that parses nginx configuration with pyparsing + """ + + def __init__(self): + self._script = None + + def parse(self, file_path): + """ + Returns the parsed tree. + """ + # Temporary, dirty hack :( + content = open(file_path).read() + content = re.sub(r'(if\s.+)\)\)(\s*\{)?$', '\\1) )\\2', content, flags=re.MULTILINE) + return self.script.parseString(content, parseAll=True) + # return self.script.parseFile(file_path, parseAll=True) + + @cached_property + def script(self): + # constants + left_bracket = Suppress("{") + right_bracket = Suppress("}") + semicolon = Suppress(";") + space = White().suppress() + keyword = Word(alphanums + ".+-_/") + path = Word(alphanums + ".-_/") + variable = Word("$_-" + alphanums) + value_wq = Regex(r'(?:\([^\s;]*\)|\$\{\w+\}|[^\s;(){}])+') + value_sq = NginxQuotedString(quoteChar="'") + value_dq = NginxQuotedString(quoteChar='"') + value = (value_dq | value_sq | value_wq) + # modifier for location uri [ = | ~ | ~* | ^~ ] + location_modifier = ( + Keyword("=") | + Keyword("~*") | Keyword("~") | + Keyword("^~")) + # modifier for if statement + if_modifier = Combine(Optional("!") + ( + Keyword("=") | + Keyword("~*") | Keyword("~") | + (Literal("-") + (Literal("f") | Literal("d") | Literal("e") | Literal("x"))))) + condition = ( + (if_modifier + Optional(space) + value) | + (variable + Optional(space + if_modifier + Optional(space) + value)) + ) + + # rules + include = ( + Keyword("include") + + 
space + + value + + semicolon + )("include") + + directive = ( + keyword + + ZeroOrMore(space + value) + + semicolon + )("directive") + + file_delimiter = ( + Suppress("# configuration file ") + + path + + Suppress(":") + )("file_delimiter") + + comment = ( + Suppress('#') + + Regex(r".*") + )("comment") + + hash_value = Group( + value + + ZeroOrMore(space + value) + + semicolon + )("hash_value") + + generic_block = Forward() + if_block = Forward() + location_block = Forward() + hash_block = Forward() + unparsed_block = Forward() + + sub_block = OneOrMore(Group(if_block | + location_block | + hash_block | + generic_block | + include | + directive | + file_delimiter | + comment | + unparsed_block)) + + if_block << ( + Keyword("if") + + Suppress("(") + + Group(condition) + + Suppress(")") + + Group( + left_bracket + + Optional(sub_block) + + right_bracket) + )("block") + + location_block << ( + Keyword("location") + + Group( + Optional(space + location_modifier) + + Optional(space) + value) + + Group( + left_bracket + + Optional(sub_block) + + right_bracket) + )("block") + + hash_block << ( + keyword + + Group(OneOrMore(space + variable)) + + Group( + left_bracket + + Optional(OneOrMore(hash_value)) + + right_bracket) + )("block") + + generic_block << ( + keyword + + Group(ZeroOrMore(space + variable)) + + Group( + left_bracket + + Optional(sub_block) + + right_bracket) + )("block") + + unparsed_block << ( + keyword + + Group(ZeroOrMore(space + variable)) + + nestedExpr(opener="{", closer="}") + )("unparsed_block") + + return sub_block diff --git a/gixy/plugins/__init__.py b/gixy/plugins/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gixy/plugins/_internal_rewrite.py b/gixy/plugins/_internal_rewrite.py new file mode 100644 index 0000000..6cd1cf3 --- /dev/null +++ b/gixy/plugins/_internal_rewrite.py @@ -0,0 +1,87 @@ +import re +import logging +import gixy +from gixy.plugins.plugin import Plugin +from gixy.core.regexp import Regexp +from 
gixy.core.variable import EXTRACT_RE +from gixy.core.utils import is_indexed_name + + +LOG = logging.getLogger(__name__) + +# TODO(buglloc): Complete it! + + +class internal_rewrite(Plugin): + """ + Insecure example: + location ~* ^/internal-proxy/(https?)/(.*?)/(.*) { + internal; + proxy_pass $1://$2/$3; + } + + rewrite "^/([^?.]+[^/?.])(?:\?(.*))?$" "/$1.xml" last; + """ + + summary = 'Some internal rewrite' + severity = gixy.severity.HIGH + description = 'Some descr' + help_url = 'https://github.com/yandex/gixy/wiki/ru/internalrewrite' + directives = ['location'] + + def audit(self, directive): + if not directive.is_internal: + # Not internal location + return + + values = _gen_location_values(directive) + # print([x for x in values]) + for rewrite in directive.parent.find('rewrite', flat=True): + if rewrite.flag not in {None, 'last', 'break'}: + # Not internal rewrite + continue + rewrite_regex = _construct_rewrite_regex(rewrite) + if not rewrite_regex: + # We can't build results regexp :( + continue + + for value in values: + if re.match(rewrite_regex, value): + # YAY! 
+ self.add_issue([directive, rewrite]) + + +def _gen_location_values(location): + if location.modifier not in ('~', '~*'): + # Prefixed location + return [location.path] + + regex = Regexp(location.path, case_sensitive=location.modifier == '~*', strict=True) + return regex.generate(char='a', anchored=False) + + +def _construct_rewrite_regex(rewrite): + regex = Regexp(rewrite.pattern, case_sensitive=True) + parts = {} + for name, group in regex.groups.items(): + parts[name] = group + + return _compile_script(rewrite.replace, parts) + + +def _compile_script(script, parts): + result = [] + for i, var in enumerate(EXTRACT_RE.split(str(script))): + if i % 2: + # Variable + var = var.strip('{}\x20') + if is_indexed_name(var): + var = int(var) + if var not in parts: + LOG.warn('Can\'t find variable "{}"'.format(var)) + return + result.append(str(parts[var])) + elif var: + # Literal + result.append(var) + return ''.join(result) diff --git a/gixy/plugins/add_header_multiline.py b/gixy/plugins/add_header_multiline.py new file mode 100644 index 0000000..0693a8c --- /dev/null +++ b/gixy/plugins/add_header_multiline.py @@ -0,0 +1,46 @@ +import gixy +from gixy.plugins.plugin import Plugin + + +class add_header_multiline(Plugin): + """ + Insecure example: +add_header Content-Security-Policy " + default-src: 'none'; + img-src data: https://mc.yandex.ru https://yastatic.net *.yandex.net https://mc.yandex.${tld} https://mc.yandex.ru; + font-src data: https://yastatic.net;"; + """ + summary = 'Found a multi-line header.' + severity = gixy.severity.LOW + description = ('Multi-line headers are deprecated (see RFC 7230). ' + 'Some clients never supports them (e.g. 
IE/Edge).') + help_url = 'https://github.com/yandex/gixy/wiki/ru/addheadermultiline' + directives = ['add_header', 'more_set_headers'] + + def audit(self, directive): + header_values = get_header_values(directive) + for value in header_values: + if '\n\x20' in value or '\n\t' in value: + self.add_issue(directive=directive) + break + + +def get_header_values(directive): + if directive.name == 'add_header': + return [directive.args[1]] + + # See headers more documentation: https://github.com/openresty/headers-more-nginx-module#description + result = [] + skip_next = False + for arg in directive.args: + if arg in {'-s', '-t'}: + # Skip next value, because it's not a header + skip_next = True + elif arg.startswith('-'): + # Skip any options + pass + elif skip_next: + skip_next = False + elif not skip_next: + result.append(arg) + return result diff --git a/gixy/plugins/add_header_redefinition.py b/gixy/plugins/add_header_redefinition.py new file mode 100644 index 0000000..ac54063 --- /dev/null +++ b/gixy/plugins/add_header_redefinition.py @@ -0,0 +1,69 @@ +import gixy +from gixy.plugins.plugin import Plugin + + +class add_header_redefinition(Plugin): + """ + Insecure example: + server { + add_header X-Content-Type-Options nosniff; + location / { + add_header X-Frame-Options DENY; + } + } + """ + summary = 'Nested "add_header" drops parent headers.' + severity = gixy.severity.MEDIUM + description = ('"add_header" replaces ALL parent headers. 
' + 'See documentation: http://nginx.org/en/docs/http/ngx_http_headers_module.html#add_header') + help_url = 'https://github.com/yandex/gixy/wiki/ru/addheaderredefinition' + directives = ['server', 'location', 'if'] + options = {'headers': {'x-frame-options', + 'x-content-type-options', + 'x-xss-protection', + 'content-security-policy', + 'strict-transport-security', + 'cache-control'} + } + + def __init__(self, config): + super(add_header_redefinition, self).__init__(config) + self.interesting_headers = self.config.get('headers') + + def audit(self, directive): + if not directive.is_block: + # Skip all not block directives + return + + actual_headers = get_headers(directive) + if not actual_headers: + return + + for parent in directive.parents: + parent_headers = get_headers(parent) + if not parent_headers: + continue + + diff = (parent_headers - actual_headers) & self.interesting_headers + + if len(diff): + self._report_issue(directive, parent, diff) + + break + + def _report_issue(self, current, parent, diff): + directives = [] + # Add headers from parent level + directives.extend(parent.find('add_header')) + # Add headers from current level + directives.extend(current.find('add_header')) + reason = 'Parent headers "{headers}" was dropped in current level'.format(headers='", "'.join(diff)) + self.add_issue(directive=directives, reason=reason) + + +def get_headers(directive): + headers = directive.find('add_header') + if not headers: + return set() + + return set(map(lambda d: d.header, headers)) diff --git a/gixy/plugins/force_https.py b/gixy/plugins/force_https.py new file mode 100644 index 0000000..c7d2503 --- /dev/null +++ b/gixy/plugins/force_https.py @@ -0,0 +1,20 @@ +import gixy +from gixy.plugins.plugin import Plugin + + +class force_https(Plugin): + """ + Insecure example: + rewrite ^.*/(foo)(/|/index.xml)?$ http://test.com/foo?; + """ + summary = 'Found redirection to HTTP URL.' + severity = gixy.severity.LOW + description = 'Should be https://... 
URL while redirection.' + help_url = 'https://github.com/yandex/gixy/wiki/ru/forcehttps' + directives = ['rewrite', 'return'] + + def audit(self, directive): + for a in directive.args: + if a.startswith('http://'): + self.add_issue(directive=directive) + break diff --git a/gixy/plugins/host_spoofing.py b/gixy/plugins/host_spoofing.py new file mode 100644 index 0000000..0c9ab71 --- /dev/null +++ b/gixy/plugins/host_spoofing.py @@ -0,0 +1,23 @@ +import gixy +from gixy.plugins.plugin import Plugin + + +class host_spoofing(Plugin): + """ + Insecure example: + proxy_set_header Host $http_host + """ + summary = 'The proxied Host header may be spoofed.' + severity = gixy.severity.MEDIUM + description = 'In most cases "$host" variable are more appropriate, just use it.' + help_url = 'https://github.com/yandex/gixy/wiki/ru/hostspoofing' + directives = ['proxy_set_header'] + + def audit(self, directive): + name, value = directive.args + if name.lower() != 'host': + # Not a "Host" header + return + + if value == '$http_host' or value.startswith('$arg_'): + self.add_issue(directive=directive) diff --git a/gixy/plugins/http_splitting.py b/gixy/plugins/http_splitting.py new file mode 100644 index 0000000..8206147 --- /dev/null +++ b/gixy/plugins/http_splitting.py @@ -0,0 +1,43 @@ +import gixy +from gixy.plugins.plugin import Plugin +from gixy.core.variable import compile_script + + +class http_splitting(Plugin): + """ + Insecure examples: + rewrite ^ http://$host$uri; + return 301 http://$host$uri; + proxy_set_header "X-Original-Uri" $uri; + proxy_pass http://upstream$document_uri; + + location ~ /proxy/(a|b)/(\W*)$ { + set $path $2; + proxy_pass http://storage/$path; + } + """ + + summary = 'Possible HTTP-Splitting vulnerability.' + severity = gixy.severity.HIGH + description = 'Using variables that can contain "\\n" may lead to http injection.' 
+ help_url = 'https://github.com/yandex/gixy/wiki/ru/httpsplitting' + directives = ['rewrite', 'return', 'add_header', 'proxy_set_header', 'proxy_pass'] + + def audit(self, directive): + value = _get_value(directive) + if not value: + return + + for var in compile_script(value): + if not var.can_contain('\n'): + continue + reason = 'At least variable "${var}" can contain "\\n"'.format(var=var.name) + self.add_issue(directive=[directive] + var.providers, reason=reason) + + +def _get_value(directive): + if directive.name == 'proxy_pass' and len(directive.args) >= 1: + return directive.args[0] + elif len(directive.args) >= 2: + return directive.args[1] + return None diff --git a/gixy/plugins/origins.py b/gixy/plugins/origins.py new file mode 100644 index 0000000..258c453 --- /dev/null +++ b/gixy/plugins/origins.py @@ -0,0 +1,71 @@ +import re +import logging +import gixy +from gixy.plugins.plugin import Plugin +from gixy.core.regexp import Regexp + +LOG = logging.getLogger(__name__) + + +class origins(Plugin): + """ + Insecure example: + if ($http_referer !~ "^https?://([^/]+metrika.*yandex\.ru/"){ + add_header X-Frame-Options SAMEORIGIN; + } + """ + summary = 'Validation regex for "origin" or "referrer" matches untrusted domain.' + severity = gixy.severity.MEDIUM + description = 'Improve the regular expression to match only trusted referrers.' + help_url = 'https://github.com/yandex/gixy/wiki/ru/origins' + directives = ['if'] + options = { + 'domains': ['*'], + 'https_only': False + } + + def __init__(self, config): + super(origins, self).__init__(config) + if self.config.get('domains') and self.config.get('domains')[0] and self.config.get('domains')[0] != '*': + domains = '|'.join(re.escape(d) for d in self.config.get('domains')) + else: + domains = '[^/.]*\.[^/]{2,7}' + + scheme = 'https{http}'.format(http=('?' 
if not self.config.get('https_only') else '')) + regex = r'^{scheme}://(?:[^/.]*\.){{0,10}}(?:{domains})(?::\d*)?(?:/|\?|$)'.format( + scheme=scheme, + domains=domains + ) + self.valid_re = re.compile(regex) + + def audit(self, directive): + if directive.operand not in {'~', '~*', '!~', '!~*'}: + # Not regexp + return + + if directive.variable not in {'$http_referer', '$http_origin'}: + # Not interesting + return + + invalid_referers = set() + regexp = Regexp(directive.value, case_sensitive=(directive.operand in {'~', '!~'})) + for value in regexp.generate('/', anchored=True): + if value.startswith('^'): + value = value[1:] + else: + value = 'http://evil.com/' + value + + if value.endswith('$'): + value = value[:-1] + elif not value.endswith('/'): + value += '.evil.com' + + if not self.valid_re.match(value): + invalid_referers.add(value) + + if invalid_referers: + invalid_referers = '", "'.join(invalid_referers) + name = 'origin' if directive.variable == '$http_origin' else 'referrer' + severity = gixy.severity.HIGH if directive.variable == '$http_origin' else gixy.severity.MEDIUM + reason = 'Regex matches "{value}" as a valid {name}.'.format(value=invalid_referers, name=name) + self.add_issue(directive=directive, reason=reason, severity=severity) diff --git a/gixy/plugins/plugin.py b/gixy/plugins/plugin.py new file mode 100644 index 0000000..87a349f --- /dev/null +++ b/gixy/plugins/plugin.py @@ -0,0 +1,30 @@ +import gixy +from gixy.core.issue import Issue + + +class Plugin(object): + summary = '' + description = '' + help_url = '' + severity = gixy.severity.UNSPECIFIED + directives = [] + options = {} + + def __init__(self, config): + self._issues = [] + self.config = config + + def add_issue(self, directive, summary=None, severity=None, description=None, reason=None, help_url=None): + self._issues.append(Issue(self, directives=directive, summary=summary, severity=severity, + description=description, reason=reason, help_url=help_url)) + + def audit(self, 
directive): + pass + + @property + def issues(self): + return self._issues + + @property + def name(self): + return self.__class__.__name__ diff --git a/gixy/plugins/ssrf.py b/gixy/plugins/ssrf.py new file mode 100644 index 0000000..dd5a88f --- /dev/null +++ b/gixy/plugins/ssrf.py @@ -0,0 +1,62 @@ +import re + +import gixy +from gixy.plugins.plugin import Plugin +from gixy.core.context import get_context +from gixy.core.variable import compile_script + + +class ssrf(Plugin): + """ + Insecure examples: + location ~ /proxy/(.*)/(.*)/(.*)$ { + set $scheme $1; + set $host $2; + set $path $3; + proxy_pass $scheme://$host/$path; + } + + location /proxy/ { + proxy_pass $arg_some; + } + """ + + summary = 'Possible SSRF (Server Side Request Forgery) vulnerability.' + severity = gixy.severity.HIGH + description = 'The configuration may allow attacker to create a arbitrary requests from the vulnerable server.' + help_url = 'https://github.com/yandex/gixy/wiki/ru/ssrf' + directives = ['proxy_pass'] + + def __init__(self, config): + super(ssrf, self).__init__(config) + self.parse_uri_re = re.compile(r'(?P[^?#/)]+://)?(?P[^?#/)]+)') + + def audit(self, directive): + value = directive.args[0] + if not value: + return + + context = get_context() + if context.block.name == 'location' and context.block.is_internal: + # Exclude internal locations + return + + parsed = self.parse_uri_re.match(value) + if not parsed: + return + + res = self._check_script(parsed.group('scheme'), directive) + if not res: + self._check_script(parsed.group('host'), directive) + + def _check_script(self, script, directive): + for var in compile_script(script): + if var.must_contain('/'): + # Skip variable checks + return False + if var.can_contain('.'): + # Yay! Our variable can contain any symbols! 
+ reason = 'At least variable "${var}" can contain untrusted user input'.format(var=var.name) + self.add_issue(directive=[directive] + var.providers, reason=reason) + return True + return False diff --git a/gixy/plugins/valid_referers.py b/gixy/plugins/valid_referers.py new file mode 100644 index 0000000..cfdec36 --- /dev/null +++ b/gixy/plugins/valid_referers.py @@ -0,0 +1,18 @@ +import gixy +from gixy.plugins.plugin import Plugin + + +class valid_referers(Plugin): + """ + Insecure example: + valid_referers none server_names *.webvisor.com; + """ + summary = 'Used "none" as valid referer.' + severity = gixy.severity.HIGH + description = 'Never trust undefined referer.' + help_url = 'https://github.com/yandex/gixy/wiki/ru/validreferers' + directives = ['valid_referers'] + + def audit(self, directive): + if 'none' in directive.args: + self.add_issue(directive=directive) diff --git a/requirements.dev.pip b/requirements.dev.pip new file mode 100644 index 0000000..3696f8e --- /dev/null +++ b/requirements.dev.pip @@ -0,0 +1,4 @@ +nose>=1.3.7 +mock>=2.0.0 +coverage>=4.3 +flake8>=3.2 \ No newline at end of file diff --git a/requirements.pip b/requirements.pip new file mode 100644 index 0000000..dbf6d5f --- /dev/null +++ b/requirements.pip @@ -0,0 +1,6 @@ +pyparsing>=1.5.5 +cached-property>=1.2.0 +argparse>=1.4.0 +six>=1.1.0 +Jinja2>=2.8 +ConfigArgParse>=0.11.0 \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..6c3174b --- /dev/null +++ b/setup.py @@ -0,0 +1,40 @@ +import re +from setuptools import setup, find_packages + +with open('gixy/__init__.py', 'r') as fd: + version = re.search(r'^version\s*=\s*[\'"]([^\'"]*)[\'"]', + fd.read(), re.MULTILINE).group(1) + +if not version: + raise RuntimeError('Cannot find version information') + +setup( + name='gixy', + version=version, + description='Nginx configuration [sec]analyzer', + keywords='nginx security lint static-analysis', + author='Yandex IS Team', + 
url='https://github.com/yandex/gixy', + install_requires=[ + 'pyparsing>=1.5.5', + 'cached-property>=1.2.0', + 'argparse>=1.4.0', + 'six>=1.1.0', + 'Jinja2>=2.8', + 'ConfigArgParse>=0.11.0' + ], + entry_points={ + 'console_scripts': ['gixy=gixy.cli.main:main'], + }, + packages=find_packages(exclude=['tests', 'tests.*']), + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Environment :: Console', + 'Intended Audience :: System Administrators', + 'Intended Audience :: Developers', + 'Topic :: Security', + 'Topic :: Software Development :: Quality Assurance', + 'Topic :: Software Development :: Testing' + ], + include_package_data=True, + ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/core/__init__.py b/tests/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/core/test_context.py b/tests/core/test_context.py new file mode 100644 index 0000000..a095c83 --- /dev/null +++ b/tests/core/test_context.py @@ -0,0 +1,137 @@ +from nose.tools import with_setup, assert_equals, assert_not_equals, assert_true +from gixy.core.context import get_context, pop_context, push_context, purge_context, CONTEXTS, Context +from gixy.directives.block import Root +from gixy.core.variable import Variable +from gixy.core.regexp import Regexp + + +def setup(): + assert_equals(len(CONTEXTS), 0) + + +def tear_down(): + purge_context() + + +@with_setup(setup, tear_down) +def test_push_pop_context(): + root_a = Root() + push_context(root_a) + assert_equals(len(CONTEXTS), 1) + root_b = Root() + push_context(root_b) + assert_equals(len(CONTEXTS), 2) + + poped = pop_context() + assert_equals(len(CONTEXTS), 1) + assert_equals(poped.block, root_b) + poped = pop_context() + assert_equals(len(CONTEXTS), 0) + assert_equals(poped.block, root_a) + + +@with_setup(setup, tear_down) +def test_push_get_purge_context(): + root = Root() + push_context(root) + assert_equals(len(CONTEXTS), 1) + 
assert_equals(get_context().block, root) + root = Root() + push_context(root) + assert_equals(len(CONTEXTS), 2) + assert_equals(get_context().block, root) + + purge_context() + assert_equals(len(CONTEXTS), 0) + + +@with_setup(setup, tear_down) +def test_add_variables(): + context = push_context(Root()) + assert_equals(len(context.variables['index']), 0) + assert_equals(len(context.variables['name']), 0) + + one_str_var = Variable('1') + context.add_var('1', one_str_var) + one_int_var = Variable(1) + context.add_var(1, one_int_var) + some_var = Variable('some') + context.add_var('some', some_var) + + assert_equals(len(context.variables['index']), 1) + assert_equals(context.variables['index'][1], one_int_var) + assert_equals(len(context.variables['name']), 1) + assert_equals(context.variables['name']['some'], some_var) + context.clear_index_vars() + assert_equals(len(context.variables['index']), 0) + assert_equals(len(context.variables['name']), 1) + assert_equals(context.variables['name']['some'], some_var) + + +@with_setup(setup, tear_down) +def test_get_variables(): + context = push_context(Root()) + assert_equals(len(context.variables['index']), 0) + assert_equals(len(context.variables['name']), 0) + + one_var = Variable(1) + context.add_var(1, one_var) + some_var = Variable('some') + context.add_var('some', some_var) + + assert_equals(context.get_var(1), one_var) + assert_equals(context.get_var('some'), some_var) + # Checks not existed variables, for now context may return None + assert_equals(context.get_var(0), None) + assert_equals(context.get_var('not_existed'), None) + # Checks builtins variables + assert_true(context.get_var('uri')) + assert_true(context.get_var('document_uri')) + assert_true(context.get_var('arg_asdsadasd')) + assert_true(context.get_var('args')) + + +@with_setup(setup, tear_down) +def test_context_depend_variables(): + push_context(Root()) + assert_equals(len(get_context().variables['index']), 0) + 
assert_equals(len(get_context().variables['name']), 0) + + get_context().add_var(1, Variable(1, value='one')) + get_context().add_var('some', Variable('some', value='some')) + + assert_equals(get_context().get_var(1).value, 'one') + assert_equals(get_context().get_var('some').value, 'some') + + # Checks top context variables are still exists + push_context(Root()) + assert_equals(get_context().get_var(1).value, 'one') + assert_equals(get_context().get_var('some').value, 'some') + + # Checks variable overriding + get_context().add_var('some', Variable('some', value='some_new')) + get_context().add_var('foo', Variable('foo', value='foo')) + assert_not_equals(get_context().get_var('some').value, 'some') + assert_equals(get_context().get_var('some').value, 'some_new') + assert_equals(get_context().get_var('foo').value, 'foo') + assert_equals(get_context().get_var(1).value, 'one') + + # Checks variables after restore previous context + pop_context() + assert_not_equals(get_context().get_var('some').value, 'some_new') + assert_equals(get_context().get_var('some').value, 'some') + assert_equals(get_context().get_var('foo'), None) + assert_equals(get_context().get_var(1).value, 'one') + + +@with_setup(setup, tear_down) +def test_push_failed_with_regexp_py35_gixy_10(): + push_context(Root()) + assert_equals(len(get_context().variables['index']), 0) + assert_equals(len(get_context().variables['name']), 0) + + regexp = Regexp('^/some/(.*?)') + for name, group in regexp.groups.items(): + get_context().add_var(name, Variable(name=name, value=group)) + + push_context(Root()) diff --git a/tests/core/test_regexp.py b/tests/core/test_regexp.py new file mode 100644 index 0000000..4dfc053 --- /dev/null +++ b/tests/core/test_regexp.py @@ -0,0 +1,401 @@ +from nose.tools import assert_true, assert_false, assert_equals +from gixy.core.regexp import Regexp + +''' +CATEGORIES: + sre_parse.CATEGORY_SPACE + sre_parse.CATEGORY_NOT_SPACE + sre_parse.CATEGORY_DIGIT + 
sre_parse.CATEGORY_NOT_DIGIT + sre_parse.CATEGORY_WORD + sre_parse.CATEGORY_NOT_WORD + ANY +''' + + +def test_positive_contains(): + cases = ( + (r'[a-z]', 'a'), + (r'[a-z]*', 'a'), + (r'[a-z]*?', 'a'), + (r'[a-z]+?', 'a'), + (r'[a-z]', 'z'), + (r'(?:a|b)', 'b'), + (r'(/|:|[a-z])', 'g'), + (r'[^a-z]', '/'), + (r'[^a-z]', '\n'), + (r'[^0]', '9'), + (r'[^0-2]', '3'), + (r'[^0123a-z]', '9'), + (r'\s', '\x20'), + (r'[^\s]', 'a'), + (r'\d', '1'), + (r'[^\d]', 'b'), + (r'\w', '_'), + (r'[^\w]', '\n'), + (r'\W', '\n'), + (r'[^\W]', 'a'), + (r'.', 'a') + ) + for case in cases: + regexp, char = case + yield check_positive_contain, regexp, char + + +def test_negative_contains(): + cases = ( + ('[a-z]', '1'), + ('[a-z]*', '2'), + ('[a-z]*?', '3'), + ('[a-z]+?', '4'), + ('[a-z]', '\n'), + ('(?:a|b)', 'c'), + ('(/|:|[a-z])', '\n'), + ('[^a-z]', 'a'), + ('[^0]', '0'), + ('[^0-2]', '0'), + ('[^0123a-z]', 'z'), + (r'\s', 'a'), + (r'[^\s]', '\n'), + (r'\d', 'f'), + (r'[^\d]', '2'), + (r'\w', '\n'), + (r'[^\w]', '_'), + (r'\W', 'a'), + (r'[^\W]', '\n'), + (r'.', '\n') + ) + for case in cases: + regexp, char = case + yield check_negative_contain, regexp, char + + +def test_groups_names(): + cases = ( + ('foo', [0]), + ('(1)(2)(?:3)', [0, 1, 2]), + ('(1)((2)|(?:3))', [0, 1, 2, 3]), + ("(?'pcre_7'1as)(?P(?2)|(?:3))", [0, 1, 2, 3, 'pcre_7', 'outer', 'inner']), + ('/proxy/(?.*)$', [0, 1, 'proxy']) + ) + for case in cases: + regexp, groups = case + yield check_groups_names, regexp, groups + + +def test_to_string(): + cases = ( + (r'foo', 'foo'), + (r'(1)(2)(?:3)', '(1)(2)(?:3)'), + (r'(1)((2)|(?:3))', '(1)((?:(2)|(?:3)))'), + (r'\w|1|3-5|[a-z]', '(?:[\w]|1|3\\-5|[a-z])'), + (r'(1|(?:3)|([4-6]))', '((?:1|(?:3)|([4-6])))'), + (r'(1|(?:3)|(?P[4-6]))', '((?:1|(?:3)|([4-6])))'), + (r'^sss', '^sss'), + (r'(^bb|11)$', '((?:^bb|11))$'), + (r'(http|https)', '(http(?:|s))'), + (r'1*', '1*'), + (r'1*?', '1*?'), + (r'1+', '1+'), + ) + for case in cases: + regexp, string = case + yield 
check_to_string, regexp, string + + +def test_positive_startswith(): + cases = ( + (r'foo', 'q', False), + (r'foo', 'f', True), + (r'^foo', 'f', False), + (r'(^foo)', 'f', False), + (r'(^foo)', 'f', True), + (r'(^foo|g)', 'f', True), + (r'(^foo|g)', 'g', True), + (r'(^foo|g)', 'q', False), + (r'^[^/]+', '\n', True), + (r'/[^/]+', '/', True), + (r'((a))', 'a', False), + (r'((a))', 'b', False), + (r'^[a-z]{0}0', '0', False), + (r'^[a-z]{1}0', 'a', False), + ) + for case in cases: + regexp, check, strict = case + yield check_positive_startswith, regexp, check, strict + + +def test_negative_startswith(): + cases = ( + (r'foo', '\n', False), + (r'foo', 'o', True), + (r'^foo', 'o', False), + (r'(^foo)', 'q', False), + (r'(^foo)', 'q', True), + (r'(^foo|g)', 'q', True), + (r'(^foo|g)', 'o', True), + (r'(^foo|g)', '\n', False), + (r'^[^/]+', '/', True), + (r'/[^/]+', 'a', True), + (r'((abc)|(ss))', 'b', True), + (r'^[a-z]{0}0', 'a', False), + (r'^[a-z]{0}0', 'g', False), + ) + for case in cases: + regexp, check, strict = case + yield check_negative_startswith, regexp, check, strict + + +def test_positive_must_contain(): + cases = ( + (r'abc', 'a'), + (r'abc', 'b'), + (r'abc', 'c'), + (r'3+', '3'), + (r'[0]', '0'), + (r'([0])', '0'), + (r'(?:[0])', '0'), + (r'(?:[0])|0|((((0))))', '0'), + ) + for case in cases: + regexp, char = case + yield check_positive_must_contain, regexp, char + + +def test_negative_must_contain(): + cases = ( + (r'[a-z]', '1'), + (r'2{0}1', '2'), + (r'3?', '3'), + (r'3*', '3'), + (r'3*?', '3'), + (r'3+a', 'b'), + (r'[a-z]', 'a'), + (r'(?:a|b)', 'a'), + (r'(?:a|b)', 'b'), + (r'(/|:|[a-z])', '/'), + (r'(/|:|[a-z])', 'z'), + (r'[^a-z]', '\n'), + (r'[^0]', '0'), + (r'[^0-2]', '0'), + (r'[^0123a-z]', 'z'), + (r'\s', '\x20'), + (r'[^\s]', '\n'), + (r'\d', '3'), + (r'[^\d]', 'a'), + (r'\w', 'a'), + (r'[^\w]', '\n'), + (r'\W', '\n'), + (r'[^\W]', 'a'), + (r'.', '\n') + ) + for case in cases: + regexp, char = case + yield check_negative_must_contain, regexp, 
char + + +def test_positive_must_startswith(): + cases = ( + (r'foo', 'f', True), + (r'^foo', 'f', False), + (r'(^foo)', 'f', True), + (r'^((a))', 'a', False), + (r'((a))', 'a', True), + (r'^[a-z]{0}0', '0', False), + (r'^a{1}0', 'a', False), + ) + for case in cases: + regexp, check, strict = case + yield check_positive_must_startswith, regexp, check, strict + + +def test_negative_must_startswith(): + cases = ( + (r'foo', 'o', False), + (r'^foo', 'o', False), + (r'(^foo)', 'o', False), + (r'[a-z]', '1', True), + (r'[a-z]', 'a', True), + (r'/[^/]+', 'a', True), + (r'3?', '3', True), + (r'3*', '3', True), + (r'3*?', '3', True), + (r'3+a', 'b', True), + (r'^((a))', 'b', False), + (r'((a))', 'a', False), + (r'^a{0}0', 'a', False), + ) + for case in cases: + regexp, check, strict = case + yield check_negative_must_startswith, regexp, check, strict + + +def test_generate(): + cases = ( + (r'foo', {'foo'}), + (r'^sss', {'^sss'}), + (r'(1)(2)(3)', {'123'}), + (r'(1)((2)|(?:3))', {'12', '13'}), + (r'(^1?2?|aa/)', {'^', '^1', '^2', '^12', 'aa/'}), + (r'^https?://yandex.ru', {'^http://yandex|ru', '^https://yandex|ru'}), + (r'(^bb|11)$', {'^bb$', '11$'}), + (r'(http|https)', {'http', 'https'}), + (r'1*', {'', '11111'}), + (r'1*?', {'', '11111'}), + (r'1{0}?2', {'2'}), + (r'1{0}2', {'2'}), + (r'1+', {'11111'}), + (r'[^/]?', {'', '|'}), + (r'^http://(foo|bar)|baz', {'^http://foo', '^http://bar', 'baz'}), + (r'[^\x00-\x7b|\x7e-\xff]', {'\x7d'}), + (r'(a|b|c)', {'a', 'b', 'c'}), + (r'[xyz]', {'x', 'y', 'z'}) + ) + for case in cases: + regexp, values = case + yield check_generate, regexp, values + + +def test_strict_generate(): + reg = Regexp('^foo|bar', strict=True) + assert_equals(sorted(reg.generate('|', anchored=True)), sorted({'^foo', '^bar'})) + + +def test_gen_anchor(): + + reg = Regexp('^some$') + val = next(reg.generate('', anchored=False)) + assert_equals(val, 'some') + + reg = Regexp('^some$') + val = next(reg.generate('', anchored=True)) + assert_equals(val, '^some$') + 
+ reg = Regexp('^some$', strict=True) + val = next(reg.generate('', anchored=False)) + assert_equals(val, 'some') + + reg = Regexp('^some$', strict=True) + val = next(reg.generate('', anchored=True)) + assert_equals(val, '^some$') + + +def test_group_can_contains(): + source = '/some/(?P[^/:.]+)/' + reg = Regexp(source) + assert_true(reg.can_contain('\n'), + 'Whole regex "{}" can contains "{}"'.format(source, '\\n')) + + assert_true(reg.group(0).can_contain('\n'), + 'Group 0 from regex "{}" can contains "{}"'.format(source, '\\n')) + + assert_true(reg.group('action').can_contain('\n'), + 'Group "action" from regex "{}" can contains "{}"'.format(source, '\\n')) + + assert_true(reg.group(1).can_contain('\n'), + 'Group 1 from regex "{}" can contains "{}"'.format(source, '\\n')) + + assert_false(reg.group('action').can_contain('/'), + 'Group "action" from regex "{}" CAN\'T (!) contain "{}"'.format(source, '/')) + + +def check_positive_contain(regexp, char): + reg = Regexp(regexp, case_sensitive=True) + assert_true(reg.can_contain(char), + '"{}" should contain "{}"'.format(regexp, char)) + + reg = Regexp(regexp, case_sensitive=False) + char = char.upper() + assert_true(reg.can_contain(char), + '"{}" (case insensitive) should contain "{}"'.format(regexp, char)) + + +def check_negative_contain(regexp, char): + reg = Regexp(regexp, case_sensitive=True) + assert_false(reg.can_contain(char), + '"{}" should not contain "{}"'.format(regexp, char)) + + reg = Regexp(regexp, case_sensitive=False) + char = char.upper() + assert_false(reg.can_contain(char), + '"{}" (case insensitive) should not contain "{}"'.format(regexp, char)) + + +def check_positive_startswith(regexp, char, strict): + reg = Regexp(regexp, case_sensitive=True, strict=strict) + assert_true(reg.can_startswith(char), + '"{}" can start\'s with "{}"'.format(regexp, char)) + + reg = Regexp(regexp, case_sensitive=False, strict=strict) + char = char.upper() + assert_true(reg.can_startswith(char), + '"{}" (case 
insensitive) can start\'s with "{}"'.format(regexp, char)) + + +def check_negative_startswith(regexp, char, strict): + reg = Regexp(regexp, case_sensitive=True, strict=strict) + assert_false(reg.can_startswith(char), + '"{}" can\'t start\'s with "{}"'.format(regexp, char)) + + reg = Regexp(regexp, case_sensitive=False, strict=strict) + char = char.upper() + assert_false(reg.can_startswith(char), + '"{}" (case insensitive) can\'t start\'s with "{}"'.format(regexp, char)) + + +def check_groups_names(regexp, groups): + reg = Regexp(regexp) + assert_equals(set(reg.groups.keys()), set(groups)) + + +def check_to_string(regexp, string): + reg = Regexp(regexp) + assert_equals(str(reg), string) + + +def check_positive_must_contain(regexp, char): + reg = Regexp(regexp, case_sensitive=True) + assert_true(reg.must_contain(char), + '"{}" must contain with "{}"'.format(regexp, char)) + + reg = Regexp(regexp, case_sensitive=False) + char = char.upper() + assert_true(reg.must_contain(char), + '"{}" (case insensitive) must contain with "{}"'.format(regexp, char)) + + +def check_negative_must_contain(regexp, char): + reg = Regexp(regexp, case_sensitive=True) + assert_false(reg.must_contain(char), + '"{}" must NOT contain with "{}"'.format(regexp, char)) + + reg = Regexp(regexp, case_sensitive=False) + char = char.upper() + assert_false(reg.must_contain(char), + '"{}" (case insensitive) must NOT contain with "{}"'.format(regexp, char)) + + +def check_positive_must_startswith(regexp, char, strict): + reg = Regexp(regexp, case_sensitive=True, strict=strict) + assert_true(reg.must_startswith(char), + '"{}" MUST start\'s with "{}"'.format(regexp, char)) + + reg = Regexp(regexp, case_sensitive=False, strict=strict) + char = char.upper() + assert_true(reg.must_startswith(char), + '"{}" (case insensitive) MUST start\'s with "{}"'.format(regexp, char)) + + +def check_negative_must_startswith(regexp, char, strict): + reg = Regexp(regexp, case_sensitive=True, strict=strict) + 
assert_false(reg.must_startswith(char), + '"{}" MUST NOT start\'s with "{}"'.format(regexp, char)) + + reg = Regexp(regexp, case_sensitive=False, strict=strict) + char = char.upper() + assert_false(reg.must_startswith(char), + '"{}" (case insensitive) MUST NOT start\'s with "{}"'.format(regexp, char)) + +def check_generate(regexp, values): + reg = Regexp(regexp) + assert_equals(sorted(reg.generate('|', anchored=True)), sorted(values)) diff --git a/tests/core/test_variable.py b/tests/core/test_variable.py new file mode 100644 index 0000000..25c813c --- /dev/null +++ b/tests/core/test_variable.py @@ -0,0 +1,99 @@ +from nose.tools import assert_true, assert_false, assert_equals, with_setup +from gixy.core.context import get_context, push_context, purge_context +from gixy.directives.block import Root +from gixy.core.regexp import Regexp +from gixy.core.variable import Variable + +def setup(): + push_context(Root()) + + +def tear_down(): + purge_context() + + +@with_setup(setup, tear_down) +def test_literal(): + var = Variable(name='simple', value='$uri', have_script=False) + assert_false(var.depends) + assert_false(var.regexp) + assert_equals(var.value, '$uri') + + assert_false(var.can_startswith('$')) + assert_false(var.can_contain('i')) + assert_true(var.must_contain('$')) + assert_true(var.must_contain('u')) + assert_false(var.must_contain('a')) + assert_true(var.must_startswith('$')) + assert_false(var.must_startswith('u')) + + +@with_setup(setup, tear_down) +def test_regexp(): + var = Variable(name='simple', value=Regexp('^/.*')) + assert_false(var.depends) + assert_true(var.regexp) + + assert_true(var.can_startswith('/')) + assert_false(var.can_startswith('a')) + assert_true(var.can_contain('a')) + assert_false(var.can_contain('\n')) + assert_true(var.must_contain('/')) + assert_false(var.must_contain('a')) + assert_true(var.must_startswith('/')) + assert_false(var.must_startswith('a')) + + +@with_setup(setup, tear_down) +def test_script(): + 
get_context().add_var('foo', Variable(name='foo', value=Regexp('.*'))) + var = Variable(name='simple', value='/$foo') + assert_true(var.depends) + assert_false(var.regexp) + + assert_false(var.can_startswith('/')) + assert_false(var.can_startswith('a')) + assert_true(var.can_contain('/')) + assert_true(var.can_contain('a')) + assert_false(var.can_contain('\n')) + assert_true(var.must_contain('/')) + assert_false(var.must_contain('a')) + assert_true(var.must_startswith('/')) + assert_false(var.must_startswith('a')) + + +@with_setup(setup, tear_down) +def test_regexp_boundary(): + var = Variable(name='simple', value=Regexp('.*'), boundary=Regexp('/[a-z]', strict=True)) + assert_false(var.depends) + assert_true(var.regexp) + + assert_true(var.can_startswith('/')) + assert_false(var.can_startswith('a')) + assert_false(var.can_contain('/')) + assert_true(var.can_contain('a')) + assert_false(var.can_contain('0')) + assert_false(var.can_contain('\n')) + assert_true(var.must_contain('/')) + assert_false(var.must_contain('a')) + assert_true(var.must_startswith('/')) + assert_false(var.must_startswith('a')) + + +@with_setup(setup, tear_down) +def test_script_boundary(): + get_context().add_var('foo', Variable(name='foo', value=Regexp('.*'), boundary=Regexp('[a-z]', strict=True))) + var = Variable(name='simple', value='/$foo', boundary=Regexp('[/a-z0-9]', strict=True)) + assert_true(var.depends) + assert_false(var.regexp) + + assert_false(var.can_startswith('/')) + assert_false(var.can_startswith('a')) + assert_false(var.can_contain('/')) + assert_true(var.can_contain('a')) + assert_false(var.can_contain('\n')) + assert_false(var.can_contain('0')) + assert_true(var.must_contain('/')) + assert_false(var.must_contain('a')) + assert_true(var.must_startswith('/')) + assert_false(var.must_startswith('a')) diff --git a/tests/directives/__init__.py b/tests/directives/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/directives/test_block.py 
b/tests/directives/test_block.py new file mode 100644 index 0000000..5ac7ae0 --- /dev/null +++ b/tests/directives/test_block.py @@ -0,0 +1,208 @@ +from nose.tools import assert_equals, assert_is_instance, assert_is_not_none, assert_is_none, assert_true, assert_false +import mock +from six import StringIO +from six.moves import builtins +from gixy.parser.nginx_parser import NginxParser +from gixy.directives.block import * + +# TODO(buglloc): what about include block? + +def _get_parsed(config): + with mock.patch('%s.open' % builtins.__name__) as mock_open: + mock_open.return_value = StringIO(config) + root = NginxParser('/foo/bar', allow_includes=False).parse('/foo/bar') + return root.children[0] + + +def test_block(): + config = 'some {some;}' + + directive = _get_parsed(config) + assert_is_instance(directive, Block) + assert_true(directive.is_block) + assert_true(directive.self_context) + assert_false(directive.provide_variables) + + +def test_http(): + config = ''' +http { + default_type application/octet-stream; + sendfile on; + keepalive_timeout 65; +} + ''' + + directive = _get_parsed(config) + assert_is_instance(directive, HttpBlock) + assert_true(directive.is_block) + assert_true(directive.self_context) + assert_false(directive.provide_variables) + + +def test_server(): + config = ''' +server { + listen 80; + server_name _; + server_name cool.io; +} + + ''' + + directive = _get_parsed(config) + assert_is_instance(directive, ServerBlock) + assert_true(directive.is_block) + assert_true(directive.self_context) + assert_equals([d.args[0] for d in directive.get_names()], ['_', 'cool.io']) + assert_false(directive.provide_variables) + + +def test_location(): + config = ''' +location / { +} + ''' + + directive = _get_parsed(config) + assert_is_instance(directive, LocationBlock) + assert_true(directive.is_block) + assert_true(directive.self_context) + assert_true(directive.provide_variables) + assert_is_none(directive.modifier) + assert_equals(directive.path, '/') + 
assert_false(directive.is_internal) + + +def test_location_internal(): + config = ''' +location / { + internal; +} + ''' + + directive = _get_parsed(config) + assert_is_instance(directive, LocationBlock) + assert_true(directive.is_internal) + + +def test_location_modifier(): + config = ''' +location = / { +} + ''' + + directive = _get_parsed(config) + assert_is_instance(directive, LocationBlock) + assert_equals(directive.modifier, '=') + assert_equals(directive.path, '/') + + +def test_if(): + config = ''' +if ($some) { +} + ''' + + directive = _get_parsed(config) + assert_is_instance(directive, IfBlock) + assert_true(directive.is_block) + assert_false(directive.self_context) + assert_false(directive.provide_variables) + assert_equals(directive.variable, '$some') + assert_is_none(directive.operand) + assert_is_none(directive.value) + + +def test_if_modifier(): + config = ''' +if (-f /some) { +} + ''' + + directive = _get_parsed(config) + assert_is_instance(directive, IfBlock) + assert_equals(directive.operand, '-f') + assert_equals(directive.value, '/some') + assert_is_none(directive.variable) + + +def test_if_variable(): + config = ''' +if ($http_some = '/some') { +} + ''' + + directive = _get_parsed(config) + assert_is_instance(directive, IfBlock) + assert_equals(directive.variable, '$http_some') + assert_equals(directive.operand, '=') + assert_equals(directive.value, '/some') + + +def test_block_some_flat(): + config = ''' + some { + default_type application/octet-stream; + sendfile on; + if (-f /some/) { + keepalive_timeout 65; + } + } + ''' + + directive = _get_parsed(config) + for d in {'default_type', 'sendfile', 'keepalive_timeout'}: + c = directive.some(d, flat=True) + assert_is_not_none(c) + assert_equals(c.name, d) + + +def test_block_some_not_flat(): + config = ''' + some { + default_type application/octet-stream; + sendfile on; + if (-f /some/) { + keepalive_timeout 65; + } + } + ''' + + directive = _get_parsed(config) + c = 
directive.some('keepalive_timeout', flat=False) + assert_is_none(c) + + +def test_block_find_flat(): + config = ''' + some { + directive 1; + if (-f /some/) { + directive 2; + } + } + ''' + + directive = _get_parsed(config) + finds = directive.find('directive', flat=True) + assert_equals(len(finds), 2) + assert_equals([x.name for x in finds], ['directive', 'directive']) + assert_equals([x.args[0] for x in finds], ['1', '2']) + + +def test_block_find_not_flat(): + config = ''' + some { + directive 1; + if (-f /some/) { + directive 2; + } + } + ''' + + directive = _get_parsed(config) + finds = directive.find('directive', flat=False) + assert_equals(len(finds), 1) + assert_equals([x.name for x in finds], ['directive']) + assert_equals([x.args[0] for x in finds], ['1']) diff --git a/tests/directives/test_directive.py b/tests/directives/test_directive.py new file mode 100644 index 0000000..e846e2d --- /dev/null +++ b/tests/directives/test_directive.py @@ -0,0 +1,104 @@ +from nose.tools import assert_equals, assert_is_instance, assert_false, assert_true +import mock +from six import StringIO +from six.moves import builtins +from gixy.parser.nginx_parser import NginxParser +from gixy.directives.directive import * + + +def _get_parsed(config): + with mock.patch('%s.open' % builtins.__name__) as mock_open: + mock_open.return_value = StringIO(config) + return NginxParser('/foo/bar', allow_includes=False).parse('/foo/bar').children[0] + + +def test_directive(): + config = 'some "foo" "bar";' + + directive = _get_parsed(config) + assert_is_instance(directive, Directive) + assert_equals(directive.name, 'some') + assert_equals(directive.args, ['foo', 'bar']) + assert_equals(str(directive), 'some foo bar;') + + +def test_add_header(): + config = 'add_header "X-Foo" "bar";' + + directive = _get_parsed(config) + assert_is_instance(directive, AddHeaderDirective) + assert_equals(directive.name, 'add_header') + assert_equals(directive.args, ['X-Foo', 'bar']) + 
assert_equals(directive.header, 'x-foo') + assert_equals(directive.value, 'bar') + assert_false(directive.always) + assert_equals(str(directive), 'add_header X-Foo bar;') + + +def test_add_header_always(): + config = 'add_header "X-Foo" "bar" always;' + + directive = _get_parsed(config) + assert_is_instance(directive, AddHeaderDirective) + assert_equals(directive.name, 'add_header') + assert_equals(directive.args, ['X-Foo', 'bar', 'always']) + assert_equals(directive.header, 'x-foo') + assert_equals(directive.value, 'bar') + assert_true(directive.always) + assert_equals(str(directive), 'add_header X-Foo bar always;') + + +def test_set(): + config = 'set $foo bar;' + + directive = _get_parsed(config) + assert_is_instance(directive, SetDirective) + assert_equals(directive.name, 'set') + assert_equals(directive.args, ['$foo', 'bar']) + assert_equals(directive.variable, 'foo') + assert_equals(directive.value, 'bar') + assert_equals(str(directive), 'set $foo bar;') + assert_true(directive.provide_variables) + + +def test_rewrite(): + config = 'rewrite ^ http://some;' + + directive = _get_parsed(config) + assert_is_instance(directive, RewriteDirective) + assert_equals(directive.name, 'rewrite') + assert_equals(directive.args, ['^', 'http://some']) + assert_equals(str(directive), 'rewrite ^ http://some;') + assert_true(directive.provide_variables) + + assert_equals(directive.pattern, '^') + assert_equals(directive.replace, 'http://some') + assert_equals(directive.flag, None) + + +def test_rewrite_flags(): + config = 'rewrite ^/(.*)$ http://some/$1 redirect;' + + directive = _get_parsed(config) + assert_is_instance(directive, RewriteDirective) + assert_equals(directive.name, 'rewrite') + assert_equals(directive.args, ['^/(.*)$', 'http://some/$1', 'redirect']) + assert_equals(str(directive), 'rewrite ^/(.*)$ http://some/$1 redirect;') + assert_true(directive.provide_variables) + + assert_equals(directive.pattern, '^/(.*)$') + assert_equals(directive.replace, 
'http://some/$1')
+    assert_equals(directive.flag, 'redirect')
+
+
+def test_root():
+    config = 'root /var/www/html;'
+
+    directive = _get_parsed(config)
+    assert_is_instance(directive, RootDirective)
+    assert_equals(directive.name, 'root')
+    assert_equals(directive.args, ['/var/www/html'])
+    assert_equals(str(directive), 'root /var/www/html;')
+    assert_true(directive.provide_variables)
+
+    assert_equals(directive.path, '/var/www/html')
diff --git a/tests/parser/__init__.py b/tests/parser/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/parser/test_nginx_parser.py b/tests/parser/test_nginx_parser.py
new file mode 100644
index 0000000..2609df2
--- /dev/null
+++ b/tests/parser/test_nginx_parser.py
@@ -0,0 +1,118 @@
+from nose.tools import assert_is_instance, assert_equal
+import mock
+from six import StringIO
+from six.moves import builtins
+from gixy.parser.nginx_parser import NginxParser
+from gixy.directives.directive import *
+from gixy.directives.block import *
+
+
+def _parse(config):
+    with mock.patch('%s.open' % builtins.__name__) as mock_open:
+        mock_open.return_value = StringIO(config)
+        return NginxParser('/foo/bar', allow_includes=False).parse('/foo/bar')
+
+
+def test_directive():
+    configs = [
+        'access_log syslog:server=127.0.0.1,tag=nginx_sentry toolsformat;',
+        'user http;',
+        'internal;',
+        'set $foo "bar";',
+        "set $foo 'bar';",
+        'proxy_pass http://unix:/run/sock.socket;',
+        'rewrite ^/([a-zA-Z0-9]+)$ /$1/${arg_v}.pb break;'
+    ]
+
+    expected = [
+        [Directive],
+        [Directive],
+        [Directive],
+        [Directive, SetDirective],
+        [Directive, SetDirective],
+        [Directive],
+        [Directive, RewriteDirective]
+    ]
+
+    # NOTE(review): this is a nose generator test, so each case must be
+    # `yield`ed (compare test_blocks below); the original `return` silently
+    # skipped every assertion. `expected` was also one entry short of the
+    # seven configs — the single-quoted `set` also parses as SetDirective.
+    for i, config in enumerate(configs):
+        yield assert_config, config, expected[i]
+
+
+def test_blocks():
+    configs = [
+        'if (-f /some) {}',
+        'location / {}'
+    ]
+
+    expected = [
+        [Directive, Block, IfBlock],
+        [Directive, Block, LocationBlock],
+    ]
+
+    for i, config in enumerate(configs):
+        yield assert_config, config, expected[i]
+
+
+def
test_dump_simple(): + config = ''' +# configuration file /etc/nginx/nginx.conf: +http { + include sites/*.conf; +} + +# configuration file /etc/nginx/conf.d/listen: +listen 80; + +# configuration file /etc/nginx/sites/default.conf: +server { + include conf.d/listen; +} + ''' + + tree = _parse(config) + assert_is_instance(tree, Directive) + assert_is_instance(tree, Block) + assert_is_instance(tree, Root) + + assert_equal(len(tree.children), 1) + http = tree.children[0] + assert_is_instance(http, Directive) + assert_is_instance(http, Block) + assert_is_instance(http, HttpBlock) + + assert_equal(len(http.children), 1) + include_server = http.children[0] + assert_is_instance(include_server, Directive) + assert_is_instance(include_server, IncludeBlock) + assert_equal(include_server.file_path, '/etc/nginx/sites/default.conf') + + assert_equal(len(include_server.children), 1) + server = include_server.children[0] + assert_is_instance(server, Directive) + assert_is_instance(server, Block) + assert_is_instance(server, ServerBlock) + + assert_equal(len(server.children), 1) + include_listen = server.children[0] + assert_is_instance(include_listen, Directive) + assert_is_instance(include_listen, IncludeBlock) + assert_equal(include_listen.file_path, '/etc/nginx/conf.d/listen') + + assert_equal(len(include_listen.children), 1) + listen = include_listen.children[0] + assert_is_instance(listen, Directive) + assert_equal(listen.args, ['80']) + + +def assert_config(config, expected): + tree = _parse(config) + assert_is_instance(tree, Directive) + assert_is_instance(tree, Block) + assert_is_instance(tree, Root) + + child = tree.children[0] + for ex in expected: + assert_is_instance(child, ex) diff --git a/tests/parser/test_raw_parser.py b/tests/parser/test_raw_parser.py new file mode 100644 index 0000000..400db67 --- /dev/null +++ b/tests/parser/test_raw_parser.py @@ -0,0 +1,470 @@ +from nose.tools import assert_equals +import mock +from six import StringIO +from six.moves import 
builtins +from gixy.parser.raw_parser import * + + +def test_directive(): + config = ''' +access_log syslog:server=127.0.0.1,tag=nginx_sentry toolsformat; +user http; +internal; +set $foo "bar"; +set $foo 'bar'; +proxy_pass http://unix:/run/sock.socket; +rewrite ^/([a-zA-Z0-9]+)$ /$1/${arg_v}.pb break; + server_name some.tld ~^(www\.)?podberi.(?:ru|com|ua)$ + ~^(www\.)?guru.yandex.ru$; + ''' + + expected = [ + ['access_log', 'syslog:server=127.0.0.1,tag=nginx_sentry', 'toolsformat'], + ['user', 'http'], + ['internal'], + ['set', '$foo', 'bar'], + ['set', '$foo', 'bar'], + ['proxy_pass', 'http://unix:/run/sock.socket'], + ['rewrite', '^/([a-zA-Z0-9]+)$', '/$1/${arg_v}.pb', 'break'], + ['server_name', 'some.tld', '~^(www\.)?podberi.(?:ru|com|ua)$', '~^(www\.)?guru.yandex.ru$'] + ] + + assert_config(config, expected) + + +def test_block(): + config = ''' +http { +} + ''' + + expected = [['http', [], []]] + assert_config(config, expected) + + +def test_block_with_child(): + config = ''' +http { + gzip on; +} + ''' + + expected = [['http', [], [['gzip', 'on']]]] + assert_config(config, expected) + + +def test_location_simple(): + config = ''' +location / { +} +location = /foo { +} +location ~ ^/bar { +} +location ~* ^/baz$ { +} +location ^~ ^/bazz { +} +# Whitespace may be omitted:(( +location ~\.(js|css)$ { +} + ''' + + expected = [['location', ['/'], []], + ['location', ['=', '/foo'], []], + ['location', ['~', '^/bar'], []], + ['location', ['~*', '^/baz$'], []], + ['location', ['^~', '^/bazz'], []], + ['Whitespace may be omitted:(('], + ['location', ['~', '\.(js|css)$'], []]] + + assert_config(config, expected) + + +def test_quoted_strings(): + config = ''' +some_sq '\\'la\\.\\/\\"'; +some_dq '\\'la\\.\\/\\"'; + ''' + + expected = [['some_sq', '\'la\\.\\/\"'], + ['some_dq', '\'la\\.\\/\"']] + + assert_config(config, expected) + + +def test_location_child(): + config = ''' +location = /foo { + proxy_pass http://unix:/run/sock.socket; +} + ''' + + expected = 
[['location', ['=', '/foo'], [ + ['proxy_pass', 'http://unix:/run/sock.socket'] + ]]] + assert_config(config, expected) + + +def test_nested_location(): + config = ''' +location ~* ^/foo { + location = /foo/bar { + internal; + proxy_pass http://any.yandex.ru; + } + + location = /foo/baz { + proxy_pass upstream; + } +} + ''' + + expected = [['location', ['~*', '^/foo'], [ + ['location', ['=', '/foo/bar'], [ + ['internal'], + ['proxy_pass', 'http://any.yandex.ru'] + ]], + ['location', ['=', '/foo/baz'], [ + ['proxy_pass', 'upstream'] + ]], + ]]] + + assert_config(config, expected) + + +def test_hash_block(): + config = ''' +geo $geo { + default 0; + + 127.0.0.1 2; + 192.168.1.0/24 1; + 10.1.0.0/16 1; + + ::1 2; + 2001:0db8::/32 1; +} + ''' + + expected = [['geo', ['$geo'], [ + ['default', '0'], + ['127.0.0.1', '2'], + ['192.168.1.0/24', '1'], + ['10.1.0.0/16', '1'], + ['::1', '2'], + ['2001:0db8::/32', '1'] + ]]] + + assert_config(config, expected) + + +def test_hash_block_in_location(): + config = ''' +location /iphone/ { + types { + text/html html htm shtml; + application/json json; + application/rss+xml rss; + text/vnd.sun.j2me.app-descriptor jad; + } +} + ''' + + expected = [['location', ['/iphone/'], [ + ['types', [], [ + ['text/html', 'html', 'htm', 'shtml'], + ['application/json', 'json'], + ['application/rss+xml', 'rss'], + ['text/vnd.sun.j2me.app-descriptor', 'jad'] + ]], + ]]] + + assert_config(config, expected) + + +def test_named_location(): + config = ''' +location @foo { + proxy_pass http://any.yandex.ru; +} + ''' + + expected = [['location', ['@foo'], [ + ['proxy_pass', 'http://any.yandex.ru'] + ]]] + + assert_config(config, expected) + + +def test_if(): + config = ''' +# http://nginx.org/ru/docs/http/ngx_http_rewrite_module.html#if + +if ($http_user_agent ~ MSIE) { + rewrite ^(.*)$ /msie/$1 break; +} + +if ($http_cookie ~* "id=([^;]+)(?:;|$)") { + set $id $1; +} + +if ($request_method = POST) { + return 405; +} + +if ($slow) { + limit_rate 10k; +} + 
+if ($invalid_referer) { + return 403; +} + +if (!-e "/var/data/$dataset") { + return 503; +} + +if ($https_or_slb = (by_slb|https)) { +} + +if ($host ~* (lori|rage2)\.yandex\.(ru|ua|com|com\.tr)) { + set $x_frame_options ALLOW; +} + ''' + + expected = [ + ['http://nginx.org/ru/docs/http/ngx_http_rewrite_module.html#if'], + ['if', ['$http_user_agent', '~', 'MSIE'], [ + ['rewrite', '^(.*)$', '/msie/$1', 'break'] + ]], + ['if', ['$http_cookie', '~*', 'id=([^;]+)(?:;|$)'], [ + ['set', '$id', '$1'] + ]], + ['if', ['$request_method', '=', 'POST'], [ + ['return', '405'] + ]], + ['if', ['$slow'], [ + ['limit_rate', '10k'] + ]], + ['if', ['$invalid_referer'], [ + ['return', '403'] + ]], + ['if', ['!-e', '/var/data/$dataset'], [ + ['return', '503'] + ]], + ['if', ['$https_or_slb', '=', '(by_slb|https)'], [ + ]], + ['if', ['$host', '~*', '(lori|rage2)\.yandex\.(ru|ua|com|com\.tr)'], [ + ['set', '$x_frame_options', 'ALLOW'] + ]], + ] + + assert_config(config, expected) + + +def test_hash_block_map(): + config = ''' +# http://nginx.org/ru/docs/http/ngx_http_map_module.html + +map $http_host $name { + hostnames; + + default 0; + + example.com 1; + *.example.com 1; + example.org 2; + *.example.org 2; + .example.net 3; + wap.* 4; +} + +map $http_user_agent $mobile { + default 0; + "~Opera Mini" 1; +} + ''' + + expected = [ + ['http://nginx.org/ru/docs/http/ngx_http_map_module.html'], + ['map', ['$http_host', '$name'], [ + ['hostnames'], + ['default', '0'], + ['example.com', '1'], + ['*.example.com', '1'], + ['example.org', '2'], + ['*.example.org', '2'], + ['.example.net', '3'], + ['wap.*', '4'], + ]], + ['map', ['$http_user_agent', '$mobile'], [ + ['default', '0'], + ['~Opera Mini', '1'], + ]] + ] + + assert_config(config, expected) + + +def test_upstream(): + config = ''' +# http://nginx.org/ru/docs/http/ngx_http_upstream_module.html + +upstream backend { + server backend1.example.com weight=5; + server backend2.example.com:8080; + server unix:/tmp/backend3; + + server 
backup1.example.com:8080 backup; + server backup2.example.com:8080 backup; +} + +server { + location / { + proxy_pass http://backend; + } +} + ''' + + expected = [ + ['http://nginx.org/ru/docs/http/ngx_http_upstream_module.html'], + ['upstream', ['backend'], [ + ['server', 'backend1.example.com', 'weight=5'], + ['server', 'backend2.example.com:8080'], + ['server', 'unix:/tmp/backend3'], + ['server', 'backup1.example.com:8080', 'backup'], + ['server', 'backup2.example.com:8080', 'backup'], + ]], + ['server', [], [ + ['location', ['/'], [ + ['proxy_pass', 'http://backend'] + ]] + ]]] + + assert_config(config, expected) + + +def test_issue_8(): + config = ''' +# http://nginx.org/ru/docs/http/ngx_http_upstream_module.html +if ($http_referer ~* (\.(ru|ua|by|kz)/(pages/music|partners/|page-no-rights\.xml)) ) { + set $temp A; +} + ''' + + expected = [ + ['http://nginx.org/ru/docs/http/ngx_http_upstream_module.html'], + ['if', ['$http_referer', '~*', '(\.(ru|ua|by|kz)/(pages/music|partners/|page-no-rights\.xml))'], [ + ['set', '$temp', 'A'] + ]] + ] + + assert_config(config, expected) + + +def test_issue_11(): + config = ''' +init_by_lua_block { + tvm = require "nginx.tvm" +} + ''' + + expected = [ + ['init_by_lua_block', [], ['tvm', '=', 'require', '"nginx.tvm"']] + ] + + assert_config(config, expected) + + +def test_lua_block(): + config = ''' +# https://github.com/openresty/lua-nginx-module#typical-uses +location = /lua { + # MIME type determined by default_type: + default_type 'text/plain'; + + content_by_lua_block { + local res = ngx.location.capture("/some_other_location") + if res then + ngx.say("status: ", res.status) + ngx.say("body:") + ngx.print(res.body) + end + } +} + ''' + + expected = [ + ['https://github.com/openresty/lua-nginx-module#typical-uses'], + ['location', ['=', '/lua'], [ + ['MIME type determined by default_type:'], + ['default_type', 'text/plain'], + ['content_by_lua_block', [], [ + 'local', 'res', '=', 'ngx.location.capture(', 
'"/some_other_location"', ')', + 'if', 'res', 'then', + 'ngx.say(', '"status: "', ',', 'res.status)', + 'ngx.say(', '"body:"', ')', + 'ngx.print(res.body)', + 'end']] + ]] + ] + + assert_config(config, expected) + + +def test_lua_block_brackets(): + config = ''' +location = /foo { + rewrite_by_lua_block { + res = ngx.location.capture("/memc", + { args = { cmd = "incr", key = ngx.var.uri } } + ) + } + + proxy_pass http://blah.blah.com; +} + ''' + + expected = [ + ['location', ['=', '/foo'], [ + ['rewrite_by_lua_block', [], [ + 'res', '=', 'ngx.location.capture(', '"/memc"', ',', + ['args', '=', ['cmd', '=', '"incr"', ',', 'key', '=', 'ngx.var.uri']], + ')']], + ['proxy_pass', 'http://blah.blah.com'] + ]] + ] + + assert_config(config, expected) + + +def test_file_delims(): + config = ''' +# configuration file /etc/nginx/nginx.conf: +http { + include sites/*.conf; +} + +# configuration file /etc/nginx/sites/default.conf: +server { + +} + ''' + + expected = [ + ['/etc/nginx/nginx.conf'], + ['http', [], [ + ['include', 'sites/*.conf'] + ]], + ['/etc/nginx/sites/default.conf'], + ['server', [], []] + ] + + assert_config(config, expected) + +def assert_config(config, expected): + with mock.patch('%s.open' % builtins.__name__) as mock_open: + mock_open.return_value = StringIO(config) + actual = RawParser().parse('/foo/bar') + assert_equals(actual.asList(), expected) diff --git a/tests/plugins/__init__.py b/tests/plugins/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/plugins/simply/add_header_multiline/add_header.conf b/tests/plugins/simply/add_header_multiline/add_header.conf new file mode 100644 index 0000000..e5e9e04 --- /dev/null +++ b/tests/plugins/simply/add_header_multiline/add_header.conf @@ -0,0 +1,3 @@ +add_header Content-Security-Policy " + default-src: 'none'; + font-src data: https://yastatic.net;"; \ No newline at end of file diff --git a/tests/plugins/simply/add_header_multiline/add_header_fp.conf 
b/tests/plugins/simply/add_header_multiline/add_header_fp.conf new file mode 100644 index 0000000..61eb4c2 --- /dev/null +++ b/tests/plugins/simply/add_header_multiline/add_header_fp.conf @@ -0,0 +1 @@ +add_header X-Foo foo; \ No newline at end of file diff --git a/tests/plugins/simply/add_header_multiline/config.json b/tests/plugins/simply/add_header_multiline/config.json new file mode 100644 index 0000000..3c47ff0 --- /dev/null +++ b/tests/plugins/simply/add_header_multiline/config.json @@ -0,0 +1,3 @@ +{ + "severity": "LOW" +} \ No newline at end of file diff --git a/tests/plugins/simply/add_header_multiline/more_set_headers.conf b/tests/plugins/simply/add_header_multiline/more_set_headers.conf new file mode 100644 index 0000000..63c538a --- /dev/null +++ b/tests/plugins/simply/add_header_multiline/more_set_headers.conf @@ -0,0 +1,3 @@ +more_set_headers -t 'text/html text/plain' + 'X-Foo: Bar + multiline'; \ No newline at end of file diff --git a/tests/plugins/simply/add_header_multiline/more_set_headers_fp.conf b/tests/plugins/simply/add_header_multiline/more_set_headers_fp.conf new file mode 100644 index 0000000..7dfa331 --- /dev/null +++ b/tests/plugins/simply/add_header_multiline/more_set_headers_fp.conf @@ -0,0 +1,2 @@ +more_set_headers -t 'text/html text/plain' + 'X-Foo: Bar multiline'; \ No newline at end of file diff --git a/tests/plugins/simply/add_header_multiline/more_set_headers_multiple.conf b/tests/plugins/simply/add_header_multiline/more_set_headers_multiple.conf new file mode 100644 index 0000000..89cdc20 --- /dev/null +++ b/tests/plugins/simply/add_header_multiline/more_set_headers_multiple.conf @@ -0,0 +1,7 @@ +more_set_headers -t 'text/html text/plain' + 'X-Foo: some + multiline' + 'X-Bar: some + multiline' + 'X-Baz: some + multiline'; \ No newline at end of file diff --git a/tests/plugins/simply/add_header_multiline/more_set_headers_replace.conf b/tests/plugins/simply/add_header_multiline/more_set_headers_replace.conf new file mode 100644 
index 0000000..5c3645e --- /dev/null +++ b/tests/plugins/simply/add_header_multiline/more_set_headers_replace.conf @@ -0,0 +1,2 @@ +more_set_headers -r 'Foo: + multiline'; \ No newline at end of file diff --git a/tests/plugins/simply/add_header_multiline/more_set_headers_replace_fp.conf b/tests/plugins/simply/add_header_multiline/more_set_headers_replace_fp.conf new file mode 100644 index 0000000..50f8c1e --- /dev/null +++ b/tests/plugins/simply/add_header_multiline/more_set_headers_replace_fp.conf @@ -0,0 +1 @@ +more_set_headers -r 'Foo: multiline'; \ No newline at end of file diff --git a/tests/plugins/simply/add_header_multiline/more_set_headers_status_fp.conf b/tests/plugins/simply/add_header_multiline/more_set_headers_status_fp.conf new file mode 100644 index 0000000..9868163 --- /dev/null +++ b/tests/plugins/simply/add_header_multiline/more_set_headers_status_fp.conf @@ -0,0 +1 @@ +more_set_headers -s 404 -s '500 503' 'Foo: bar'; \ No newline at end of file diff --git a/tests/plugins/simply/add_header_multiline/more_set_headers_type_fp.conf b/tests/plugins/simply/add_header_multiline/more_set_headers_type_fp.conf new file mode 100644 index 0000000..1cabb44 --- /dev/null +++ b/tests/plugins/simply/add_header_multiline/more_set_headers_type_fp.conf @@ -0,0 +1,2 @@ +more_set_headers -t 'text/html + text/plain' 'X-Foo: some'; \ No newline at end of file diff --git a/tests/plugins/simply/add_header_redefinition/config.json b/tests/plugins/simply/add_header_redefinition/config.json new file mode 100644 index 0000000..ffb101a --- /dev/null +++ b/tests/plugins/simply/add_header_redefinition/config.json @@ -0,0 +1,3 @@ +{ + "severity": "MEDIUM" +} \ No newline at end of file diff --git a/tests/plugins/simply/add_header_redefinition/duplicate_fp.conf b/tests/plugins/simply/add_header_redefinition/duplicate_fp.conf new file mode 100644 index 0000000..7fd5cf9 --- /dev/null +++ b/tests/plugins/simply/add_header_redefinition/duplicate_fp.conf @@ -0,0 +1,9 @@ +http { 
+add_header X-Frame-Options "DENY" always; + server { + location /new-headers { + add_header X-Frame-Options "DENY" always; + add_header X-Foo foo; + } + } +} \ No newline at end of file diff --git a/tests/plugins/simply/add_header_redefinition/if_replaces.conf b/tests/plugins/simply/add_header_redefinition/if_replaces.conf new file mode 100644 index 0000000..6ba27bf --- /dev/null +++ b/tests/plugins/simply/add_header_redefinition/if_replaces.conf @@ -0,0 +1,5 @@ +add_header X-Frame-Options "DENY" always; + +if (1) { + add_header X-Foo foo; +} \ No newline at end of file diff --git a/tests/plugins/simply/add_header_redefinition/location_replaces.conf b/tests/plugins/simply/add_header_redefinition/location_replaces.conf new file mode 100644 index 0000000..0222b81 --- /dev/null +++ b/tests/plugins/simply/add_header_redefinition/location_replaces.conf @@ -0,0 +1,5 @@ +add_header X-Frame-Options "DENY" always; + +location /new-headers { + add_header X-Foo foo; +} \ No newline at end of file diff --git a/tests/plugins/simply/add_header_redefinition/non_block_fp.conf b/tests/plugins/simply/add_header_redefinition/non_block_fp.conf new file mode 100644 index 0000000..c8c2da4 --- /dev/null +++ b/tests/plugins/simply/add_header_redefinition/non_block_fp.conf @@ -0,0 +1,3 @@ +add_header X-Frame-Options "DENY" always; +server "some"; +add_header X-Foo foo; diff --git a/tests/plugins/simply/add_header_redefinition/not_secure_both_fp.conf b/tests/plugins/simply/add_header_redefinition/not_secure_both_fp.conf new file mode 100644 index 0000000..d0d6753 --- /dev/null +++ b/tests/plugins/simply/add_header_redefinition/not_secure_both_fp.conf @@ -0,0 +1,5 @@ +add_header X-Bar bar; + +location /new-headers { + add_header X-Foo foo; +} \ No newline at end of file diff --git a/tests/plugins/simply/add_header_redefinition/not_secure_outer_fp.conf b/tests/plugins/simply/add_header_redefinition/not_secure_outer_fp.conf new file mode 100644 index 0000000..e50ee3d --- /dev/null +++ 
b/tests/plugins/simply/add_header_redefinition/not_secure_outer_fp.conf @@ -0,0 +1,5 @@ +add_header X-Bar bar; + +location /new-headers { + add_header X-Frame-Options "DENY" always; +} \ No newline at end of file diff --git a/tests/plugins/simply/add_header_redefinition/step_replaces.conf b/tests/plugins/simply/add_header_redefinition/step_replaces.conf new file mode 100644 index 0000000..0de7198 --- /dev/null +++ b/tests/plugins/simply/add_header_redefinition/step_replaces.conf @@ -0,0 +1,8 @@ +http { +add_header X-Frame-Options "DENY" always; + server { + location /new-headers { + add_header X-Foo foo; + } + } +} \ No newline at end of file diff --git a/tests/plugins/simply/force_https/config.json b/tests/plugins/simply/force_https/config.json new file mode 100644 index 0000000..3c47ff0 --- /dev/null +++ b/tests/plugins/simply/force_https/config.json @@ -0,0 +1,3 @@ +{ + "severity": "LOW" +} \ No newline at end of file diff --git a/tests/plugins/simply/force_https/return.conf b/tests/plugins/simply/force_https/return.conf new file mode 100644 index 0000000..fee32c1 --- /dev/null +++ b/tests/plugins/simply/force_https/return.conf @@ -0,0 +1 @@ +return 301 http://some.yandex.ru/; \ No newline at end of file diff --git a/tests/plugins/simply/force_https/rewrite.conf b/tests/plugins/simply/force_https/rewrite.conf new file mode 100644 index 0000000..2e23d6d --- /dev/null +++ b/tests/plugins/simply/force_https/rewrite.conf @@ -0,0 +1 @@ +rewrite ^ http://some.yandex.ru/ permanent; \ No newline at end of file diff --git a/tests/plugins/simply/force_https/simple_fp.conf b/tests/plugins/simply/force_https/simple_fp.conf new file mode 100644 index 0000000..5a1aba8 --- /dev/null +++ b/tests/plugins/simply/force_https/simple_fp.conf @@ -0,0 +1,2 @@ +rewrite ^ https://some.yandex.ru/ permanent; +return 301 https://some.yandex.ru/; \ No newline at end of file diff --git a/tests/plugins/simply/host_spoofing/config.json b/tests/plugins/simply/host_spoofing/config.json new file 
mode 100644 index 0000000..ffb101a --- /dev/null +++ b/tests/plugins/simply/host_spoofing/config.json @@ -0,0 +1,3 @@ +{ + "severity": "MEDIUM" +} \ No newline at end of file diff --git a/tests/plugins/simply/host_spoofing/http_fp.conf b/tests/plugins/simply/host_spoofing/http_fp.conf new file mode 100644 index 0000000..10f97a0 --- /dev/null +++ b/tests/plugins/simply/host_spoofing/http_fp.conf @@ -0,0 +1 @@ +proxy_set_header Host $host; \ No newline at end of file diff --git a/tests/plugins/simply/host_spoofing/http_host.conf b/tests/plugins/simply/host_spoofing/http_host.conf new file mode 100644 index 0000000..5bfe2c5 --- /dev/null +++ b/tests/plugins/simply/host_spoofing/http_host.conf @@ -0,0 +1 @@ +proxy_set_header Host $http_host; \ No newline at end of file diff --git a/tests/plugins/simply/host_spoofing/http_host_diff_case.conf b/tests/plugins/simply/host_spoofing/http_host_diff_case.conf new file mode 100644 index 0000000..82eb8de --- /dev/null +++ b/tests/plugins/simply/host_spoofing/http_host_diff_case.conf @@ -0,0 +1 @@ +proxy_set_header HoSt $http_host; \ No newline at end of file diff --git a/tests/plugins/simply/host_spoofing/some_arg.conf b/tests/plugins/simply/host_spoofing/some_arg.conf new file mode 100644 index 0000000..23b0b32 --- /dev/null +++ b/tests/plugins/simply/host_spoofing/some_arg.conf @@ -0,0 +1 @@ +proxy_set_header host $arg_host; \ No newline at end of file diff --git a/tests/plugins/simply/http_splitting/add_header_uri.conf b/tests/plugins/simply/http_splitting/add_header_uri.conf new file mode 100644 index 0000000..91ea08c --- /dev/null +++ b/tests/plugins/simply/http_splitting/add_header_uri.conf @@ -0,0 +1 @@ +add_header X-Uri $uri; \ No newline at end of file diff --git a/tests/plugins/simply/http_splitting/config.json b/tests/plugins/simply/http_splitting/config.json new file mode 100644 index 0000000..b9e2289 --- /dev/null +++ b/tests/plugins/simply/http_splitting/config.json @@ -0,0 +1,3 @@ +{ + "severity": "HIGH" +} \ No 
newline at end of file diff --git a/tests/plugins/simply/http_splitting/dont_report_not_resolved_var_fp.conf b/tests/plugins/simply/http_splitting/dont_report_not_resolved_var_fp.conf new file mode 100644 index 0000000..d9150cb --- /dev/null +++ b/tests/plugins/simply/http_splitting/dont_report_not_resolved_var_fp.conf @@ -0,0 +1,3 @@ +location ~ /proxy/(a|b)/(\W*)$ { + proxy_pass http://storage/$some; +} \ No newline at end of file diff --git a/tests/plugins/simply/http_splitting/proxy_from_location_var.conf b/tests/plugins/simply/http_splitting/proxy_from_location_var.conf new file mode 100644 index 0000000..3c4468c --- /dev/null +++ b/tests/plugins/simply/http_splitting/proxy_from_location_var.conf @@ -0,0 +1,3 @@ +location ~ /proxy/(a|b)/(\W*)$ { + proxy_pass http://storage/$2; +} \ No newline at end of file diff --git a/tests/plugins/simply/http_splitting/proxy_from_location_var_var.conf b/tests/plugins/simply/http_splitting/proxy_from_location_var_var.conf new file mode 100644 index 0000000..b049841 --- /dev/null +++ b/tests/plugins/simply/http_splitting/proxy_from_location_var_var.conf @@ -0,0 +1,4 @@ +location ~ /proxy/(a|b)/(\W*)$ { + set $p $2; + proxy_pass http://storage/$p; +} \ No newline at end of file diff --git a/tests/plugins/simply/http_splitting/proxy_from_location_var_var_fp.conf b/tests/plugins/simply/http_splitting/proxy_from_location_var_var_fp.conf new file mode 100644 index 0000000..983fa30 --- /dev/null +++ b/tests/plugins/simply/http_splitting/proxy_from_location_var_var_fp.conf @@ -0,0 +1,4 @@ +location ~ /proxy/(a|b)/(\W*)$ { + set $p $1; + proxy_pass http://storage/$p; +} \ No newline at end of file diff --git a/tests/plugins/simply/http_splitting/proxy_from_location_var_var_var.conf b/tests/plugins/simply/http_splitting/proxy_from_location_var_var_var.conf new file mode 100644 index 0000000..9eecbc0 --- /dev/null +++ b/tests/plugins/simply/http_splitting/proxy_from_location_var_var_var.conf @@ -0,0 +1,4 @@ +location ~ /proxy/(a|b)/(?

\W*)$ { + set $upstream "http://$1/$p?"; + proxy_pass $upstream; +} \ No newline at end of file diff --git a/tests/plugins/simply/http_splitting/proxy_pass_ducument_uri.conf b/tests/plugins/simply/http_splitting/proxy_pass_ducument_uri.conf new file mode 100644 index 0000000..b991d69 --- /dev/null +++ b/tests/plugins/simply/http_splitting/proxy_pass_ducument_uri.conf @@ -0,0 +1 @@ +proxy_pass http://upstream$document_uri; \ No newline at end of file diff --git a/tests/plugins/simply/http_splitting/proxy_set_header_ducument_uri.conf b/tests/plugins/simply/http_splitting/proxy_set_header_ducument_uri.conf new file mode 100644 index 0000000..3600632 --- /dev/null +++ b/tests/plugins/simply/http_splitting/proxy_set_header_ducument_uri.conf @@ -0,0 +1 @@ +proxy_set_header "X-Original-Uri" $document_uri; \ No newline at end of file diff --git a/tests/plugins/simply/http_splitting/return_403_fp.conf b/tests/plugins/simply/http_splitting/return_403_fp.conf new file mode 100644 index 0000000..90bb892 --- /dev/null +++ b/tests/plugins/simply/http_splitting/return_403_fp.conf @@ -0,0 +1 @@ +return 403; \ No newline at end of file diff --git a/tests/plugins/simply/http_splitting/return_request_uri_fp.conf b/tests/plugins/simply/http_splitting/return_request_uri_fp.conf new file mode 100644 index 0000000..751a97d --- /dev/null +++ b/tests/plugins/simply/http_splitting/return_request_uri_fp.conf @@ -0,0 +1 @@ +return 301 https://some$request_uri; \ No newline at end of file diff --git a/tests/plugins/simply/http_splitting/rewrite_extract_fp.conf b/tests/plugins/simply/http_splitting/rewrite_extract_fp.conf new file mode 100644 index 0000000..5c853dc --- /dev/null +++ b/tests/plugins/simply/http_splitting/rewrite_extract_fp.conf @@ -0,0 +1 @@ +rewrite ^/proxy/(a|b)/(?\W*)$ http://storage/$path redirect; \ No newline at end of file diff --git a/tests/plugins/simply/http_splitting/rewrite_uri.conf b/tests/plugins/simply/http_splitting/rewrite_uri.conf new file mode 100644 index 
0000000..55837c6 --- /dev/null +++ b/tests/plugins/simply/http_splitting/rewrite_uri.conf @@ -0,0 +1 @@ +rewrite ^ http://some$uri; \ No newline at end of file diff --git a/tests/plugins/simply/http_splitting/rewrite_uri_after_var.conf b/tests/plugins/simply/http_splitting/rewrite_uri_after_var.conf new file mode 100644 index 0000000..a172ba1 --- /dev/null +++ b/tests/plugins/simply/http_splitting/rewrite_uri_after_var.conf @@ -0,0 +1 @@ +return 301 https://$host$uri; \ No newline at end of file diff --git a/tests/plugins/simply/origins/config.json b/tests/plugins/simply/origins/config.json new file mode 100644 index 0000000..e359bfd --- /dev/null +++ b/tests/plugins/simply/origins/config.json @@ -0,0 +1,3 @@ +{ + "severity": ["MEDIUM", "HIGH"] +} \ No newline at end of file diff --git a/tests/plugins/simply/origins/metrika.conf b/tests/plugins/simply/origins/metrika.conf new file mode 100644 index 0000000..b0afeb6 --- /dev/null +++ b/tests/plugins/simply/origins/metrika.conf @@ -0,0 +1,3 @@ +if ($http_referer !~ "^https?://([^/]+metrika.*yandex\.(ru|ua|com|com\.tr|by|kz)|([^/]+\.)?webvisor\.com)/"){ + add_header X-Frame-Options SAMEORIGIN; +} \ No newline at end of file diff --git a/tests/plugins/simply/origins/origin.conf b/tests/plugins/simply/origins/origin.conf new file mode 100644 index 0000000..1b2f1cc --- /dev/null +++ b/tests/plugins/simply/origins/origin.conf @@ -0,0 +1,3 @@ +if ($http_origin !~ '^https?:\/\/yandex.ru\/') { + +} \ No newline at end of file diff --git a/tests/plugins/simply/origins/origin_fp.conf b/tests/plugins/simply/origins/origin_fp.conf new file mode 100644 index 0000000..bc3a50c --- /dev/null +++ b/tests/plugins/simply/origins/origin_fp.conf @@ -0,0 +1,3 @@ +if ($http_origin !~ '^https?:\/\/yandex\.ru\/') { + +} \ No newline at end of file diff --git a/tests/plugins/simply/origins/origin_w_slash_anchored_fp.conf b/tests/plugins/simply/origins/origin_w_slash_anchored_fp.conf new file mode 100644 index 0000000..dea8979 --- /dev/null 
+++ b/tests/plugins/simply/origins/origin_w_slash_anchored_fp.conf @@ -0,0 +1,3 @@ +if ($http_origin !~ '^https?:\/\/yandex\.ru/$') { + +} \ No newline at end of file diff --git a/tests/plugins/simply/origins/origin_w_slash_fp.conf b/tests/plugins/simply/origins/origin_w_slash_fp.conf new file mode 100644 index 0000000..9c887ea --- /dev/null +++ b/tests/plugins/simply/origins/origin_w_slash_fp.conf @@ -0,0 +1,3 @@ +if ($http_origin !~ '^https?:\/\/yandex\.ru/') { + +} \ No newline at end of file diff --git a/tests/plugins/simply/origins/origin_wo_slash.conf b/tests/plugins/simply/origins/origin_wo_slash.conf new file mode 100644 index 0000000..610c10c --- /dev/null +++ b/tests/plugins/simply/origins/origin_wo_slash.conf @@ -0,0 +1,3 @@ +if ($http_origin !~ '^https?:\/\/yandex\.ru') { + +} \ No newline at end of file diff --git a/tests/plugins/simply/origins/referer.conf b/tests/plugins/simply/origins/referer.conf new file mode 100644 index 0000000..af2d2b7 --- /dev/null +++ b/tests/plugins/simply/origins/referer.conf @@ -0,0 +1,3 @@ +if ($http_referer !~ '^https?:\/\/yandex.ru\/') { + +} \ No newline at end of file diff --git a/tests/plugins/simply/origins/referer_fp.conf b/tests/plugins/simply/origins/referer_fp.conf new file mode 100644 index 0000000..2d6e75a --- /dev/null +++ b/tests/plugins/simply/origins/referer_fp.conf @@ -0,0 +1,3 @@ +if ($http_referer !~ '^https?:\/\/yandex\.ru\/') { + +} \ No newline at end of file diff --git a/tests/plugins/simply/origins/referer_subdomain.conf b/tests/plugins/simply/origins/referer_subdomain.conf new file mode 100644 index 0000000..2f42472 --- /dev/null +++ b/tests/plugins/simply/origins/referer_subdomain.conf @@ -0,0 +1,3 @@ +if ($http_referer !~ '^https?:\/\/some.yandex\.ru\/') { + +} \ No newline at end of file diff --git a/tests/plugins/simply/origins/referer_subdomain_fp.conf b/tests/plugins/simply/origins/referer_subdomain_fp.conf new file mode 100644 index 0000000..4c88520 --- /dev/null +++ 
b/tests/plugins/simply/origins/referer_subdomain_fp.conf @@ -0,0 +1,3 @@ +if ($http_referer !~ '^https?:\/\/some\.yandex\.ru\/') { + +} \ No newline at end of file diff --git a/tests/plugins/simply/origins/webvisor.conf b/tests/plugins/simply/origins/webvisor.conf new file mode 100644 index 0000000..bd72852 --- /dev/null +++ b/tests/plugins/simply/origins/webvisor.conf @@ -0,0 +1,3 @@ +if ($http_referer !~ "^https?://([^/]+\.)?yandex\.com/|([^/]+\.)?webvisor\.com/"){ + add_header X-Frame-Options SAMEORIGIN; +} \ No newline at end of file diff --git a/tests/plugins/simply/ssrf/config.json b/tests/plugins/simply/ssrf/config.json new file mode 100644 index 0000000..b9e2289 --- /dev/null +++ b/tests/plugins/simply/ssrf/config.json @@ -0,0 +1,3 @@ +{ + "severity": "HIGH" +} \ No newline at end of file diff --git a/tests/plugins/simply/ssrf/have_internal_fp.conf b/tests/plugins/simply/ssrf/have_internal_fp.conf new file mode 100644 index 0000000..471cf47 --- /dev/null +++ b/tests/plugins/simply/ssrf/have_internal_fp.conf @@ -0,0 +1,4 @@ +location /proxy/ { + internal; + proxy_pass $arg_some; +} \ No newline at end of file diff --git a/tests/plugins/simply/ssrf/host_w_const_start.conf b/tests/plugins/simply/ssrf/host_w_const_start.conf new file mode 100644 index 0000000..2d2717f --- /dev/null +++ b/tests/plugins/simply/ssrf/host_w_const_start.conf @@ -0,0 +1,3 @@ +location ~* ^/backend/(?.*) { + proxy_pass http://some$path; +} \ No newline at end of file diff --git a/tests/plugins/simply/ssrf/host_w_const_start_arg.conf b/tests/plugins/simply/ssrf/host_w_const_start_arg.conf new file mode 100644 index 0000000..e4fd58e --- /dev/null +++ b/tests/plugins/simply/ssrf/host_w_const_start_arg.conf @@ -0,0 +1,3 @@ +location /backend/ { + proxy_pass http://some${arg_la}.shit; +} \ No newline at end of file diff --git a/tests/plugins/simply/ssrf/not_host_var_fp.conf b/tests/plugins/simply/ssrf/not_host_var_fp.conf new file mode 100644 index 0000000..21faf29 --- /dev/null +++ 
b/tests/plugins/simply/ssrf/not_host_var_fp.conf @@ -0,0 +1,3 @@ +location ~ /proxy/(.*)$ { + proxy_pass http://yastatic.net/$1; +} \ No newline at end of file diff --git a/tests/plugins/simply/ssrf/request_uri_fp.conf b/tests/plugins/simply/ssrf/request_uri_fp.conf new file mode 100644 index 0000000..18cdb7d --- /dev/null +++ b/tests/plugins/simply/ssrf/request_uri_fp.conf @@ -0,0 +1,3 @@ +location /backend/ { + proxy_pass http://some$request_uri; +} \ No newline at end of file diff --git a/tests/plugins/simply/ssrf/request_uri_var_fp.conf b/tests/plugins/simply/ssrf/request_uri_var_fp.conf new file mode 100644 index 0000000..5dac1b8 --- /dev/null +++ b/tests/plugins/simply/ssrf/request_uri_var_fp.conf @@ -0,0 +1,4 @@ +location / { + set $upstream "http://some$request_uri"; + proxy_pass $upstream; +} \ No newline at end of file diff --git a/tests/plugins/simply/ssrf/scheme_var.conf b/tests/plugins/simply/ssrf/scheme_var.conf new file mode 100644 index 0000000..21cd5b4 --- /dev/null +++ b/tests/plugins/simply/ssrf/scheme_var.conf @@ -0,0 +1,3 @@ +location ~ /proxy/$ { + proxy_pass $http_proxy_scheme://some/file.conf; +} \ No newline at end of file diff --git a/tests/plugins/simply/ssrf/single_var.conf b/tests/plugins/simply/ssrf/single_var.conf new file mode 100644 index 0000000..c6d71c2 --- /dev/null +++ b/tests/plugins/simply/ssrf/single_var.conf @@ -0,0 +1,3 @@ +location ~ /proxy/(?P.*)$ { + proxy_pass $proxy; +} \ No newline at end of file diff --git a/tests/plugins/simply/ssrf/used_arg.conf b/tests/plugins/simply/ssrf/used_arg.conf new file mode 100644 index 0000000..3236e8e --- /dev/null +++ b/tests/plugins/simply/ssrf/used_arg.conf @@ -0,0 +1,3 @@ +location /proxy/ { + proxy_pass $arg_some; +} \ No newline at end of file diff --git a/tests/plugins/simply/ssrf/vars_from_loc.conf b/tests/plugins/simply/ssrf/vars_from_loc.conf new file mode 100644 index 0000000..370a0d4 --- /dev/null +++ b/tests/plugins/simply/ssrf/vars_from_loc.conf @@ -0,0 +1,6 @@ +location ~ 
/proxy/(.*)/(.*)/(.*)$ { + set $scheme $1; + set $host $2; + set $path $3; + proxy_pass $scheme://$host/$path; +} \ No newline at end of file diff --git a/tests/plugins/simply/ssrf/with_const_scheme.conf b/tests/plugins/simply/ssrf/with_const_scheme.conf new file mode 100644 index 0000000..619cc45 --- /dev/null +++ b/tests/plugins/simply/ssrf/with_const_scheme.conf @@ -0,0 +1,10 @@ +location ~* ^/internal-proxy/(https?)/(.*?)/(.*) { + resolver 127.0.0.1; + + set $proxy_protocol $1; + set $proxy_host $2; + set $proxy_path $3; + + proxy_pass $proxy_protocol://$proxy_host/$proxy_path ; + proxy_set_header Host $proxy_host; +} \ No newline at end of file diff --git a/tests/plugins/simply/valid_referers/config.json b/tests/plugins/simply/valid_referers/config.json new file mode 100644 index 0000000..b9e2289 --- /dev/null +++ b/tests/plugins/simply/valid_referers/config.json @@ -0,0 +1,3 @@ +{ + "severity": "HIGH" +} \ No newline at end of file diff --git a/tests/plugins/simply/valid_referers/none_first.conf b/tests/plugins/simply/valid_referers/none_first.conf new file mode 100644 index 0000000..3b29101 --- /dev/null +++ b/tests/plugins/simply/valid_referers/none_first.conf @@ -0,0 +1 @@ +valid_referers none server_names *.webvisor.com; \ No newline at end of file diff --git a/tests/plugins/simply/valid_referers/none_last.conf b/tests/plugins/simply/valid_referers/none_last.conf new file mode 100644 index 0000000..e6cf9f8 --- /dev/null +++ b/tests/plugins/simply/valid_referers/none_last.conf @@ -0,0 +1,3 @@ +valid_referers server_names + foo.com + none; \ No newline at end of file diff --git a/tests/plugins/simply/valid_referers/none_middle.conf b/tests/plugins/simply/valid_referers/none_middle.conf new file mode 100644 index 0000000..4740118 --- /dev/null +++ b/tests/plugins/simply/valid_referers/none_middle.conf @@ -0,0 +1,2 @@ +valid_referers server_names foo.com + none bar.com; \ No newline at end of file diff --git 
a/tests/plugins/simply/valid_referers/wo_none_fp.conf b/tests/plugins/simply/valid_referers/wo_none_fp.conf new file mode 100644 index 0000000..efa0962 --- /dev/null +++ b/tests/plugins/simply/valid_referers/wo_none_fp.conf @@ -0,0 +1 @@ +valid_referers server_names foo.com bar.com *.none.com none.ru; \ No newline at end of file diff --git a/tests/plugins/test_simply.py b/tests/plugins/test_simply.py new file mode 100644 index 0000000..78b009b --- /dev/null +++ b/tests/plugins/test_simply.py @@ -0,0 +1,98 @@ +from nose.tools import assert_equals, assert_true, assert_in +import os +from os import path +import json + +import gixy +from ..utils import * +from gixy.core.manager import Manager as Gixy +from gixy.core.plugins_manager import PluginsManager +from gixy.core.config import Config + + +def setup_module(): + pass + + +def teardown_module(): + pass + + +def test_from_config(): + tested_plugins = set() + tested_fp_plugins = set() + + conf_dir = path.join(path.dirname(__file__), 'simply') + for plugin in os.listdir(conf_dir): + if plugin in ('.', '..'): + continue + + plugin_path = path.join(conf_dir, plugin) + if not path.isdir(plugin_path): + continue + + config = {} + if path.exists(path.join(plugin_path, 'config.json')): + with open(path.join(plugin_path, 'config.json'), 'r') as file: + config = json.loads(file.read()) + + for test_case in os.listdir(plugin_path): + if not test_case.endswith('.conf'): + continue + + config_path = path.join(plugin_path, test_case) + if not test_case.endswith('_fp.conf'): + # Not False Positive test + tested_plugins.add(plugin) + test_func = check_configuration + else: + tested_fp_plugins.add(plugin) + test_func = check_configuration_fp + + yield test_func, plugin, config_path, config + + manager = PluginsManager() + for plugin in manager.plugins: + plugin = plugin.name + assert_true(plugin in tested_plugins, + 'Plugin "{}" should have at least one simple test config'.format(plugin)) + assert_true(plugin in tested_fp_plugins, + 
'Plugin "{}" should have at least one simple test config with false positive'.format(plugin)) + + +def yoda_provider(plugin): + config = Config( + allow_includes=False, + plugins=[plugin] + ) + return Gixy(config=config) + + +def check_configuration(plugin, config_path, test_config): + with yoda_provider(plugin) as yoda: + yoda.audit(config_path) + results = RawFormatter().format(yoda) + + assert_equals(len(results), 1, 'Should have one report') + result = results[0] + + if 'severity' in test_config: + if not hasattr(test_config['severity'], '__iter__'): + assert_equals(result['severity'], test_config['severity']) + else: + assert_in(result['severity'], test_config['severity']) + assert_equals(result['plugin'], plugin) + assert_true(result['summary']) + assert_true(result['description']) + assert_true(result['config']) + assert_true(result['help_url'].startswith('https://'), + 'help_url must starts with https://. It\'is URL!') + + +def check_configuration_fp(plugin, config_path, test_config): + with yoda_provider(plugin) as yoda: + yoda.audit(config_path) + results = RawFormatter().format(yoda) + + assert_equals(len(results), 0, + 'False positive configuration must not trigger any plugins') diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..c542b99 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,69 @@ +from logging.handlers import BufferingHandler +from gixy.formatters.base import BaseFormatter + + +class LogHandler(BufferingHandler): + def __init__(self, matcher): + # BufferingHandler takes a "capacity" argument + # so as to know when to flush. As we're overriding + # shouldFlush anyway, we can set a capacity of zero. + # You can call flush() manually to clear out the + # buffer. 
+ super(LogHandler, self).__init__(0) + self.matcher = matcher + + def shouldFlush(self, **kwargs): + return False + + def emit(self, record): + self.buffer.append(record.__dict__) + + def matches(self, **kwargs): + """ + Look for a saved dict whose keys/values match the supplied arguments. + """ + result = False + for d in self.buffer: + if self.matcher.matches(d, **kwargs): + result = True + break + return result + + +class Matcher(object): + + _partial_matches = ('msg', 'message') + + def matches(self, d, **kwargs): + """ + Try to match a single dict with the supplied arguments. + + Keys whose values are strings and which are in self._partial_matches + will be checked for partial (i.e. substring) matches. You can extend + this scheme to (for example) do regular expression matching, etc. + """ + result = True + for k in kwargs: + v = kwargs[k] + dv = d.get(k) + if not self.match_value(k, dv, v): + result = False + break + return result + + def match_value(self, k, dv, v): + """ + Try to match a single stored value (dv) with a supplied value (v). + """ + if type(v) != type(dv): + result = False + elif type(dv) is not str or k not in self._partial_matches: + result = (v == dv) + else: + result = dv.find(v) >= 0 + return result + + +class RawFormatter(BaseFormatter): + def format_reports(self, reports, stats): + return reports diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..ec8cb23 --- /dev/null +++ b/tox.ini @@ -0,0 +1,19 @@ +[tox] +envlist = py27, py34, py35, py36, flake8 +skip_missing_interpreters = True + +[testenv] +deps = + -rrequirements.pip + -rrequirements.dev.pip +commands = nosetests -v + +[testenv:flake8] +deps = + flake8 +basepython = python3 +commands = + flake8 setup.py gixy + +[flake8] +max_line_length = 120 \ No newline at end of file