diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 000000000..f3a116bd6
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,88 @@
+---
+name: CI
+
+on: [push, pull_request]
+
+jobs:
+  package:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v1
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v1
+        with:
+          python-version: 3.8
+      - name: Install dependencies
+        run: |
+          python3.8 -m pip install setuptools wheel twine
+      - name: Build packages
+        run: |
+          python3.8 utils/build-dists.py
+      - name: Check packages
+        run: |
+          set -exo pipefail;
+          if [ $(python3.8 -m twine check dist/* | grep -c 'warning') != 0 ]; then exit 1; fi
+
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v1
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v1
+        with:
+          python-version: 3.8
+      - name: Install dependencies
+        run: |
+          python3.8 -m pip install nox
+      - name: Lint the code
+        run: nox -s lint
+
+  docs:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v2
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+      - name: Install dependencies
+        run: |
+          python3.8 -m pip install nox
+      - name: Build the docs
+        run: nox -s docs
+
+  test-linux:
+    runs-on: ubuntu-latest
+
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9]
+        es-version: [7.0.0, 7.10.0]
+
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v2
+      - name: Setup Elasticsearch
+        run: |
+          mkdir /tmp/elasticsearch
+          wget -O - https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${{ matrix.es-version }}-linux-x86_64.tar.gz | tar xz --directory=/tmp/elasticsearch --strip-components=1
+          /tmp/elasticsearch/bin/elasticsearch -d
+      - name: Setup Python - ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Set up Python 3.8 for Nox
+        if: matrix.python-version != '3.8'
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3
+      - name: Install dependencies
+        run: |
+          python3 -m pip install nox
+      - name: Run Tests
+        run: |
+          nox -rs test-${{ matrix.python-version }}
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 40f469d60..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-language: python
-
-python:
-  - "2.7"
-  - "3.4"
-  - "3.5"
-  - "3.6"
-  - "3.7"
-  - "3.8"
-  - "nightly"
-  - "pypy"
-
-env:
-  global:
-    - WAIT_FOR_ES=1
-    - ES_PY_VERSION=master
-  matrix:
-    - ES_VERSION=7.0.0
-
-install:
-  - mkdir /tmp/elasticsearch
-  - wget -O - https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${ES_VERSION}-linux-x86_64.tar.gz | tar xz --directory=/tmp/elasticsearch --strip-components=1
-  - /tmp/elasticsearch/bin/elasticsearch -d
-  - travis_retry pip install --upgrade pytest
-  - travis_retry pip install git+https://github.com/elastic/elasticsearch-py.git@${ES_PY_VERSION}#egg=elasticsearch
-
-script:
-  - python setup.py test
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 2dec86649..3f1830f81 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -7,6 +7,11 @@ you would like to do. It may be that somebody is already working on it, or that
 there are particular issues that you should know about before implementing the
 change.
 
+If you want to be rewarded for your contributions, sign up for the
+`Elastic Contributor Program `_.
+Each time you make a valid contribution, you’ll earn points that increase your
+chances of winning prizes and being recognized as a top contributor.
+
 We enjoy working with contributors to get their code accepted. There are many
 approaches to fixing a problem and it is important to find the best approach
 before writing too much code.
@@ -24,7 +29,7 @@ The process for contributing to any of the Elasticsearch repositories is similar
 
    .. code:: bash
 
-      $ python setup.py test
+      $ nox -rs lint test
 
 3. Rebase your changes.
    Update your local repository with the most recent code from the main
diff --git a/Changelog.rst b/Changelog.rst
index 5fe51a094..558f91e85 100644
--- a/Changelog.rst
+++ b/Changelog.rst
@@ -3,6 +3,73 @@
 Changelog
 =========
 
+7.4.1 (2023-03-01)
+------------------
+
+* Fixed ``DeprecationWarnings`` that would be emitted from deprecated
+  usages of the ``body`` parameter in the Python Elasticsearch client.
+
+
+7.4.0 (2021-07-15)
+------------------
+
+* Added the ``ConstantKeyword``, ``RankFeatures`` field types (`#1456`_, `#1465`_)
+* Added the ``ScriptScore`` query type (`#1464`_)
+* Added ``UpdateByQueryResponse.success()`` method (`#1463`_)
+* Added ``return_doc_meta`` parameter to ``Document.save()`` and ``Document.update()`` for
+  accessing the complete API response (`#1466`_)
+* Added support for ``calendar_interval`` and ``fixed_interval`` to ``DateHistogramFacet`` (`#1467`_)
+* Added ``Document.exists()`` method (`#1447`_, contributed by `@dem4ply`_)
+* Added support for the ``year`` interval to ``DateHistogramFacet`` (`#1502`_, contributed by `@nrsimha`_)
+* Fixed issue where ``to_dict()`` should be called recursively on ``Search.extras`` and ``**kwargs`` (`#1458`_)
+* Fixed inverse of an empty ``Bool`` query should be ``MatchNone`` (`#1459`_)
+* Fixed issue between ``retry_on_conflict`` and optimistic concurrency control within ``Document.update()`` (`#1461`_, contributed by `@armando1793`_)
+
+  .. _@dem4ply: https://github.com/dem4ply
+  .. _@nrsimha: https://github.com/nrsimha
+  .. _@armando1793: https://github.com/armando1793
+  .. _#1447: https://github.com/elastic/elasticsearch-dsl-py/pull/1447
+  .. _#1456: https://github.com/elastic/elasticsearch-dsl-py/pull/1456
+  .. _#1458: https://github.com/elastic/elasticsearch-dsl-py/pull/1458
+  .. _#1459: https://github.com/elastic/elasticsearch-dsl-py/pull/1459
+  .. _#1461: https://github.com/elastic/elasticsearch-dsl-py/pull/1461
+  .. _#1463: https://github.com/elastic/elasticsearch-dsl-py/pull/1463
+  .. _#1464: https://github.com/elastic/elasticsearch-dsl-py/pull/1464
+  .. _#1465: https://github.com/elastic/elasticsearch-dsl-py/pull/1465
+  .. _#1466: https://github.com/elastic/elasticsearch-dsl-py/pull/1466
+  .. _#1467: https://github.com/elastic/elasticsearch-dsl-py/pull/1467
+  .. _#1502: https://github.com/elastic/elasticsearch-dsl-py/pull/1502
+
+
+7.3.0 (2020-09-16)
+------------------
+
+* Added ``Intervals``, ``MatchBoolPrefix``, ``Shape``, and ``Wrapper`` queries (`#1392`_, `#1418`_)
+* Added ``Boxplot``, ``RareTerms``, ``VariableWidthHistogram``, ``MedianAbsoluteDeviation``,
+  ``TTest``, ``CumulativeCardinality``, ``Inference``, ``MovingPercentiles``,
+  and ``Normalize`` aggregations (`#1416`_, `#1418`_)
+* Added ``__all__`` and removed all star imports from ``elasticsearch_dsl`` namespace
+  to avoid leaking unintended names (`#1390`_)
+* Fixed an issue where ``Object`` and ``Nested`` could mutate the inner
+  ``doc_class`` mapping (`#1255`_, contributed by `@l1nd3r0th`_)
+* Fixed a typo in query ``SpanContaining``, previously was ``SpanContainining`` (`#1418`_)
+
+  .. _@l1nd3r0th: https://github.com/l1nd3r0th
+  .. _#1255: https://github.com/elastic/elasticsearch-dsl-py/pull/1255
+  .. _#1390: https://github.com/elastic/elasticsearch-dsl-py/pull/1390
+  .. _#1392: https://github.com/elastic/elasticsearch-dsl-py/pull/1392
+  .. _#1416: https://github.com/elastic/elasticsearch-dsl-py/pull/1416
+  .. _#1418: https://github.com/elastic/elasticsearch-dsl-py/pull/1418
+
+7.2.1 (2020-06-02)
+------------------
+
+* Fixed issue when slicing a Search that would result in a negative
+  ``size`` instead of a ``size`` of 0. (`#1360`_, contributed by `@bk-equityzen`_)
+
+  .. _@bk-equityzen: https://github.com/bk-equityzen
+  .. _#1360: https://github.com/elastic/elasticsearch-dsl-py/pull/1360
+
 7.2.0 (2020-05-04)
 ------------------
 
@@ -71,7 +138,7 @@ Changelog
 * Improved behavior of ``Index.save`` where it does a better job when index
   already exists
 * Composite aggregations now correctly support multiple ``sources`` aggs
-* ``UpdateByQuery`` implementated by @emarcey
+* ``UpdateByQuery`` implemented by @emarcey
 
 6.2.1 (2018-07-03)
 ------------------
@@ -197,7 +264,7 @@ Breaking changes:
 2.2.0 (2016-11-04)
 ------------------
 
- * accessing missing string fields no longer returnd ``''`` but returns
+ * accessing missing string fields no longer returned ``''`` but returns
    ``None`` instead.
 
 * fix issues with bool's ``|`` and ``&`` operators and ``minimum_should_match``
diff --git a/docs/api.rst b/docs/api.rst
index 5e12daa1d..f914f69e8 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -53,7 +53,7 @@ pattern to the query dsl:
     from elasticsearch_dsl import Keyword, Mapping, Nested, Text
 
     # name your type
-    m = Mapping('my-type')
+    m = Mapping()
 
     # add fields
     m.field('title', 'text')
@@ -93,7 +93,7 @@ directly from an existing type:
 .. code:: python
 
     # get the mapping from our production cluster
-    m = Mapping.from_es('my-index', 'my-type', using='prod')
+    m = Mapping.from_es('my-index', using='prod')
 
     # update based on data in QA cluster
     m.update_from_es('my-index', using='qa')
diff --git a/docs/conf.py b/docs/conf.py
index 035d8cff6..60aacdb58 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,4 +1,21 @@
 # -*- coding: utf-8 -*-
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + # # Elasticsearch documentation build configuration file, created by # sphinx-quickstart on Mon May 6 15:38:41 2013. @@ -11,47 +28,47 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import os import datetime +import os + +import elasticsearch_dsl # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.doctest"] autoclass_content = "both" # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'Elasticsearch DSL' -copyright = u'%d, Elasticsearch B.V' % datetime.datetime.now().year +project = u"Elasticsearch DSL" +copyright = u"%d, Elasticsearch B.V" % datetime.datetime.now().year # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. -# -import elasticsearch_dsl # The short X.Y version. version = elasticsearch_dsl.__versionstr__ # The full version, including alpha/beta/rc tags. @@ -59,40 +76,40 @@ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). 
-#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # -- Options for HTML output --------------------------------------------------- @@ -100,126 +117,130 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -on_rtd = os.environ.get('READTHEDOCS', None) == 'True' +on_rtd = os.environ.get("READTHEDOCS", None) == "True" if not on_rtd: # only import and set the theme if we're building docs locally import sphinx_rtd_theme - html_theme = 'sphinx_rtd_theme' + + html_theme = "sphinx_rtd_theme" html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -#html_static_path = ['_static'] +# html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 
-#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'Elasticsearchdoc' +htmlhelp_basename = "Elasticsearchdoc" # -- Options for LaTeX output -------------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # The paper size ('letterpaper' or 'a4paper'). + # 'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + # 'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + # 'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'Elasticsearch-dsl.tex', u'Elasticsearch DSL Documentation', - u'Honza Král', 'manual'), + ( + "index", + "Elasticsearch-dsl.tex", + u"Elasticsearch DSL Documentation", + u"Elasticsearch B.V", + "manual", + ), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output -------------------------------------------- @@ -227,12 +248,17 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - ('index', 'elasticsearch-dsl', u'Elasticsearch DSL Documentation', - [u'Honza Král'], 1) + ( + "index", + "elasticsearch-dsl", + u"Elasticsearch DSL Documentation", + [u"Elasticsearch B.V"], + 1, + ) ] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ @@ -241,19 +267,25 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'Elasticsearch', u'Elasticsearch Documentation', - u'Honza Král', 'Elasticsearch', 'One line description of project.', - 'Miscellaneous'), + ( + "index", + "Elasticsearch", + u"Elasticsearch Documentation", + u"Elasticsearch B.V", + "Elasticsearch", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. 
-#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False diff --git a/docs/faceted_search.rst b/docs/faceted_search.rst index c2f7e5e0e..dbf84a1f4 100644 --- a/docs/faceted_search.rst +++ b/docs/faceted_search.rst @@ -49,7 +49,7 @@ There are several different facets available: provides an option to split documents into groups based on a value of a field, for example ``TermsFacet(field='category')`` ``DateHistogramFacet`` - split documents into time intervals, example: ``DateHistogramFacet(field="published_date", interval="day")`` + split documents into time intervals, example: ``DateHistogramFacet(field="published_date", calendar_interval="day")`` ``HistogramFacet`` similar to ``DateHistogramFacet`` but for numerical values: ``HistogramFacet(field="rating", interval=2)`` @@ -83,7 +83,7 @@ of the methods responsible for the class' functions: filter for published articles only). ``query(self, search)`` - adds the query postion of the search (if search input specified), by default + adds the query position of the search (if search input specified), by default using ``MultiField`` query. Override this if you want to modify the query type used. ``highlight(self, search)`` diff --git a/docs/persistence.rst b/docs/persistence.rst index 0eca56404..cb1dc2266 100644 --- a/docs/persistence.rst +++ b/docs/persistence.rst @@ -228,7 +228,7 @@ If the document is not found in elasticsearch an exception p = Post.get(id='not-in-es', ignore=404) p is None -When you wish to retrive multiple documents at the same time by their ``id`` +When you wish to retrieve multiple documents at the same time by their ``id`` you can use the ``mget`` method: .. code:: python diff --git a/docs/search_dsl.rst b/docs/search_dsl.rst index 6891cb4dc..2d8b5dfca 100644 --- a/docs/search_dsl.rst +++ b/docs/search_dsl.rst @@ -532,6 +532,9 @@ just iterate over the ``Response`` object: for h in response: print(h.title, h.body) +.. note:: + + If you are only seeing partial results (e.g. 10000 or even 10 results), consider using the option ``s.extra(track_total_hits=True)`` to get a full hit count. Result ~~~~~~ diff --git a/elasticsearch_dsl/__init__.py b/elasticsearch_dsl/__init__.py index ad79cf79d..f9e71bef7 100644 --- a/elasticsearch_dsl/__init__.py +++ b/elasticsearch_dsl/__init__.py @@ -1,16 +1,163 @@ -from .query import Q +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from . 
import connections from .aggs import A +from .analysis import analyzer, char_filter, normalizer, token_filter, tokenizer +from .document import Document, InnerDoc, MetaField +from .exceptions import ( + ElasticsearchDslException, + IllegalOperation, + UnknownDslObject, + ValidationException, +) +from .faceted_search import ( + DateHistogramFacet, + Facet, + FacetedResponse, + FacetedSearch, + HistogramFacet, + NestedFacet, + RangeFacet, + TermsFacet, +) +from .field import ( + Binary, + Boolean, + Byte, + Completion, + CustomField, + Date, + DateRange, + DenseVector, + Double, + DoubleRange, + Field, + Float, + FloatRange, + GeoPoint, + GeoShape, + HalfFloat, + Integer, + IntegerRange, + Ip, + IpRange, + Join, + Keyword, + Long, + LongRange, + Murmur3, + Nested, + Object, + Percolator, + RangeField, + RankFeature, + RankFeatures, + ScaledFloat, + SearchAsYouType, + Short, + SparseVector, + Text, + TokenCount, + construct_field, +) from .function import SF -from .search import Search, MultiSearch -from .update_by_query import UpdateByQuery -from .field import * -from .document import Document, MetaField, InnerDoc -from .mapping import Mapping from .index import Index, IndexTemplate -from .analysis import analyzer, char_filter, normalizer, token_filter, tokenizer -from .faceted_search import * -from .wrappers import * +from .mapping import Mapping +from .query import Q +from .search import MultiSearch, Search +from .update_by_query import UpdateByQuery +from .utils import AttrDict, AttrList, DslBase +from .wrappers import Range -VERSION = (7, 2, 0) +VERSION = (7, 4, 1) __version__ = VERSION -__versionstr__ = '.'.join(map(str, VERSION)) +__versionstr__ = ".".join(map(str, VERSION)) +__all__ = [ + "A", + "AttrDict", + "AttrList", + "Binary", + "Boolean", + "Byte", + "Completion", + "CustomField", + "Date", + "DateHistogramFacet", + "DateRange", + "DenseVector", + "Document", + "Double", + "DoubleRange", + "DslBase", + "ElasticsearchDslException", + "Facet", + "FacetedResponse", + "FacetedSearch", + "Field", + "Float", + "FloatRange", + "GeoPoint", + "GeoShape", + "HalfFloat", + "HistogramFacet", + "IllegalOperation", + "Index", + "IndexTemplate", + "InnerDoc", + "Integer", + "IntegerRange", + "Ip", + "IpRange", + "Join", + "Keyword", + "Long", + "LongRange", + "Mapping", + "MetaField", + "MultiSearch", + "Murmur3", + "Nested", + "NestedFacet", + "Object", + "Percolator", + "Q", + "Range", + "RangeFacet", + "RangeField", + "RankFeature", + "RankFeatures", + "SF", + "ScaledFloat", + "Search", + "SearchAsYouType", + "Short", + "SparseVector", + "TermsFacet", + "Text", + "TokenCount", + "UnknownDslObject", + "UpdateByQuery", + "ValidationException", + "analyzer", + "char_filter", + "connections", + "construct_field", + "normalizer", + "token_filter", + "tokenizer", +] diff --git a/elasticsearch_dsl/aggs.py b/elasticsearch_dsl/aggs.py index 3c1fd89bc..d30716400 100644 --- a/elasticsearch_dsl/aggs.py +++ b/elasticsearch_dsl/aggs.py @@ -1,51 +1,77 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + try: import collections.abc as collections_abc # only works on python 3.3+ except ImportError: import collections as collections_abc +from .response.aggs import AggResponse, BucketData, FieldBucketData, TopHitsData from .utils import DslBase -from .response.aggs import BucketData, FieldBucketData, AggResponse, TopHitsData + def A(name_or_agg, filter=None, **params): if filter is not None: - if name_or_agg != 'filter': - raise ValueError("Aggregation %r doesn't accept positional argument 'filter'." % name_or_agg) - params['filter'] = filter + if name_or_agg != "filter": + raise ValueError( + "Aggregation %r doesn't accept positional argument 'filter'." + % name_or_agg + ) + params["filter"] = filter # {"terms": {"field": "tags"}, "aggs": {...}} if isinstance(name_or_agg, collections_abc.Mapping): if params: - raise ValueError('A() cannot accept parameters when passing in a dict.') + raise ValueError("A() cannot accept parameters when passing in a dict.") # copy to avoid modifying in-place agg = name_or_agg.copy() # pop out nested aggs - aggs = agg.pop('aggs', None) + aggs = agg.pop("aggs", None) # pop out meta data - meta = agg.pop('meta', None) + meta = agg.pop("meta", None) # should be {"terms": {"field": "tags"}} if len(agg) != 1: - raise ValueError('A() can only accept dict with an aggregation ({"terms": {...}}). ' - 'Instead it got (%r)' % name_or_agg) + raise ValueError( + 'A() can only accept dict with an aggregation ({"terms": {...}}). ' + "Instead it got (%r)" % name_or_agg + ) agg_type, params = agg.popitem() if aggs: params = params.copy() - params['aggs'] = aggs + params["aggs"] = aggs if meta: params = params.copy() - params['meta'] = meta + params["meta"] = meta return Agg.get_dsl_class(agg_type)(_expand__to_dot=False, **params) # Terms(...) just return the nested agg elif isinstance(name_or_agg, Agg): if params: - raise ValueError('A() cannot accept parameters when passing in an Agg object.') + raise ValueError( + "A() cannot accept parameters when passing in an Agg object." 
+ ) return name_or_agg # "terms", field="tags" return Agg.get_dsl_class(name_or_agg)(**params) + class Agg(DslBase): - _type_name = 'agg' + _type_name = "agg" _type_shortcut = staticmethod(A) name = None @@ -54,8 +80,8 @@ def __contains__(self, key): def to_dict(self): d = super(Agg, self).to_dict() - if 'meta' in d[self.name]: - d['meta'] = d[self.name].pop('meta') + if "meta" in d[self.name]: + d["meta"] = d[self.name].pop("meta") return d def result(self, search, data): @@ -64,20 +90,21 @@ def result(self, search, data): class AggBase(object): _param_defs = { - 'aggs': {'type': 'agg', 'hash': True}, + "aggs": {"type": "agg", "hash": True}, } + def __contains__(self, key): - return key in self._params.get('aggs', {}) + return key in self._params.get("aggs", {}) def __getitem__(self, agg_name): - agg = self._params.setdefault('aggs', {})[agg_name] # propagate KeyError + agg = self._params.setdefault("aggs", {})[agg_name] # propagate KeyError # make sure we're not mutating a shared state - whenever accessing a # bucket, return a shallow copy of it to be safe if isinstance(agg, Bucket): agg = A(agg.name, **agg._params) # be sure to store the copy so any modifications to it will affect us - self._params['aggs'][agg_name] = agg + self._params["aggs"][agg_name] = agg return agg @@ -118,204 +145,301 @@ def __init__(self, **params): def to_dict(self): d = super(AggBase, self).to_dict() - if 'aggs' in d[self.name]: - d['aggs'] = d[self.name].pop('aggs') + if "aggs" in d[self.name]: + d["aggs"] = d[self.name].pop("aggs") return d + class Filter(Bucket): - name = 'filter' + name = "filter" _param_defs = { - 'filter': {'type': 'query'}, - 'aggs': {'type': 'agg', 'hash': True}, + "filter": {"type": "query"}, + "aggs": {"type": "agg", "hash": True}, } def __init__(self, filter=None, **params): if filter is not None: - params['filter'] = filter + params["filter"] = filter super(Filter, self).__init__(**params) def to_dict(self): d = super(Filter, self).to_dict() - d[self.name].update(d[self.name].pop('filter', {})) + d[self.name].update(d[self.name].pop("filter", {})) return d + class Pipeline(Agg): pass + # bucket aggregations class Filters(Bucket): - name = 'filters' + name = "filters" _param_defs = { - 'filters': {'type': 'query', 'hash': True}, - 'aggs': {'type': 'agg', 'hash': True}, + "filters": {"type": "query", "hash": True}, + "aggs": {"type": "agg", "hash": True}, } + class Children(Bucket): - name = 'children' + name = "children" + class Parent(Bucket): - name = 'parent' + name = "parent" + class DateHistogram(Bucket): - name = 'date_histogram' + name = "date_histogram" def result(self, search, data): return FieldBucketData(self, search, data) + class AutoDateHistogram(DateHistogram): - name = 'auto_date_histogram' + name = "auto_date_histogram" + class DateRange(Bucket): - name = 'date_range' + name = "date_range" + class GeoDistance(Bucket): - name = 'geo_distance' + name = "geo_distance" + class GeohashGrid(Bucket): - name = 'geohash_grid' + name = "geohash_grid" + class GeotileGrid(Bucket): - name = 'geotile_grid' + name = "geotile_grid" + class GeoCentroid(Bucket): - name = 'geo_centroid' + name = "geo_centroid" + class Global(Bucket): - name = 'global' + name = "global" + class Histogram(Bucket): - name = 'histogram' + name = "histogram" + def result(self, search, data): return FieldBucketData(self, search, data) + class IPRange(Bucket): - name = 'ip_range' + name = "ip_range" + class Missing(Bucket): - name = 'missing' + name = "missing" + class Nested(Bucket): - name = 'nested' + 
name = "nested" + class Range(Bucket): - name = 'range' + name = "range" + + +class RareTerms(Bucket): + name = "rare_terms" + + def result(self, search, data): + return FieldBucketData(self, search, data) + class ReverseNested(Bucket): - name = 'reverse_nested' + name = "reverse_nested" + class SignificantTerms(Bucket): - name = 'significant_terms' + name = "significant_terms" + class SignificantText(Bucket): - name = 'significant_text' + name = "significant_text" + class Terms(Bucket): - name = 'terms' + name = "terms" def result(self, search, data): return FieldBucketData(self, search, data) + class Sampler(Bucket): - name = 'sampler' + name = "sampler" + class DiversifiedSampler(Bucket): - name = 'diversified_sampler' + name = "diversified_sampler" + class Composite(Bucket): - name = 'composite' + name = "composite" _param_defs = { - 'sources': {'type': 'agg', 'hash': True, 'multi': True}, - 'aggs': {'type': 'agg', 'hash': True}, + "sources": {"type": "agg", "hash": True, "multi": True}, + "aggs": {"type": "agg", "hash": True}, } + +class VariableWidthHistogram(Bucket): + name = "variable_width_histogram" + + def result(self, search, data): + return FieldBucketData(self, search, data) + + # metric aggregations class TopHits(Agg): - name = 'top_hits' + name = "top_hits" def result(self, search, data): return TopHitsData(self, search, data) + class Avg(Agg): - name = 'avg' + name = "avg" + class WeightedAvg(Agg): - name = 'weighted_avg' + name = "weighted_avg" + class Cardinality(Agg): - name = 'cardinality' + name = "cardinality" + class ExtendedStats(Agg): - name = 'extended_stats' + name = "extended_stats" + + +class Boxplot(Agg): + name = "boxplot" + class GeoBounds(Agg): - name = 'geo_bounds' + name = "geo_bounds" + class Max(Agg): - name = 'max' + name = "max" + + +class MedianAbsoluteDeviation(Agg): + name = "median_absolute_deviation" + class Min(Agg): - name = 'min' + name = "min" + class Percentiles(Agg): - name = 'percentiles' + name = "percentiles" + class PercentileRanks(Agg): - name = 'percentile_ranks' + name = "percentile_ranks" + class ScriptedMetric(Agg): - name = 'scripted_metric' + name = "scripted_metric" + class Stats(Agg): - name = 'stats' + name = "stats" + class Sum(Agg): - name = 'sum' + name = "sum" + + +class TTest(Agg): + name = "t_test" + class ValueCount(Agg): - name = 'value_count' + name = "value_count" + # pipeline aggregations class AvgBucket(Pipeline): - name = 'avg_bucket' + name = "avg_bucket" + class BucketScript(Pipeline): - name = 'bucket_script' + name = "bucket_script" + class BucketSelector(Pipeline): - name = 'bucket_selector' + name = "bucket_selector" + class CumulativeSum(Pipeline): - name = 'cumulative_sum' + name = "cumulative_sum" + + +class CumulativeCardinality(Pipeline): + name = "cumulative_cardinality" + class Derivative(Pipeline): - name = 'derivative' + name = "derivative" + class ExtendedStatsBucket(Pipeline): - name = 'extended_stats_bucket' + name = "extended_stats_bucket" + + +class Inference(Pipeline): + name = "inference" + class MaxBucket(Pipeline): - name = 'max_bucket' + name = "max_bucket" + class MinBucket(Pipeline): - name = 'min_bucket' + name = "min_bucket" + class MovingFn(Pipeline): - name = 'moving_fn' + name = "moving_fn" + class MovingAvg(Pipeline): - name = 'moving_avg' + name = "moving_avg" + + +class MovingPercentiles(Pipeline): + name = "moving_percentiles" + + +class Normalize(Pipeline): + name = "normalize" + class PercentilesBucket(Pipeline): - name = 'percentiles_bucket' + name = "percentiles_bucket" + 
class SerialDiff(Pipeline): - name = 'serial_diff' + name = "serial_diff" + class StatsBucket(Pipeline): - name = 'stats_bucket' + name = "stats_bucket" + class SumBucket(Pipeline): - name = 'sum_bucket' + name = "sum_bucket" + class BucketSort(Pipeline): - name = 'bucket_sort' + name = "bucket_sort" diff --git a/elasticsearch_dsl/analysis.py b/elasticsearch_dsl/analysis.py index ffa6a904e..115b2fab2 100644 --- a/elasticsearch_dsl/analysis.py +++ b/elasticsearch_dsl/analysis.py @@ -1,28 +1,48 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import six from .connections import get_connection from .utils import AttrDict, DslBase, merge -__all__ = [ - 'tokenizer', 'analyzer', 'char_filter', 'token_filter', 'normalizer' -] +__all__ = ["tokenizer", "analyzer", "char_filter", "token_filter", "normalizer"] + class AnalysisBase(object): @classmethod def _type_shortcut(cls, name_or_instance, type=None, **kwargs): if isinstance(name_or_instance, cls): if type or kwargs: - raise ValueError('%s() cannot accept parameters.' % cls.__name__) + raise ValueError("%s() cannot accept parameters." % cls.__name__) return name_or_instance if not (type or kwargs): - return cls.get_dsl_class('builtin')(name_or_instance) + return cls.get_dsl_class("builtin")(name_or_instance) + + return cls.get_dsl_class(type, "custom")( + name_or_instance, type or "custom", **kwargs + ) - return cls.get_dsl_class(type, 'custom')(name_or_instance, type or 'custom', **kwargs) class CustomAnalysis(object): - name = 'custom' - def __init__(self, filter_name, builtin_type='custom', **kwargs): + name = "custom" + + def __init__(self, filter_name, builtin_type="custom", **kwargs): self._builtin_type = builtin_type self._name = filter_name super(CustomAnalysis, self).__init__(**kwargs) @@ -34,38 +54,47 @@ def to_dict(self): def get_definition(self): d = super(CustomAnalysis, self).to_dict() d = d.pop(self.name) - d['type'] = self._builtin_type + d["type"] = self._builtin_type return d + class CustomAnalysisDefinition(CustomAnalysis): def get_analysis_definition(self): out = {self._type_name: {self._name: self.get_definition()}} - t = getattr(self, 'tokenizer', None) - if 'tokenizer' in self._param_defs and hasattr(t, 'get_definition'): - out['tokenizer'] = {t._name: t.get_definition()} + t = getattr(self, "tokenizer", None) + if "tokenizer" in self._param_defs and hasattr(t, "get_definition"): + out["tokenizer"] = {t._name: t.get_definition()} - filters = {f._name: f.get_definition() - for f in self.filter if hasattr(f, 'get_definition')} + filters = { + f._name: f.get_definition() + for f in self.filter + if hasattr(f, "get_definition") + } if filters: - out['filter'] = filters + out["filter"] = filters # any sub filter definitions like multiplexers etc? 
for f in self.filter: - if hasattr(f, 'get_analysis_definition'): + if hasattr(f, "get_analysis_definition"): d = f.get_analysis_definition() if d: merge(out, d, True) - char_filters = {f._name: f.get_definition() - for f in self.char_filter if hasattr(f, 'get_definition')} + char_filters = { + f._name: f.get_definition() + for f in self.char_filter + if hasattr(f, "get_definition") + } if char_filters: - out['char_filter'] = char_filters + out["char_filter"] = char_filters return out + class BuiltinAnalysis(object): - name = 'builtin' + name = "builtin" + def __init__(self, name): self._name = name super(BuiltinAnalysis, self).__init__() @@ -74,22 +103,25 @@ def to_dict(self): # only name to present in lists return self._name + class Analyzer(AnalysisBase, DslBase): - _type_name = 'analyzer' + _type_name = "analyzer" name = None + class BuiltinAnalyzer(BuiltinAnalysis, Analyzer): def get_analysis_definition(self): return {} + class CustomAnalyzer(CustomAnalysisDefinition, Analyzer): _param_defs = { - 'filter': {'type': 'token_filter', 'multi': True}, - 'char_filter': {'type': 'char_filter', 'multi': True}, - 'tokenizer': {'type': 'tokenizer'}, + "filter": {"type": "token_filter", "multi": True}, + "char_filter": {"type": "char_filter", "multi": True}, + "tokenizer": {"type": "tokenizer"}, } - def simulate(self, text, using='default', explain=False, attributes=None): + def simulate(self, text, using="default", explain=False, attributes=None): """ Use the Analyze API of elasticsearch to test the outcome of this analyzer. @@ -103,14 +135,14 @@ def simulate(self, text, using='default', explain=False, attributes=None): """ es = get_connection(using) - body = {'text': text, 'explain': explain} + body = {"text": text, "explain": explain} if attributes: - body['attributes'] = attributes + body["attributes"] = attributes definition = self.get_analysis_definition() analyzer_def = self.get_definition() - for section in ('tokenizer', 'char_filter', 'filter'): + for section in ("tokenizer", "char_filter", "filter"): if section not in analyzer_def: continue sec_def = definition.get(section, {}) @@ -119,109 +151,127 @@ def simulate(self, text, using='default', explain=False, attributes=None): if isinstance(sec_names, six.string_types): body[section] = sec_def.get(sec_names, sec_names) else: - body[section] = [sec_def.get(sec_name, sec_name) for sec_name in sec_names] + body[section] = [ + sec_def.get(sec_name, sec_name) for sec_name in sec_names + ] - if self._builtin_type != 'custom': - body['analyzer'] = self._builtin_type + if self._builtin_type != "custom": + body["analyzer"] = self._builtin_type return AttrDict(es.indices.analyze(body=body)) + class Normalizer(AnalysisBase, DslBase): - _type_name = 'normalizer' + _type_name = "normalizer" name = None + class BuiltinNormalizer(BuiltinAnalysis, Normalizer): def get_analysis_definition(self): return {} + class CustomNormalizer(CustomAnalysisDefinition, Normalizer): _param_defs = { - 'filter': {'type': 'token_filter', 'multi': True}, - 'char_filter': {'type': 'char_filter', 'multi': True} + "filter": {"type": "token_filter", "multi": True}, + "char_filter": {"type": "char_filter", "multi": True}, } + class Tokenizer(AnalysisBase, DslBase): - _type_name = 'tokenizer' + _type_name = "tokenizer" name = None + class BuiltinTokenizer(BuiltinAnalysis, Tokenizer): pass + class CustomTokenizer(CustomAnalysis, Tokenizer): pass class TokenFilter(AnalysisBase, DslBase): - _type_name = 'token_filter' + _type_name = "token_filter" name = None + class 
BuiltinTokenFilter(BuiltinAnalysis, TokenFilter): pass + class CustomTokenFilter(CustomAnalysis, TokenFilter): pass + class MultiplexerTokenFilter(CustomTokenFilter): - name = 'multiplexer' + name = "multiplexer" def get_definition(self): d = super(CustomTokenFilter, self).get_definition() - if 'filters' in d: - d['filters'] = [ + if "filters" in d: + d["filters"] = [ # comma delimited string given by user fs if isinstance(fs, six.string_types) else # list of strings or TokenFilter objects - ', '.join(f.to_dict() if hasattr(f, 'to_dict') else f for f in fs) - + ", ".join(f.to_dict() if hasattr(f, "to_dict") else f for f in fs) for fs in self.filters ] return d def get_analysis_definition(self): - if not hasattr(self, 'filters'): + if not hasattr(self, "filters"): return {} fs = {} - d = {'filter': fs} + d = {"filter": fs} for filters in self.filters: if isinstance(filters, six.string_types): continue - fs.update({f._name: f.get_definition() - for f in filters if hasattr(f, 'get_definition')}) + fs.update( + { + f._name: f.get_definition() + for f in filters + if hasattr(f, "get_definition") + } + ) return d + class ConditionalTokenFilter(CustomTokenFilter): - name = 'condition' + name = "condition" def get_definition(self): d = super(CustomTokenFilter, self).get_definition() - if 'filter' in d: - d['filter'] = [ - f.to_dict() if hasattr(f, 'to_dict') else f - for f in self.filter + if "filter" in d: + d["filter"] = [ + f.to_dict() if hasattr(f, "to_dict") else f for f in self.filter ] return d def get_analysis_definition(self): - if not hasattr(self, 'filter'): + if not hasattr(self, "filter"): return {} return { - 'filter': { + "filter": { f._name: f.get_definition() - for f in self.filter if hasattr(f, 'get_definition') + for f in self.filter + if hasattr(f, "get_definition") } } class CharFilter(AnalysisBase, DslBase): - _type_name = 'char_filter' + _type_name = "char_filter" name = None + class BuiltinCharFilter(BuiltinAnalysis, CharFilter): pass + class CustomCharFilter(CustomAnalysis, CharFilter): pass diff --git a/elasticsearch_dsl/connections.py b/elasticsearch_dsl/connections.py index 40f5b6a08..cdb490f3f 100644 --- a/elasticsearch_dsl/connections.py +++ b/elasticsearch_dsl/connections.py @@ -1,14 +1,37 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from elasticsearch import Elasticsearch, __version__ from six import string_types -from elasticsearch import Elasticsearch - from .serializer import serializer +# The 'body' parameter was deprecated in favor of named +# body parameters in version 7.15.0 of the client. 
The relevant APIs +# affected include 'search', 'index', 'update', and 'indices.create' +CLIENT_HAS_NAMED_BODY_PARAMS = __version__ >= (7, 15, 0) + + class Connections(object): """ Class responsible for holding connections to different clusters. Used as a singleton in this module. """ + def __init__(self): self._kwargs = {} self._conns = {} @@ -55,18 +78,18 @@ def remove_connection(self, alias): errors += 1 if errors == 2: - raise KeyError('There is no connection with alias %r.' % alias) + raise KeyError("There is no connection with alias %r." % alias) - def create_connection(self, alias='default', **kwargs): + def create_connection(self, alias="default", **kwargs): """ Construct an instance of ``elasticsearch.Elasticsearch`` and register it under given alias. """ - kwargs.setdefault('serializer', serializer) + kwargs.setdefault("serializer", serializer) conn = self._conns[alias] = Elasticsearch(**kwargs) return conn - def get_connection(self, alias='default'): + def get_connection(self, alias="default"): """ Retrieve a connection, construct it if necessary (only configuration was passed to us). If a non-string alias has been passed through we @@ -91,7 +114,8 @@ def get_connection(self, alias='default'): return self.create_connection(alias, **self._kwargs[alias]) except KeyError: # no connection and no kwargs to set one up - raise KeyError('There is no connection with alias %r.' % alias) + raise KeyError("There is no connection with alias %r." % alias) + connections = Connections() configure = connections.configure diff --git a/elasticsearch_dsl/document.py b/elasticsearch_dsl/document.py index e2d25bae8..995481b10 100644 --- a/elasticsearch_dsl/document.py +++ b/elasticsearch_dsl/document.py @@ -1,3 +1,20 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ try: import collections.abc as collections_abc # only works on python 3.3+ except ImportError: @@ -8,7 +25,7 @@ from elasticsearch.exceptions import NotFoundError, RequestError from six import add_metaclass, iteritems -from .connections import get_connection +from .connections import CLIENT_HAS_NAMED_BODY_PARAMS, get_connection from .exceptions import IllegalOperation, ValidationException from .field import Field from .index import Index @@ -25,9 +42,10 @@ def __init__(self, *args, **kwargs): class DocumentMeta(type): def __new__(cls, name, bases, attrs): # DocumentMeta filters attrs in place - attrs['_doc_type'] = DocumentOptions(name, bases, attrs) + attrs["_doc_type"] = DocumentOptions(name, bases, attrs) return super(DocumentMeta, cls).__new__(cls, name, bases, attrs) + class IndexMeta(DocumentMeta): # global flag to guard us from associating an Index with the base Document # class, only user defined subclasses should have an _index attr @@ -36,7 +54,7 @@ class IndexMeta(DocumentMeta): def __new__(cls, name, bases, attrs): new_cls = super(IndexMeta, cls).__new__(cls, name, bases, attrs) if cls._document_initialized: - index_opts = attrs.pop('Index', None) + index_opts = attrs.pop("Index", None) index = cls.construct_index(index_opts, bases) new_cls._index = index index.document(new_cls) @@ -47,29 +65,26 @@ def __new__(cls, name, bases, attrs): def construct_index(cls, opts, bases): if opts is None: for b in bases: - if hasattr(b, '_index'): + if hasattr(b, "_index"): return b._index # Set None as Index name so it will set _all while making the query return Index(name=None) - i = Index( - getattr(opts, 'name', '*'), - using=getattr(opts, 'using', 'default') - ) - i.settings(**getattr(opts, 'settings', {})) - i.aliases(**getattr(opts, 'aliases', {})) - for a in getattr(opts, 'analyzers', ()): + i = Index(getattr(opts, "name", "*"), using=getattr(opts, "using", "default")) + i.settings(**getattr(opts, "settings", {})) + i.aliases(**getattr(opts, "aliases", {})) + for a in getattr(opts, "analyzers", ()): i.analyzer(a) return i class DocumentOptions(object): def __init__(self, name, bases, attrs): - meta = attrs.pop('Meta', None) + meta = attrs.pop("Meta", None) # create the mapping instance - self.mapping = getattr(meta, 'mapping', Mapping()) + self.mapping = getattr(meta, "mapping", Mapping()) # register all declared fields into the mapping for name, value in list(iteritems(attrs)): @@ -85,7 +100,7 @@ def __init__(self, name, bases, attrs): # document inheritance - include the fields from parents' mappings for b in bases: - if hasattr(b, '_doc_type') and hasattr(b._doc_type, 'mapping'): + if hasattr(b, "_doc_type") and hasattr(b._doc_type, "mapping"): self.mapping.update(b._doc_type.mapping, update_only=True) @property @@ -98,22 +113,25 @@ class InnerDoc(ObjectBase): """ Common class for inner documents like Object or Nested """ + @classmethod def from_es(cls, data, data_only=False): if data_only: - data = {'_source': data} + data = {"_source": data} return super(InnerDoc, cls).from_es(data) + @add_metaclass(IndexMeta) class Document(ObjectBase): """ Model-like class for persisting documents in elasticsearch. 
""" + @classmethod def _matches(cls, hit): if cls._index._name is None: return True - return fnmatch(hit.get('_index', ''), cls._index._name) + return fnmatch(hit.get("_index", ""), cls._index._name) @classmethod def _get_using(cls, using=None): @@ -139,20 +157,23 @@ def init(cls, index=None, using=None): def _get_index(self, index=None, required=True): if index is None: - index = getattr(self.meta, 'index', None) + index = getattr(self.meta, "index", None) if index is None: - index = getattr(self._index, '_name', None) + index = getattr(self._index, "_name", None) if index is None and required: - raise ValidationException('No index') - if index and '*' in index: - raise ValidationException('You cannot write to a wildcard index.') + raise ValidationException("No index") + if index and "*" in index: + raise ValidationException("You cannot write to a wildcard index.") return index def __repr__(self): - return '{}({})'.format( + return "{}({})".format( self.__class__.__name__, - ', '.join('{}={!r}'.format(key, getattr(self.meta, key)) for key in - ('index', 'id') if key in self.meta) + ", ".join( + "{}={!r}".format(key, getattr(self.meta, key)) + for key in ("index", "id") + if key in self.meta + ), ) @classmethod @@ -162,9 +183,7 @@ def search(cls, using=None, index=None): over this ``Document``. """ return Search( - using=cls._get_using(using), - index=cls._default_index(index), - doc_type=[cls] + using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls] ) @classmethod @@ -181,18 +200,31 @@ def get(cls, id, using=None, index=None, **kwargs): ``Elasticsearch.get`` unchanged. """ es = cls._get_connection(using) - doc = es.get( - index=cls._default_index(index), - id=id, - **kwargs - ) - if not doc.get('found', False): + doc = es.get(index=cls._default_index(index), id=id, **kwargs) + if not doc.get("found", False): return None return cls.from_es(doc) @classmethod - def mget(cls, docs, using=None, index=None, raise_on_error=True, - missing='none', **kwargs): + def exists(cls, id, using=None, index=None, **kwargs): + """ + check if exists a single document from elasticsearch using its ``id``. + + :arg id: ``id`` of the document to check if exists + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``Elasticsearch.exists`` unchanged. + """ + es = cls._get_connection(using) + return es.exists(index=cls._default_index(index), id=id, **kwargs) + + @classmethod + def mget( + cls, docs, using=None, index=None, raise_on_error=True, missing="none", **kwargs + ): r""" Retrieve multiple document by their ``id``\s. Returns a list of instances in the same order as requested. @@ -210,24 +242,20 @@ def mget(cls, docs, using=None, index=None, raise_on_error=True, Any additional keyword arguments will be passed to ``Elasticsearch.mget`` unchanged. 
""" - if missing not in ('raise', 'skip', 'none'): + if missing not in ("raise", "skip", "none"): raise ValueError("'missing' must be 'raise', 'skip', or 'none'.") es = cls._get_connection(using) body = { - 'docs': [ - doc if isinstance(doc, collections_abc.Mapping) else {'_id': doc} + "docs": [ + doc if isinstance(doc, collections_abc.Mapping) else {"_id": doc} for doc in docs ] } - results = es.mget( - body, - index=cls._default_index(index), - **kwargs - ) + results = es.mget(body, index=cls._default_index(index), **kwargs) objs, error_docs, missing_docs = [], [], [] - for doc in results['docs']: - if doc.get('found'): + for doc in results["docs"]: + if doc.get("found"): if error_docs or missing_docs: # We're going to raise an exception anyway, so avoid an # expensive call to cls.from_es(). @@ -235,27 +263,27 @@ def mget(cls, docs, using=None, index=None, raise_on_error=True, objs.append(cls.from_es(doc)) - elif doc.get('error'): + elif doc.get("error"): if raise_on_error: error_docs.append(doc) - if missing == 'none': + if missing == "none": objs.append(None) # The doc didn't cause an error, but the doc also wasn't found. - elif missing == 'raise': + elif missing == "raise": missing_docs.append(doc) - elif missing == 'none': + elif missing == "none": objs.append(None) if error_docs: - error_ids = [doc['_id'] for doc in error_docs] - message = 'Required routing not provided for documents %s.' - message %= ', '.join(error_ids) + error_ids = [doc["_id"] for doc in error_docs] + message = "Required routing not provided for documents %s." + message %= ", ".join(error_ids) raise RequestError(400, message, error_docs) if missing_docs: - missing_ids = [doc['_id'] for doc in missing_docs] - message = 'Documents %s not found.' % ', '.join(missing_ids) - raise NotFoundError(404, message, {'docs': missing_docs}) + missing_ids = [doc["_id"] for doc in missing_docs] + message = "Documents %s not found." 
% ", ".join(missing_ids) + raise NotFoundError(404, message, {"docs": missing_docs}) return objs def delete(self, using=None, index=None, **kwargs): @@ -271,22 +299,15 @@ def delete(self, using=None, index=None, **kwargs): """ es = self._get_connection(using) # extract routing etc from meta - doc_meta = { - k: self.meta[k] - for k in DOC_META_FIELDS - if k in self.meta - } + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} # Optimistic concurrency control - if 'seq_no' in self.meta and 'primary_term' in self.meta: - doc_meta['if_seq_no'] = self.meta['seq_no'] - doc_meta['if_primary_term'] = self.meta['primary_term'] + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] doc_meta.update(kwargs) - es.delete( - index=self._get_index(index), - **doc_meta - ) + es.delete(index=self._get_index(index), **doc_meta) def to_dict(self, include_meta=False, skip_empty=True): """ @@ -304,24 +325,31 @@ def to_dict(self, include_meta=False, skip_empty=True): if not include_meta: return d - meta = { - '_' + k: self.meta[k] - for k in DOC_META_FIELDS - if k in self.meta - } + meta = {"_" + k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} # in case of to_dict include the index unlike save/update/delete index = self._get_index(required=False) if index is not None: - meta['_index'] = index + meta["_index"] = index - meta['_source'] = d + meta["_source"] = d return meta - def update(self, using=None, index=None, detect_noop=True, - doc_as_upsert=False, refresh=False, retry_on_conflict=None, - script=None, script_id=None, scripted_upsert=False, upsert=None, - **fields): + def update( + self, + using=None, + index=None, + detect_noop=True, + doc_as_upsert=False, + refresh=False, + retry_on_conflict=None, + script=None, + script_id=None, + scripted_upsert=False, + upsert=None, + return_doc_meta=False, + **fields + ): """ Partial update of the document, specify fields you wish to update and both the instance and the document in elasticsearch will be updated:: @@ -345,34 +373,38 @@ def update(self, using=None, index=None, detect_noop=True, :arg doc_as_upsert: Instead of sending a partial doc plus an upsert doc, setting doc_as_upsert to true will use the contents of doc as the upsert value + :arg return_doc_meta: set to ``True`` to return all metadata from the + index API call instead of only the operation result :return operation result noop/updated """ body = { - 'doc_as_upsert': doc_as_upsert, - 'detect_noop': detect_noop, + "doc_as_upsert": doc_as_upsert, + "detect_noop": detect_noop, } # scripted update if script or script_id: if upsert is not None: - body['upsert'] = upsert + body["upsert"] = upsert if script: - script = {'source': script} + script = {"source": script} else: - script = {'id': script_id} + script = {"id": script_id} - script['params'] = fields + script["params"] = fields - body['script'] = script - body['scripted_upsert'] = scripted_upsert + body["script"] = script + body["scripted_upsert"] = scripted_upsert # partial document update else: if not fields: - raise IllegalOperation('You cannot call update() without updating individual fields or a script. ' - 'If you wish to update the entire object use save().') + raise IllegalOperation( + "You cannot call update() without updating individual fields or a script. " + "If you wish to update the entire object use save()." 
+ ) # update given fields locally merge(self, fields) @@ -381,40 +413,49 @@ def update(self, using=None, index=None, detect_noop=True, values = self.to_dict() # if fields were given: partial update - body['doc'] = { - k: values.get(k) - for k in fields.keys() - } + body["doc"] = {k: values.get(k) for k in fields.keys()} # extract routing etc from meta - doc_meta = { - k: self.meta[k] - for k in DOC_META_FIELDS - if k in self.meta - } + params = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} if retry_on_conflict is not None: - doc_meta['retry_on_conflict'] = retry_on_conflict + params["retry_on_conflict"] = retry_on_conflict # Optimistic concurrency control - if 'seq_no' in self.meta and 'primary_term' in self.meta: - doc_meta['if_seq_no'] = self.meta['seq_no'] - doc_meta['if_primary_term'] = self.meta['primary_term'] + if ( + retry_on_conflict in (None, 0) + and "seq_no" in self.meta + and "primary_term" in self.meta + ): + params["if_seq_no"] = self.meta["seq_no"] + params["if_primary_term"] = self.meta["primary_term"] + + params["refresh"] = refresh + + if CLIENT_HAS_NAMED_BODY_PARAMS: + params.update(body) + else: + params["body"] = body meta = self._get_connection(using).update( - index=self._get_index(index), - body=body, - refresh=refresh, - **doc_meta + index=self._get_index(index), **params ) # update meta information from ES for k in META_FIELDS: - if '_' + k in meta: - setattr(self.meta, k, meta['_' + k]) - - return meta['result'] - - def save(self, using=None, index=None, validate=True, skip_empty=True, **kwargs): + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] + + def save( + self, + using=None, + index=None, + validate=True, + skip_empty=True, + return_doc_meta=False, + **kwargs + ): """ Save the document into elasticsearch. If the document doesn't exist it is created, it is overwritten otherwise. Returns ``True`` if this @@ -427,6 +468,8 @@ def save(self, using=None, index=None, validate=True, skip_empty=True, **kwargs) :arg skip_empty: if set to ``False`` will cause empty values (``None``, ``[]``, ``{}``) to be left on the document. Those values will be stripped out otherwise as they make no difference in elasticsearch. + :arg return_doc_meta: set to ``True`` to return all metadata from the + update API call instead of only the operation result Any additional keyword arguments will be passed to ``Elasticsearch.index`` unchanged. 
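# Illustrative sketch of the update()/save() behaviour described above,
# reusing the assumed BlogPost document and 'default' connection from the
# earlier example; field values are made up.
post = BlogPost.get(id=42)

# partial update of named fields; the return value is 'updated' or 'noop'
result = post.update(title="New title", retry_on_conflict=3)

# with return_doc_meta=True the full API response is returned instead,
# so metadata such as _seq_no and _primary_term stays accessible
meta = post.update(title="Another title", return_doc_meta=True)
print(meta["result"], meta["_seq_no"])

# save() follows the same convention
meta = post.save(return_doc_meta=True)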
@@ -438,27 +481,23 @@ def save(self, using=None, index=None, validate=True, skip_empty=True, **kwargs) es = self._get_connection(using) # extract routing etc from meta - doc_meta = { - k: self.meta[k] - for k in DOC_META_FIELDS - if k in self.meta - } + params = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} # Optimistic concurrency control - if 'seq_no' in self.meta and 'primary_term' in self.meta: - doc_meta['if_seq_no'] = self.meta['seq_no'] - doc_meta['if_primary_term'] = self.meta['primary_term'] + if "seq_no" in self.meta and "primary_term" in self.meta: + params["if_seq_no"] = self.meta["seq_no"] + params["if_primary_term"] = self.meta["primary_term"] - doc_meta.update(kwargs) - meta = es.index( - index=self._get_index(index), - body=self.to_dict(skip_empty=skip_empty), - **doc_meta - ) + if CLIENT_HAS_NAMED_BODY_PARAMS: + params["document"] = self.to_dict(skip_empty=skip_empty) + else: + params["body"] = self.to_dict(skip_empty=skip_empty) + + params.update(kwargs) + meta = es.index(index=self._get_index(index), **params) # update meta information from ES for k in META_FIELDS: - if '_' + k in meta: - setattr(self.meta, k, meta['_' + k]) - - return meta['result'] + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + return meta if return_doc_meta else meta["result"] diff --git a/elasticsearch_dsl/exceptions.py b/elasticsearch_dsl/exceptions.py index 0ed5863d7..8aae0ffa8 100644 --- a/elasticsearch_dsl/exceptions.py +++ b/elasticsearch_dsl/exceptions.py @@ -1,3 +1,21 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + class ElasticsearchDslException(Exception): pass diff --git a/elasticsearch_dsl/faceted_search.py b/elasticsearch_dsl/faceted_search.py index 90a0282f1..eb80b686b 100644 --- a/elasticsearch_dsl/faceted_search.py +++ b/elasticsearch_dsl/faceted_search.py @@ -1,31 +1,55 @@ -from datetime import timedelta, datetime +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from datetime import datetime, timedelta + from six import iteritems, itervalues -from .search import Search from .aggs import A -from .utils import AttrDict +from .query import MatchAll, Nested, Range, Terms from .response import Response -from .query import Terms, Nested, Range, MatchAll +from .search import Search +from .utils import AttrDict __all__ = [ - 'FacetedSearch', 'HistogramFacet', 'TermsFacet', 'DateHistogramFacet', 'RangeFacet', - 'NestedFacet', + "FacetedSearch", + "HistogramFacet", + "TermsFacet", + "DateHistogramFacet", + "RangeFacet", + "NestedFacet", ] + class Facet(object): """ A facet on faceted search. Wraps and aggregation and provides functionality to create a filter for selected values and return a list of facet values from the result of the aggregation. """ + agg_type = None - def __init__(self, metric=None, metric_sort='desc', **kwargs): + def __init__(self, metric=None, metric_sort="desc", **kwargs): self.filter_values = () self._params = kwargs self._metric = metric if metric and metric_sort: - self._params['order'] = {'metric': metric_sort} + self._params["order"] = {"metric": metric_sort} def get_aggregation(self): """ @@ -33,7 +57,7 @@ def get_aggregation(self): """ agg = A(self.agg_type, **self._params) if self._metric: - agg.metric('metric', self._metric) + agg.metric("metric", self._metric) return agg def add_filter(self, filter_values): @@ -64,15 +88,15 @@ def get_value(self, bucket): """ return a value representing a bucket. Its key as default. """ - return bucket['key'] + return bucket["key"] def get_metric(self, bucket): """ Return a metric, by default doc_count for a bucket. """ if self._metric: - return bucket['metric']['value'] - return bucket['doc_count'] + return bucket["metric"]["value"] + return bucket["doc_count"] def get_values(self, data, filter_values): """ @@ -83,73 +107,103 @@ def get_values(self, data, filter_values): out = [] for bucket in data.buckets: key = self.get_value(bucket) - out.append(( - key, - self.get_metric(bucket), - self.is_filtered(key, filter_values) - )) + out.append( + (key, self.get_metric(bucket), self.is_filtered(key, filter_values)) + ) return out class TermsFacet(Facet): - agg_type = 'terms' + agg_type = "terms" def add_filter(self, filter_values): - """ Create a terms filter instead of bool containing term filters. 
""" + """Create a terms filter instead of bool containing term filters.""" if filter_values: - return Terms(_expand__to_dot=False, **{self._params['field']: filter_values}) + return Terms( + _expand__to_dot=False, **{self._params["field"]: filter_values} + ) class RangeFacet(Facet): - agg_type = 'range' + agg_type = "range" def _range_to_dict(self, range): key, range = range - out = {'key': key} + out = {"key": key} if range[0] is not None: - out['from'] = range[0] + out["from"] = range[0] if range[1] is not None: - out['to'] = range[1] + out["to"] = range[1] return out def __init__(self, ranges, **kwargs): super(RangeFacet, self).__init__(**kwargs) - self._params['ranges'] = list(map(self._range_to_dict, ranges)) - self._params['keyed'] = False + self._params["ranges"] = list(map(self._range_to_dict, ranges)) + self._params["keyed"] = False self._ranges = dict(ranges) def get_value_filter(self, filter_value): f, t = self._ranges[filter_value] limits = {} if f is not None: - limits['gte'] = f + limits["gte"] = f if t is not None: - limits['lt'] = t + limits["lt"] = t + + return Range(_expand__to_dot=False, **{self._params["field"]: limits}) - return Range(_expand__to_dot=False, **{ - self._params['field']: limits - }) class HistogramFacet(Facet): - agg_type = 'histogram' + agg_type = "histogram" def get_value_filter(self, filter_value): - return Range(_expand__to_dot=False, **{ - self._params['field']: { - 'gte': filter_value, - 'lt': filter_value + self._params['interval'] + return Range( + _expand__to_dot=False, + **{ + self._params["field"]: { + "gte": filter_value, + "lt": filter_value + self._params["interval"], + } } - }) + ) + + +def _date_interval_year(d): + return d.replace( + year=d.year + 1, day=(28 if d.month == 2 and d.day == 29 else d.day) + ) + + +def _date_interval_month(d): + return (d + timedelta(days=32)).replace(day=1) + + +def _date_interval_week(d): + return d + timedelta(days=7) + + +def _date_interval_day(d): + return d + timedelta(days=1) + + +def _date_interval_hour(d): + return d + timedelta(hours=1) class DateHistogramFacet(Facet): - agg_type = 'date_histogram' + agg_type = "date_histogram" DATE_INTERVALS = { - 'month': lambda d: (d+timedelta(days=32)).replace(day=1), - 'week': lambda d: d+timedelta(days=7), - 'day': lambda d: d+timedelta(days=1), - 'hour': lambda d: d+timedelta(hours=1), + "year": _date_interval_year, + "1Y": _date_interval_year, + "month": _date_interval_month, + "1M": _date_interval_month, + "week": _date_interval_week, + "1w": _date_interval_week, + "day": _date_interval_day, + "1d": _date_interval_day, + "hour": _date_interval_hour, + "1h": _date_interval_hour, } def __init__(self, **kwargs): @@ -157,31 +211,45 @@ def __init__(self, **kwargs): super(DateHistogramFacet, self).__init__(**kwargs) def get_value(self, bucket): - if not isinstance(bucket['key'], datetime): + if not isinstance(bucket["key"], datetime): # Elasticsearch returns key=None instead of 0 for date 1970-01-01, # so we need to set key to 0 to avoid TypeError exception - if bucket['key'] is None: - bucket['key'] = 0 + if bucket["key"] is None: + bucket["key"] = 0 # Preserve milliseconds in the datetime - return datetime.utcfromtimestamp(int(bucket['key']) / 1000.0) + return datetime.utcfromtimestamp(int(bucket["key"]) / 1000.0) else: - return bucket['key'] + return bucket["key"] def get_value_filter(self, filter_value): - return Range(_expand__to_dot=False, **{ - self._params['field']: { - 'gte': filter_value, - 'lt': 
self.DATE_INTERVALS[self._params['interval']](filter_value) + for interval_type in ("calendar_interval", "fixed_interval"): + if interval_type in self._params: + break + else: + interval_type = "interval" + + return Range( + _expand__to_dot=False, + **{ + self._params["field"]: { + "gte": filter_value, + "lt": self.DATE_INTERVALS[self._params[interval_type]]( + filter_value + ), + } } - }) + ) + class NestedFacet(Facet): - agg_type = 'nested' + agg_type = "nested" def __init__(self, path, nested_facet): self._path = path self._inner = nested_facet - super(NestedFacet, self).__init__(path=path, aggs={'inner': nested_facet.get_aggregation()}) + super(NestedFacet, self).__init__( + path=path, aggs={"inner": nested_facet.get_aggregation()} + ) def get_values(self, data, filter_values): return self._inner.get_values(data.inner, filter_values) @@ -191,6 +259,7 @@ def add_filter(self, filter_values): if inner_q: return Nested(path=self._path, query=inner_q) + class FacetedResponse(Response): @property def query_string(self): @@ -198,12 +267,12 @@ def query_string(self): @property def facets(self): - if not hasattr(self, '_facets'): - super(AttrDict, self).__setattr__('_facets', AttrDict({})) + if not hasattr(self, "_facets"): + super(AttrDict, self).__setattr__("_facets", AttrDict({})) for name, facet in iteritems(self._faceted_search.facets): self._facets[name] = facet.get_values( - getattr(getattr(self.aggregations, '_filter_' + name), name), - self._faceted_search.filter_values.get(name, ()) + getattr(getattr(self.aggregations, "_filter_" + name), name), + self._faceted_search.filter_values.get(name, ()), ) return self._facets @@ -249,11 +318,12 @@ def search(self): ) """ + index = None doc_types = None fields = None facets = {} - using = 'default' + using = "default" def __init__(self, query=None, filters={}, sort=()): """ @@ -288,7 +358,9 @@ def add_filter(self, name, filter_values): if not isinstance(filter_values, (tuple, list)): if filter_values is None: return - filter_values = [filter_values, ] + filter_values = [ + filter_values, + ] # remember the filter values for use in FacetedResponse self.filter_values[name] = filter_values @@ -318,9 +390,9 @@ def query(self, search, query): """ if query: if self.fields: - return search.query('multi_match', fields=self.fields, query=query) + return search.query("multi_match", fields=self.fields, query=query) else: - return search.query('multi_match', query=query) + return search.query("multi_match", query=query) return search def aggregate(self, search): @@ -335,11 +407,9 @@ def aggregate(self, search): if f == field: continue agg_filter &= filter - search.aggs.bucket( - '_filter_' + f, - 'filter', - filter=agg_filter - ).bucket(f, agg) + search.aggs.bucket("_filter_" + f, "filter", filter=agg_filter).bucket( + f, agg + ) def filter(self, search): """ @@ -358,8 +428,9 @@ def highlight(self, search): """ Add highlighting for all the fields """ - return search.highlight(*(f if '^' not in f else f.split('^', 1)[0] - for f in self.fields)) + return search.highlight( + *(f if "^" not in f else f.split("^", 1)[0] for f in self.fields) + ) def sort(self, search): """ diff --git a/elasticsearch_dsl/field.py b/elasticsearch_dsl/field.py index be87bf10c..06a10f2a2 100644 --- a/elasticsearch_dsl/field.py +++ b/elasticsearch_dsl/field.py @@ -1,4 +1,22 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. 
Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import base64 +import copy import ipaddress try: @@ -9,46 +27,53 @@ from datetime import date, datetime from dateutil import parser, tz -from six import string_types, iteritems, integer_types +from six import integer_types, iteritems, string_types from six.moves import map -from .query import Q -from .utils import DslBase, AttrDict, AttrList from .exceptions import ValidationException +from .query import Q +from .utils import AttrDict, AttrList, DslBase from .wrappers import Range -unicode = type(u'') +unicode = type(u"") + def construct_field(name_or_field, **params): # {"type": "text", "analyzer": "snowball"} if isinstance(name_or_field, collections_abc.Mapping): if params: - raise ValueError('construct_field() cannot accept parameters when passing in a dict.') + raise ValueError( + "construct_field() cannot accept parameters when passing in a dict." + ) params = name_or_field.copy() - if 'type' not in params: + if "type" not in params: # inner object can be implicitly defined - if 'properties' in params: - name = 'object' + if "properties" in params: + name = "object" else: raise ValueError('construct_field() needs to have a "type" key.') else: - name = params.pop('type') + name = params.pop("type") return Field.get_dsl_class(name)(**params) # Text() if isinstance(name_or_field, Field): if params: - raise ValueError('construct_field() cannot accept parameters when passing in a construct_field object.') + raise ValueError( + "construct_field() cannot accept parameters " + "when passing in a construct_field object." 
+ ) return name_or_field # "text", analyzer="snowball" return Field.get_dsl_class(name_or_field)(**params) + class Field(DslBase): - _type_name = 'field' + _type_name = "field" _type_shortcut = staticmethod(construct_field) # all fields can be multifields - _param_defs = {'fields': {'type': 'field', 'hash': True}} + _param_defs = {"fields": {"type": "field", "hash": True}} name = None _coerce = False @@ -62,7 +87,7 @@ def __init__(self, multi=False, required=False, *args, **kwargs): super(Field, self).__init__(*args, **kwargs) def __getitem__(self, subfield): - return self._params.get('fields', {})[subfield] + return self._params.get("fields", {})[subfield] def _serialize(self, data): return data @@ -85,10 +110,7 @@ def serialize(self, data): def deserialize(self, data): if isinstance(data, (list, AttrList, tuple)): - data = [ - None if d is None else self._deserialize(d) - for d in data - ] + data = [None if d is None else self._deserialize(d) for d in data] return data if data is None: return None @@ -104,11 +126,12 @@ def clean(self, data): def to_dict(self): d = super(Field, self).to_dict() name, value = d.popitem() - value['type'] = name + value["type"] = name return value + class CustomField(Field): - name = 'custom' + name = "custom" _coerce = True def to_dict(self): @@ -116,11 +139,12 @@ def to_dict(self): return self.builtin_type.to_dict() d = super(CustomField, self).to_dict() - d['type'] = self.builtin_type + d["type"] = self.builtin_type return d + class Object(Field): - name = 'object' + name = "object" _coerce = True def __init__(self, doc_class=None, dynamic=None, properties=None, **kwargs): @@ -138,20 +162,22 @@ def __init__(self, doc_class=None, dynamic=None, properties=None, **kwargs): """ if doc_class and (properties or dynamic is not None): raise ValidationException( - 'doc_class and properties/dynamic should not be provided together') + "doc_class and properties/dynamic should not be provided together" + ) if doc_class: self._doc_class = doc_class else: # FIXME import from .document import InnerDoc + # no InnerDoc subclass, creating one instead... 
- self._doc_class = type('InnerDoc', (InnerDoc, ), {}) + self._doc_class = type("InnerDoc", (InnerDoc,), {}) for name, field in iteritems(properties or {}): self._doc_class._doc_type.mapping.field(name, field) if dynamic is not None: - self._doc_class._doc_type.mapping.meta('dynamic', dynamic) + self._doc_class._doc_type.mapping.meta("dynamic", dynamic) - self._mapping = self._doc_class._doc_type.mapping + self._mapping = copy.deepcopy(self._doc_class._doc_type.mapping) super(Object, self).__init__(**kwargs) def __getitem__(self, name): @@ -217,15 +243,17 @@ def update(self, other, update_only=False): self._mapping.update(other._mapping, update_only) + class Nested(Object): - name = 'nested' + name = "nested" def __init__(self, *args, **kwargs): - kwargs.setdefault('multi', True) + kwargs.setdefault("multi", True) super(Nested, self).__init__(*args, **kwargs) + class Date(Field): - name = 'date' + name = "date" _coerce = True def __init__(self, default_timezone=None, *args, **kwargs): @@ -243,7 +271,9 @@ def _deserialize(self, data): try: data = parser.parse(data) except Exception as e: - raise ValidationException('Could not parse date from the value (%r)' % data, e) + raise ValidationException( + "Could not parse date from the value (%r)" % data, e + ) if isinstance(data, datetime): if self._default_timezone and data.tzinfo is None: @@ -255,35 +285,43 @@ def _deserialize(self, data): # Divide by a float to preserve milliseconds on the datetime. return datetime.utcfromtimestamp(data / 1000.0) - raise ValidationException('Could not parse date from the value (%r)' % data) + raise ValidationException("Could not parse date from the value (%r)" % data) + class Text(Field): _param_defs = { - 'fields': {'type': 'field', 'hash': True}, - 'analyzer': {'type': 'analyzer'}, - 'search_analyzer': {'type': 'analyzer'}, - 'search_quote_analyzer': {'type': 'analyzer'}, + "fields": {"type": "field", "hash": True}, + "analyzer": {"type": "analyzer"}, + "search_analyzer": {"type": "analyzer"}, + "search_quote_analyzer": {"type": "analyzer"}, } - name = 'text' + name = "text" + class SearchAsYouType(Field): _param_defs = { - 'analyzer': {'type': 'analyzer'}, - 'search_analyzer': {'type': 'analyzer'}, - 'search_quote_analyzer': {'type': 'analyzer'}, + "analyzer": {"type": "analyzer"}, + "search_analyzer": {"type": "analyzer"}, + "search_quote_analyzer": {"type": "analyzer"}, } - name = 'search_as_you_type' + name = "search_as_you_type" + class Keyword(Field): _param_defs = { - 'fields': {'type': 'field', 'hash': True}, - 'search_analyzer': {'type': 'analyzer'}, - 'normalizer': {'type': 'normalizer'} + "fields": {"type": "field", "hash": True}, + "search_analyzer": {"type": "analyzer"}, + "normalizer": {"type": "normalizer"}, } - name = 'keyword' + name = "keyword" + + +class ConstantKeyword(Keyword): + name = "constant_keyword" + class Boolean(Field): - name = 'boolean' + name = "boolean" _coerce = True def _deserialize(self, data): @@ -298,56 +336,74 @@ def clean(self, data): raise ValidationException("Value required for this field.") return data + class Float(Field): - name = 'float' + name = "float" _coerce = True def _deserialize(self, data): return float(data) + class DenseVector(Float): - name = 'dense_vector' + name = "dense_vector" def __init__(self, dims, **kwargs): kwargs["multi"] = True super(DenseVector, self).__init__(dims=dims, **kwargs) + class SparseVector(Field): - name = 'sparse_vector' + name = "sparse_vector" + class HalfFloat(Float): - name = 'half_float' + name = "half_float" + class 
ScaledFloat(Float): - name = 'scaled_float' + name = "scaled_float" def __init__(self, scaling_factor, *args, **kwargs): - super(ScaledFloat, self).__init__(scaling_factor=scaling_factor, *args, **kwargs) + super(ScaledFloat, self).__init__( + scaling_factor=scaling_factor, *args, **kwargs + ) + class Double(Float): - name = 'double' + name = "double" + class RankFeature(Float): - name = 'rank_feature' + name = "rank_feature" + + +class RankFeatures(Field): + name = "rank_features" + class Integer(Field): - name = 'integer' + name = "integer" _coerce = True def _deserialize(self, data): return int(data) + class Byte(Integer): - name = 'byte' + name = "byte" + class Short(Integer): - name = 'short' + name = "short" + class Long(Integer): - name = 'long' + name = "long" + class Ip(Field): - name = 'ip' + name = "ip" _coerce = True def _deserialize(self, data): @@ -359,8 +415,9 @@ def _serialize(self, data): return None return str(data) + class Binary(Field): - name = 'binary' + name = "binary" _coerce = True def clean(self, data): @@ -376,21 +433,25 @@ def _serialize(self, data): return None return base64.b64encode(data).decode() + class GeoPoint(Field): - name = 'geo_point' + name = "geo_point" + class GeoShape(Field): - name = 'geo_shape' + name = "geo_shape" + class Completion(Field): _param_defs = { - 'analyzer': {'type': 'analyzer'}, - 'search_analyzer': {'type': 'analyzer'}, + "analyzer": {"type": "analyzer"}, + "search_analyzer": {"type": "analyzer"}, } - name = 'completion' + name = "completion" + class Percolator(Field): - name = 'percolator' + name = "percolator" _coerce = True def _deserialize(self, data): @@ -401,6 +462,7 @@ def _serialize(self, data): return None return data.to_dict() + class RangeField(Field): _coerce = True _core_field = None @@ -420,34 +482,42 @@ def _serialize(self, data): class IntegerRange(RangeField): - name = 'integer_range' + name = "integer_range" _core_field = Integer() + class FloatRange(RangeField): - name = 'float_range' + name = "float_range" _core_field = Float() + class LongRange(RangeField): - name = 'long_range' + name = "long_range" _core_field = Long() + class DoubleRange(RangeField): - name = 'double_range' + name = "double_range" _core_field = Double() + class DateRange(RangeField): - name = 'date_range' + name = "date_range" _core_field = Date() + class IpRange(Field): # not a RangeField since ip_range supports CIDR ranges - name = 'ip_range' + name = "ip_range" + class Join(Field): - name = 'join' + name = "join" + class TokenCount(Field): - name = 'token_count' + name = "token_count" + class Murmur3(Field): - name = 'murmur3' + name = "murmur3" diff --git a/elasticsearch_dsl/function.py b/elasticsearch_dsl/function.py index e63768a68..3e91bea86 100644 --- a/elasticsearch_dsl/function.py +++ b/elasticsearch_dsl/function.py @@ -1,3 +1,20 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + try: import collections.abc as collections_abc # only works on python 3.3+ except ImportError: @@ -5,11 +22,12 @@ from .utils import DslBase + def SF(name_or_sf, **params): # {"script_score": {"script": "_score"}, "filter": {}} if isinstance(name_or_sf, collections_abc.Mapping): if params: - raise ValueError('SF() cannot accept parameters when passing in a dict.') + raise ValueError("SF() cannot accept parameters when passing in a dict.") kwargs = {} sf = name_or_sf.copy() for k in ScoreFunction._param_defs: @@ -18,16 +36,16 @@ def SF(name_or_sf, **params): # not sf, so just filter+weight, which used to be boost factor if not sf: - name = 'boost_factor' + name = "boost_factor" # {'FUNCTION': {...}} elif len(sf) == 1: name, params = sf.popitem() else: - raise ValueError('SF() got an unexpected fields in the dictionary: %r' % sf) + raise ValueError("SF() got an unexpected fields in the dictionary: %r" % sf) # boost factor special case, see elasticsearch #6343 if not isinstance(params, collections_abc.Mapping): - params = {'value': params} + params = {"value": params} # mix known params (from _param_defs) and from inside the function kwargs.update(params) @@ -36,19 +54,22 @@ def SF(name_or_sf, **params): # ScriptScore(script="_score", filter=Q()) if isinstance(name_or_sf, ScoreFunction): if params: - raise ValueError('SF() cannot accept parameters when passing in a ScoreFunction object.') + raise ValueError( + "SF() cannot accept parameters when passing in a ScoreFunction object." + ) return name_or_sf # "script_score", script="_score", filter=Q() return ScoreFunction.get_dsl_class(name_or_sf)(**params) + class ScoreFunction(DslBase): - _type_name = 'score_function' + _type_name = "score_function" _type_shortcut = staticmethod(SF) _param_defs = { - 'query': {'type': 'query'}, - 'filter': {'type': 'query'}, - 'weight': {} + "query": {"type": "query"}, + "filter": {"type": "query"}, + "weight": {}, } name = None @@ -60,32 +81,38 @@ def to_dict(self): d[k] = d[self.name].pop(k) return d + class ScriptScore(ScoreFunction): - name = 'script_score' + name = "script_score" + class BoostFactor(ScoreFunction): - name = 'boost_factor' + name = "boost_factor" def to_dict(self): d = super(BoostFactor, self).to_dict() - if 'value' in d[self.name]: - d[self.name] = d[self.name].pop('value') + if "value" in d[self.name]: + d[self.name] = d[self.name].pop("value") else: del d[self.name] return d + class RandomScore(ScoreFunction): - name = 'random_score' + name = "random_score" + class FieldValueFactor(ScoreFunction): - name = 'field_value_factor' + name = "field_value_factor" + class Linear(ScoreFunction): - name = 'linear' + name = "linear" + class Gauss(ScoreFunction): - name = 'gauss' + name = "gauss" -class Exp(ScoreFunction): - name = 'exp' +class Exp(ScoreFunction): + name = "exp" diff --git a/elasticsearch_dsl/index.py b/elasticsearch_dsl/index.py index e62e252ba..328d28761 100644 --- a/elasticsearch_dsl/index.py +++ b/elasticsearch_dsl/index.py @@ -1,5 +1,22 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from . import analysis -from .connections import get_connection +from .connections import CLIENT_HAS_NAMED_BODY_PARAMS, get_connection from .exceptions import IllegalOperation from .mapping import Mapping from .search import Search @@ -13,8 +30,10 @@ def __init__(self, name, template, index=None, order=None, **kwargs): self._index = Index(template, **kwargs) else: if kwargs: - raise ValueError("You cannot specify options for Index when" - " passing an Index instance.") + raise ValueError( + "You cannot specify options for Index when" + " passing an Index instance." + ) self._index = index.clone() self._index._name = template self._template_name = name @@ -25,9 +44,9 @@ def __getattr__(self, attr_name): def to_dict(self): d = self._index.to_dict() - d['index_patterns'] = [self._index._name] + d["index_patterns"] = [self._index._name] if self.order is not None: - d['order'] = self.order + d["order"] = self.order return d def save(self, using=None): @@ -37,7 +56,7 @@ def save(self, using=None): class Index(object): - def __init__(self, name, using='default'): + def __init__(self, name, using="default"): """ :arg name: name of the index :arg using: connection alias to use, defaults to ``'default'`` @@ -59,8 +78,9 @@ def as_template(self, template_name, pattern=None, order=None): # TODO: should we allow pattern to be a top-level arg? # or maybe have an IndexPattern that allows for it and have # Document._index be that? 
- return IndexTemplate(template_name, pattern or self._name, index=self, - order=order) + return IndexTemplate( + template_name, pattern or self._name, index=self, order=order + ) def resolve_nested(self, field_path): for doc in self._doc_types: @@ -81,7 +101,9 @@ def resolve_field(self, field_path): return None def load_mappings(self, using=None): - self.get_or_create_mapping().update_from_es(self._name, using=using or self._using) + self.get_or_create_mapping().update_from_es( + self._name, using=using or self._using + ) def clone(self, name=None, using=None): """ @@ -109,9 +131,9 @@ def clone(self, name=None, using=None): def _get_connection(self, using=None): if self._name is None: - raise ValueError( - "You cannot perform API calls on the default index.") + raise ValueError("You cannot perform API calls on the default index.") return get_connection(using or self._using) + connection = property(_get_connection) def mapping(self, mapping): @@ -207,9 +229,9 @@ def analyzer(self, *args, **kwargs): def to_dict(self): out = {} if self._settings: - out['settings'] = self._settings + out["settings"] = self._settings if self._aliases: - out['aliases'] = self._aliases + out["aliases"] = self._aliases mappings = self._mapping.to_dict() if self._mapping else {} analysis = self._mapping._collect_analysis() if self._mapping else {} for d in self._doc_types: @@ -217,10 +239,10 @@ def to_dict(self): merge(mappings, mapping.to_dict(), True) merge(analysis, mapping._collect_analysis(), True) if mappings: - out['mappings'] = mappings + out["mappings"] = mappings if analysis or self._analysis: merge(analysis, self._analysis) - out.setdefault('settings', {})['analysis'] = analysis + out.setdefault("settings", {})["analysis"] = analysis return out def search(self, using=None): @@ -230,9 +252,7 @@ def search(self, using=None): ``Document``\\s. """ return Search( - using=using or self._using, - index=self._name, - doc_type=self._doc_types + using=using or self._using, index=self._name, doc_type=self._doc_types ) def updateByQuery(self, using=None): @@ -256,11 +276,21 @@ def create(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.create`` unchanged. 
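# Illustrative sketch of declaring and creating an index with the Index class
# above; the index name, settings and analyzer are example values only and
# assume the BlogPost document and connection from the earlier sketches.
from elasticsearch_dsl import Index, analyzer

blogs = Index("blog")
blogs.settings(number_of_shards=1, number_of_replicas=0)
blogs.document(BlogPost)   # attach the Document so its mapping is included
blogs.analyzer(
    analyzer("html_text", tokenizer="standard", char_filter=["html_strip"])
)
blogs.create()             # issues Elasticsearch.indices.create with to_dict() as the body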
""" - return self._get_connection(using).indices.create(index=self._name, body=self.to_dict(), **kwargs) + es = self._get_connection(using) + + if CLIENT_HAS_NAMED_BODY_PARAMS: + params = self.to_dict() + else: + params = {"body": self.to_dict()} + params.update(kwargs) + + return es.indices.create(index=self._name, **params) def is_closed(self, using=None): - state = self._get_connection(using).cluster.state(index=self._name, metric='metadata') - return state['metadata']['indices'][self._name]['state'] == 'close' + state = self._get_connection(using).cluster.state( + index=self._name, metric="metadata" + ) + return state["metadata"]["indices"][self._name]["state"] == "close" def save(self, using=None): """ @@ -275,25 +305,30 @@ def save(self, using=None): return self.create(using=using) body = self.to_dict() - settings = body.pop('settings', {}) - analysis = settings.pop('analysis', None) - current_settings = self.get_settings(using=using)[self._name]['settings']['index'] + settings = body.pop("settings", {}) + analysis = settings.pop("analysis", None) + current_settings = self.get_settings(using=using)[self._name]["settings"][ + "index" + ] if analysis: if self.is_closed(using=using): # closed index, update away - settings['analysis'] = analysis + settings["analysis"] = analysis else: # compare analysis definition, if all analysis objects are # already defined as requested, skip analysis update and # proceed, otherwise raise IllegalOperation - existing_analysis = current_settings.get('analysis', {}) + existing_analysis = current_settings.get("analysis", {}) if any( - existing_analysis.get(section, {}).get(k, None) != analysis[section][k] + existing_analysis.get(section, {}).get(k, None) + != analysis[section][k] for section in analysis for k in analysis[section] ): raise IllegalOperation( - 'You cannot update analysis configuration on an open index, you need to close index %s first.' % self._name) + "You cannot update analysis configuration on an open index, " + "you need to close index %s first." % self._name + ) # try and update the settings if settings: @@ -307,7 +342,7 @@ def save(self, using=None): # update the mappings, any conflict in the mappings will result in an # exception - mappings = body.pop('mappings', {}) + mappings = body.pop("mappings", {}) if mappings: self.put_mapping(using=using, body=mappings) @@ -391,7 +426,9 @@ def exists_type(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.exists_type`` unchanged. """ - return self._get_connection(using).indices.exists_type(index=self._name, **kwargs) + return self._get_connection(using).indices.exists_type( + index=self._name, **kwargs + ) def put_mapping(self, using=None, **kwargs): """ @@ -400,7 +437,9 @@ def put_mapping(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.put_mapping`` unchanged. """ - return self._get_connection(using).indices.put_mapping(index=self._name, **kwargs) + return self._get_connection(using).indices.put_mapping( + index=self._name, **kwargs + ) def get_mapping(self, using=None, **kwargs): """ @@ -409,7 +448,9 @@ def get_mapping(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.get_mapping`` unchanged. 
""" - return self._get_connection(using).indices.get_mapping(index=self._name, **kwargs) + return self._get_connection(using).indices.get_mapping( + index=self._name, **kwargs + ) def get_field_mapping(self, using=None, **kwargs): """ @@ -418,7 +459,9 @@ def get_field_mapping(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.get_field_mapping`` unchanged. """ - return self._get_connection(using).indices.get_field_mapping(index=self._name, **kwargs) + return self._get_connection(using).indices.get_field_mapping( + index=self._name, **kwargs + ) def put_alias(self, using=None, **kwargs): """ @@ -436,7 +479,9 @@ def exists_alias(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.exists_alias`` unchanged. """ - return self._get_connection(using).indices.exists_alias(index=self._name, **kwargs) + return self._get_connection(using).indices.exists_alias( + index=self._name, **kwargs + ) def get_alias(self, using=None, **kwargs): """ @@ -454,7 +499,9 @@ def delete_alias(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.delete_alias`` unchanged. """ - return self._get_connection(using).indices.delete_alias(index=self._name, **kwargs) + return self._get_connection(using).indices.delete_alias( + index=self._name, **kwargs + ) def get_settings(self, using=None, **kwargs): """ @@ -463,7 +510,9 @@ def get_settings(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.get_settings`` unchanged. """ - return self._get_connection(using).indices.get_settings(index=self._name, **kwargs) + return self._get_connection(using).indices.get_settings( + index=self._name, **kwargs + ) def put_settings(self, using=None, **kwargs): """ @@ -472,7 +521,9 @@ def put_settings(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.put_settings`` unchanged. """ - return self._get_connection(using).indices.put_settings(index=self._name, **kwargs) + return self._get_connection(using).indices.put_settings( + index=self._name, **kwargs + ) def stats(self, using=None, **kwargs): """ @@ -500,7 +551,9 @@ def validate_query(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.validate_query`` unchanged. """ - return self._get_connection(using).indices.validate_query(index=self._name, **kwargs) + return self._get_connection(using).indices.validate_query( + index=self._name, **kwargs + ) def clear_cache(self, using=None, **kwargs): """ @@ -509,7 +562,9 @@ def clear_cache(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.clear_cache`` unchanged. """ - return self._get_connection(using).indices.clear_cache(index=self._name, **kwargs) + return self._get_connection(using).indices.clear_cache( + index=self._name, **kwargs + ) def recovery(self, using=None, **kwargs): """ @@ -537,7 +592,9 @@ def get_upgrade(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.get_upgrade`` unchanged. 
""" - return self._get_connection(using).indices.get_upgrade(index=self._name, **kwargs) + return self._get_connection(using).indices.get_upgrade( + index=self._name, **kwargs + ) def flush_synced(self, using=None, **kwargs): """ @@ -547,7 +604,9 @@ def flush_synced(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.flush_synced`` unchanged. """ - return self._get_connection(using).indices.flush_synced(index=self._name, **kwargs) + return self._get_connection(using).indices.flush_synced( + index=self._name, **kwargs + ) def shard_stores(self, using=None, **kwargs): """ @@ -559,7 +618,9 @@ def shard_stores(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.shard_stores`` unchanged. """ - return self._get_connection(using).indices.shard_stores(index=self._name, **kwargs) + return self._get_connection(using).indices.shard_stores( + index=self._name, **kwargs + ) def forcemerge(self, using=None, **kwargs): """ @@ -575,7 +636,9 @@ def forcemerge(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.forcemerge`` unchanged. """ - return self._get_connection(using).indices.forcemerge(index=self._name, **kwargs) + return self._get_connection(using).indices.forcemerge( + index=self._name, **kwargs + ) def shrink(self, using=None, **kwargs): """ diff --git a/elasticsearch_dsl/mapping.py b/elasticsearch_dsl/mapping.py index 15872ad52..b2dd33a1c 100644 --- a/elasticsearch_dsl/mapping.py +++ b/elasticsearch_dsl/mapping.py @@ -1,3 +1,20 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ try: import collections.abc as collections_abc # only works on python 3.3+ except ImportError: @@ -11,19 +28,28 @@ from .field import Nested, Text, construct_field from .utils import DslBase -META_FIELDS = frozenset(( - 'dynamic', 'transform', 'dynamic_date_formats', 'date_detection', - 'numeric_detection', 'dynamic_templates', 'enabled' -)) +META_FIELDS = frozenset( + ( + "dynamic", + "transform", + "dynamic_date_formats", + "date_detection", + "numeric_detection", + "dynamic_templates", + "enabled", + ) +) + class Properties(DslBase): - name = 'properties' - _param_defs = {'properties': {'type': 'field', 'hash': True}} + name = "properties" + _param_defs = {"properties": {"type": "field", "hash": True}} + def __init__(self): super(Properties, self).__init__() def __repr__(self): - return 'Properties()' + return "Properties()" def __getitem__(self, name): return self.properties[name] @@ -32,34 +58,34 @@ def __contains__(self, name): return name in self.properties def to_dict(self): - return super(Properties, self).to_dict()['properties'] + return super(Properties, self).to_dict()["properties"] def field(self, name, *args, **kwargs): self.properties[name] = construct_field(*args, **kwargs) return self def _collect_fields(self): - """ Iterate over all Field objects within, including multi fields. """ + """Iterate over all Field objects within, including multi fields.""" for f in itervalues(self.properties.to_dict()): yield f # multi fields - if hasattr(f, 'fields'): + if hasattr(f, "fields"): for inner_f in itervalues(f.fields.to_dict()): yield inner_f # nested and inner objects - if hasattr(f, '_collect_fields'): + if hasattr(f, "_collect_fields"): for inner_f in f._collect_fields(): yield inner_f def update(self, other_object): - if not hasattr(other_object, 'properties'): + if not hasattr(other_object, "properties"): # not an inner/nested object, no merge possible return our, other = self.properties, other_object.properties for name in other: if name in our: - if hasattr(our[name], 'update'): + if hasattr(our[name], "update"): our[name].update(other[name]) continue our[name] = other[name] @@ -71,7 +97,7 @@ def __init__(self): self._meta = {} def __repr__(self): - return 'Mapping()' + return "Mapping()" def _clone(self): m = Mapping() @@ -79,7 +105,7 @@ def _clone(self): return m @classmethod - def from_es(cls, index, using='default'): + def from_es(cls, index, using="default"): m = cls() m.update_from_es(index, using) return m @@ -87,19 +113,19 @@ def from_es(cls, index, using='default'): def resolve_nested(self, field_path): field = self nested = [] - parts = field_path.split('.') + parts = field_path.split(".") for i, step in enumerate(parts): try: field = field[step] except KeyError: return (), None if isinstance(field, Nested): - nested.append('.'.join(parts[:i+1])) + nested.append(".".join(parts[: i + 1])) return nested, field def resolve_field(self, field_path): field = self - for step in field_path.split('.'): + for step in field_path.split("."): try: field = field[step] except KeyError: @@ -109,11 +135,16 @@ def resolve_field(self, field_path): def _collect_analysis(self): analysis = {} fields = [] - if '_all' in self._meta: - fields.append(Text(**self._meta['_all'])) + if "_all" in self._meta: + fields.append(Text(**self._meta["_all"])) for f in chain(fields, self.properties._collect_fields()): - for analyzer_name in ('analyzer', 'normalizer', 'search_analyzer', 'search_quote_analyzer'): + for analyzer_name in ( + "analyzer", + "normalizer", + "search_analyzer", + 
"search_quote_analyzer", + ): if not hasattr(f, analyzer_name): continue analyzer = getattr(f, analyzer_name) @@ -129,25 +160,26 @@ def _collect_analysis(self): return analysis - def save(self, index, using='default'): + def save(self, index, using="default"): from .index import Index + index = Index(index, using=using) index.mapping(self) return index.save() - def update_from_es(self, index, using='default'): + def update_from_es(self, index, using="default"): es = get_connection(using) raw = es.indices.get_mapping(index=index) _, raw = raw.popitem() - self._update_from_dict(raw['mappings']) + self._update_from_dict(raw["mappings"]) def _update_from_dict(self, raw): - for name, definition in iteritems(raw.get('properties', {})): + for name, definition in iteritems(raw.get("properties", {})): self.field(name, definition) # metadata like _all etc for name, value in iteritems(raw): - if name != 'properties': + if name != "properties": if isinstance(value, collections_abc.Mapping): self.meta(name, **value) else: @@ -157,7 +189,7 @@ def update(self, mapping, update_only=False): for name in mapping: if update_only and name in self: # nested and inner objects, merge recursively - if hasattr(self[name], 'update'): + if hasattr(self[name], "update"): # FIXME only merge subfields, not the settings self[name].update(mapping[name], update_only) continue @@ -184,11 +216,11 @@ def field(self, *args, **kwargs): return self def meta(self, name, params=None, **kwargs): - if not name.startswith('_') and name not in META_FIELDS: - name = '_' + name + if not name.startswith("_") and name not in META_FIELDS: + name = "_" + name if params and kwargs: - raise ValueError('Meta configs cannot have both value and a dictionary.') + raise ValueError("Meta configs cannot have both value and a dictionary.") self._meta[name] = kwargs if params is None else params return self @@ -197,11 +229,11 @@ def to_dict(self): meta = self._meta # hard coded serialization of analyzers in _all - if '_all' in meta: + if "_all" in meta: meta = meta.copy() - _all = meta['_all'] = meta['_all'].copy() - for f in ('analyzer', 'search_analyzer', 'search_quote_analyzer'): - if hasattr(_all.get(f, None), 'to_dict'): + _all = meta["_all"] = meta["_all"].copy() + for f in ("analyzer", "search_analyzer", "search_quote_analyzer"): + if hasattr(_all.get(f, None), "to_dict"): _all[f] = _all[f].to_dict() meta.update(self.properties.to_dict()) return meta diff --git a/elasticsearch_dsl/query.py b/elasticsearch_dsl/query.py index ce52ea1cc..642ce2c88 100644 --- a/elasticsearch_dsl/query.py +++ b/elasticsearch_dsl/query.py @@ -1,3 +1,20 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ try: import collections.abc as collections_abc # only works on python 3.3+ except ImportError: @@ -5,43 +22,51 @@ from itertools import chain +# 'SF' looks unused but the test suite assumes it's available +# from this module so others are liable to do so as well. +from .function import SF # noqa: F401 +from .function import ScoreFunction from .utils import DslBase -from .function import SF, ScoreFunction -def Q(name_or_query='match_all', **params): +def Q(name_or_query="match_all", **params): # {"match": {"title": "python"}} if isinstance(name_or_query, collections_abc.Mapping): if params: - raise ValueError('Q() cannot accept parameters when passing in a dict.') + raise ValueError("Q() cannot accept parameters when passing in a dict.") if len(name_or_query) != 1: - raise ValueError('Q() can only accept dict with a single query ({"match": {...}}). ' - 'Instead it got (%r)' % name_or_query) + raise ValueError( + 'Q() can only accept dict with a single query ({"match": {...}}). ' + "Instead it got (%r)" % name_or_query + ) name, params = name_or_query.copy().popitem() return Query.get_dsl_class(name)(_expand__to_dot=False, **params) # MatchAll() if isinstance(name_or_query, Query): if params: - raise ValueError('Q() cannot accept parameters when passing in a Query object.') + raise ValueError( + "Q() cannot accept parameters when passing in a Query object." + ) return name_or_query # s.query = Q('filtered', query=s.query) - if hasattr(name_or_query, '_proxied'): + if hasattr(name_or_query, "_proxied"): return name_or_query._proxied # "match", title="python" return Query.get_dsl_class(name_or_query)(**params) + class Query(DslBase): - _type_name = 'query' + _type_name = "query" _type_shortcut = staticmethod(Q) name = None def __add__(self, other): # make sure we give queries that know how to combine themselves # preference - if hasattr(other, '__radd__'): + if hasattr(other, "__radd__"): return other.__radd__(self) return Bool(must=[self, other]) @@ -51,52 +76,62 @@ def __invert__(self): def __or__(self, other): # make sure we give queries that know how to combine themselves # preference - if hasattr(other, '__ror__'): + if hasattr(other, "__ror__"): return other.__ror__(self) return Bool(should=[self, other]) def __and__(self, other): # make sure we give queries that know how to combine themselves # preference - if hasattr(other, '__rand__'): + if hasattr(other, "__rand__"): return other.__rand__(self) return Bool(must=[self, other]) class MatchAll(Query): - name = 'match_all' + name = "match_all" + def __add__(self, other): return other._clone() + __and__ = __rand__ = __radd__ = __add__ def __or__(self, other): return self + __ror__ = __or__ def __invert__(self): return MatchNone() + + EMPTY_QUERY = MatchAll() + class MatchNone(Query): - name = 'match_none' + name = "match_none" + def __add__(self, other): return self + __and__ = __rand__ = __radd__ = __add__ def __or__(self, other): return other._clone() + __ror__ = __or__ def __invert__(self): return MatchAll() + class Bool(Query): - name = 'bool' + name = "bool" _param_defs = { - 'must': {'type': 'query', 'multi': True}, - 'should': {'type': 'query', 'multi': True}, - 'must_not': {'type': 'query', 'multi': True}, - 'filter': {'type': 'query', 'multi': True}, + "must": {"type": "query", "multi": True}, + "should": {"type": "query", "multi": True}, + "must_not": {"type": "query", "multi": True}, + "filter": {"type": "query", "multi": True}, } def __add__(self, other): @@ -109,27 +144,47 @@ def __add__(self, other): else: 
q.must.append(other) return q + __radd__ = __add__ def __or__(self, other): for q in (self, other): - if isinstance(q, Bool) and not any((q.must, q.must_not, q.filter, getattr(q, 'minimum_should_match', None))): + if isinstance(q, Bool) and not any( + (q.must, q.must_not, q.filter, getattr(q, "minimum_should_match", None)) + ): other = self if q is other else other q = q._clone() - if isinstance(other, Bool) and not any((other.must, other.must_not, other.filter, getattr(other, 'minimum_should_match', None))): + if isinstance(other, Bool) and not any( + ( + other.must, + other.must_not, + other.filter, + getattr(other, "minimum_should_match", None), + ) + ): q.should.extend(other.should) else: q.should.append(other) return q return Bool(should=[self, other]) + __ror__ = __or__ @property def _min_should_match(self): - return getattr(self, 'minimum_should_match', 0 if not self.should or (self.must or self.filter) else 1) + return getattr( + self, + "minimum_should_match", + 0 if not self.should or (self.must or self.filter) else 1, + ) def __invert__(self): + # Because an empty Bool query is treated like + # MatchAll the inverse should be MatchNone + if not any(chain(self.must, self.filter, self.should, self.must_not)): + return MatchNone() + negations = [] for q in chain(self.must, self.filter): negations.append(~q) @@ -153,8 +208,8 @@ def __and__(self, other): q.should = [] # reset minimum_should_match as it will get calculated below - if 'minimum_should_match' in q._params: - del q._params['minimum_should_match'] + if "minimum_should_match" in q._params: + del q._params["minimum_should_match"] for qx in (self, other): # TODO: percentages will fail here @@ -171,27 +226,31 @@ def __and__(self, other): q.should.extend(qx.should) # not all are required, add a should list to the must with proper min_should_match else: - q.must.append(Bool(should=qx.should, minimum_should_match=min_should_match)) + q.must.append( + Bool(should=qx.should, minimum_should_match=min_should_match) + ) else: if not (q.must or q.filter) and q.should: - q._params.setdefault('minimum_should_match', 1) + q._params.setdefault("minimum_should_match", 1) q.must.append(other) return q + __rand__ = __and__ + class FunctionScore(Query): - name = 'function_score' + name = "function_score" _param_defs = { - 'query': {'type': 'query'}, - 'filter': {'type': 'query'}, - 'functions': {'type': 'score_function', 'multi': True}, + "query": {"type": "query"}, + "filter": {"type": "query"}, + "functions": {"type": "score_function", "multi": True}, } def __init__(self, **kwargs): - if 'functions' in kwargs: + if "functions" in kwargs: pass else: - fns = kwargs['functions'] = [] + fns = kwargs["functions"] = [] for name in ScoreFunction._classes: if name in kwargs: fns.append({name: kwargs.pop(name)}) @@ -200,181 +259,258 @@ def __init__(self, **kwargs): # compound queries class Boosting(Query): - name = 'boosting' - _param_defs = {'positive': {'type': 'query'}, 'negative': {'type': 'query'}} + name = "boosting" + _param_defs = {"positive": {"type": "query"}, "negative": {"type": "query"}} + class ConstantScore(Query): - name = 'constant_score' - _param_defs = {'query': {'type': 'query'}, 'filter': {'type': 'query'}} + name = "constant_score" + _param_defs = {"query": {"type": "query"}, "filter": {"type": "query"}} + class DisMax(Query): - name = 'dis_max' - _param_defs = {'queries': {'type': 'query', 'multi': True}} + name = "dis_max" + _param_defs = {"queries": {"type": "query", "multi": True}} + class Filtered(Query): - name = 'filtered' 
- _param_defs = {'query': {'type': 'query'}, 'filter': {'type': 'query'}} + name = "filtered" + _param_defs = {"query": {"type": "query"}, "filter": {"type": "query"}} + class Indices(Query): - name = 'indices' - _param_defs = {'query': {'type': 'query'}, 'no_match_query': {'type': 'query'}} + name = "indices" + _param_defs = {"query": {"type": "query"}, "no_match_query": {"type": "query"}} + class Percolate(Query): - name = 'percolate' + name = "percolate" + # relationship queries class Nested(Query): - name = 'nested' - _param_defs = {'query': {'type': 'query'}} + name = "nested" + _param_defs = {"query": {"type": "query"}} + class HasChild(Query): - name = 'has_child' - _param_defs = {'query': {'type': 'query'}} + name = "has_child" + _param_defs = {"query": {"type": "query"}} + class HasParent(Query): - name = 'has_parent' - _param_defs = {'query': {'type': 'query'}} + name = "has_parent" + _param_defs = {"query": {"type": "query"}} + class TopChildren(Query): - name = 'top_children' - _param_defs = {'query': {'type': 'query'}} + name = "top_children" + _param_defs = {"query": {"type": "query"}} # compount span queries class SpanFirst(Query): - name = 'span_first' - _param_defs = {'match': {'type': 'query'}} + name = "span_first" + _param_defs = {"match": {"type": "query"}} + class SpanMulti(Query): - name = 'span_multi' - _param_defs = {'match': {'type': 'query'}} + name = "span_multi" + _param_defs = {"match": {"type": "query"}} + class SpanNear(Query): - name = 'span_near' - _param_defs = {'clauses': {'type': 'query', 'multi': True}} + name = "span_near" + _param_defs = {"clauses": {"type": "query", "multi": True}} + class SpanNot(Query): - name = 'span_not' - _param_defs = {'exclude': {'type': 'query'}, 'include': {'type': 'query'}} + name = "span_not" + _param_defs = {"exclude": {"type": "query"}, "include": {"type": "query"}} + class SpanOr(Query): - name = 'span_or' - _param_defs = {'clauses': {'type': 'query', 'multi': True}} + name = "span_or" + _param_defs = {"clauses": {"type": "query", "multi": True}} + class FieldMaskingSpan(Query): - name = 'field_masking_span' - _param_defs = {'query': {'type': 'query'}} + name = "field_masking_span" + _param_defs = {"query": {"type": "query"}} + + +class SpanContaining(Query): + name = "span_containing" + _param_defs = {"little": {"type": "query"}, "big": {"type": "query"}} + + +# Original implementation contained +# a typo: remove in v8.0. 
+SpanContainining = SpanContaining -class SpanContainining(Query): - name = 'span_containing' - _param_defs = {'little': {'type': 'query'}, 'big': {'type': 'query'}} class SpanWithin(Query): - name = 'span_within' - _param_defs = {'little': {'type': 'query'}, 'big': {'type': 'query'}} + name = "span_within" + _param_defs = {"little": {"type": "query"}, "big": {"type": "query"}} + # core queries class Common(Query): - name = 'common' + name = "common" + class Fuzzy(Query): - name = 'fuzzy' + name = "fuzzy" + class FuzzyLikeThis(Query): - name = 'fuzzy_like_this' + name = "fuzzy_like_this" + class FuzzyLikeThisField(Query): - name = 'fuzzy_like_this_field' + name = "fuzzy_like_this_field" + class RankFeature(Query): - name = 'rank_feature' + name = "rank_feature" + class DistanceFeature(Query): - name = 'distance_feature' + name = "distance_feature" + class GeoBoundingBox(Query): - name = 'geo_bounding_box' + name = "geo_bounding_box" + class GeoDistance(Query): - name = 'geo_distance' + name = "geo_distance" + class GeoDistanceRange(Query): - name = 'geo_distance_range' + name = "geo_distance_range" + class GeoPolygon(Query): - name = 'geo_polygon' + name = "geo_polygon" + class GeoShape(Query): - name = 'geo_shape' + name = "geo_shape" + class GeohashCell(Query): - name = 'geohash_cell' + name = "geohash_cell" + class Ids(Query): - name = 'ids' + name = "ids" + + +class Intervals(Query): + name = "intervals" + class Limit(Query): - name = 'limit' + name = "limit" + class Match(Query): - name = 'match' + name = "match" + class MatchPhrase(Query): - name = 'match_phrase' + name = "match_phrase" + class MatchPhrasePrefix(Query): - name = 'match_phrase_prefix' + name = "match_phrase_prefix" + + +class MatchBoolPrefix(Query): + name = "match_bool_prefix" + class Exists(Query): - name = 'exists' + name = "exists" + class MoreLikeThis(Query): - name = 'more_like_this' + name = "more_like_this" + class MoreLikeThisField(Query): - name = 'more_like_this_field' + name = "more_like_this_field" + class MultiMatch(Query): - name = 'multi_match' + name = "multi_match" + class Prefix(Query): - name = 'prefix' + name = "prefix" + class QueryString(Query): - name = 'query_string' + name = "query_string" + class Range(Query): - name = 'range' + name = "range" + class Regexp(Query): - name = 'regexp' + name = "regexp" + + +class Shape(Query): + name = "shape" + class SimpleQueryString(Query): - name = 'simple_query_string' + name = "simple_query_string" + class SpanTerm(Query): - name = 'span_term' + name = "span_term" + class Template(Query): - name = 'template' + name = "template" + class Term(Query): - name = 'term' + name = "term" + class Terms(Query): - name = 'terms' + name = "terms" + class TermsSet(Query): - name = 'terms_set' + name = "terms_set" + class Wildcard(Query): - name = 'wildcard' + name = "wildcard" + class Script(Query): - name = 'script' + name = "script" + + +class ScriptScore(Query): + name = "script_score" + _param_defs = {"query": {"type": "query"}} + class Type(Query): - name = 'type' + name = "type" + class ParentId(Query): - name = 'parent_id' + name = "parent_id" + + +class Wrapper(Query): + name = "wrapper" diff --git a/elasticsearch_dsl/response/__init__.py b/elasticsearch_dsl/response/__init__.py index 7f04fce00..022ab158b 100644 --- a/elasticsearch_dsl/response/__init__.py +++ b/elasticsearch_dsl/response/__init__.py @@ -1,11 +1,30 @@ -from ..utils import AttrDict, AttrList, _wrap +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from ..utils import AttrDict, AttrList, _wrap from .hit import Hit, HitMeta +__all__ = ["Response", "AggResponse", "UpdateByQueryResponse", "Hit", "HitMeta"] + + class Response(AttrDict): def __init__(self, search, response, doc_class=None): - super(AttrDict, self).__setattr__('_search', search) - super(AttrDict, self).__setattr__('_doc_class', doc_class) + super(AttrDict, self).__setattr__("_search", search) + super(AttrDict, self).__setattr__("_doc_class", doc_class) super(Response, self).__init__(response) def __iter__(self): @@ -19,10 +38,11 @@ def __getitem__(self, key): def __nonzero__(self): return bool(self.hits) + __bool__ = __nonzero__ def __repr__(self): - return '' % (self.hits or self.aggregations) + return "" % (self.hits or self.aggregations) def __len__(self): return len(self.hits) @@ -31,26 +51,26 @@ def __getstate__(self): return self._d_, self._search, self._doc_class def __setstate__(self, state): - super(AttrDict, self).__setattr__('_d_', state[0]) - super(AttrDict, self).__setattr__('_search', state[1]) - super(AttrDict, self).__setattr__('_doc_class', state[2]) + super(AttrDict, self).__setattr__("_d_", state[0]) + super(AttrDict, self).__setattr__("_search", state[1]) + super(AttrDict, self).__setattr__("_doc_class", state[2]) def success(self): return self._shards.total == self._shards.successful and not self.timed_out @property def hits(self): - if not hasattr(self, '_hits'): - h = self._d_['hits'] + if not hasattr(self, "_hits"): + h = self._d_["hits"] try: - hits = AttrList(map(self._search._get_result, h['hits'])) + hits = AttrList(map(self._search._get_result, h["hits"])) except AttributeError as e: # avoid raising AttributeError since it will be hidden by the property raise TypeError("Could not parse hits.", e) # avoid assigning _hits into self._d_ - super(AttrDict, self).__setattr__('_hits', hits) + super(AttrDict, self).__setattr__("_hits", hits) for k in h: setattr(self._hits, k, _wrap(h[k])) return self._hits @@ -61,33 +81,38 @@ def aggregations(self): @property def aggs(self): - if not hasattr(self, '_aggs'): - aggs = AggResponse(self._search.aggs, self._search, self._d_.get('aggregations', {})) + if not hasattr(self, "_aggs"): + aggs = AggResponse( + self._search.aggs, self._search, self._d_.get("aggregations", {}) + ) # avoid assigning _aggs into self._d_ - super(AttrDict, self).__setattr__('_aggs', aggs) + super(AttrDict, self).__setattr__("_aggs", aggs) return self._aggs + class AggResponse(AttrDict): def __init__(self, aggs, search, data): - super(AttrDict, self).__setattr__('_meta', {'search': search, 'aggs': aggs}) + super(AttrDict, self).__setattr__("_meta", {"search": search, "aggs": aggs}) super(AggResponse, self).__init__(data) def __getitem__(self, attr_name): - if attr_name in self._meta['aggs']: + if attr_name in self._meta["aggs"]: # don't do 
self._meta['aggs'][attr_name] to avoid copying - agg = self._meta['aggs'].aggs[attr_name] - return agg.result(self._meta['search'], self._d_[attr_name]) + agg = self._meta["aggs"].aggs[attr_name] + return agg.result(self._meta["search"], self._d_[attr_name]) return super(AggResponse, self).__getitem__(attr_name) def __iter__(self): - for name in self._meta['aggs']: + for name in self._meta["aggs"]: yield self[name] class UpdateByQueryResponse(AttrDict): - def __init__(self, search, response, doc_class=None): - super(AttrDict, self).__setattr__('_search', search) - super(AttrDict, self).__setattr__('_doc_class', doc_class) + super(AttrDict, self).__setattr__("_search", search) + super(AttrDict, self).__setattr__("_doc_class", doc_class) super(UpdateByQueryResponse, self).__init__(response) + + def success(self): + return not self.timed_out and not self.failures diff --git a/elasticsearch_dsl/response/aggs.py b/elasticsearch_dsl/response/aggs.py index 5df16cee3..c708549f0 100644 --- a/elasticsearch_dsl/response/aggs.py +++ b/elasticsearch_dsl/response/aggs.py @@ -1,21 +1,46 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from ..utils import AttrDict, AttrList -from . import Response, AggResponse +from . 
import AggResponse, Response + class Bucket(AggResponse): def __init__(self, aggs, search, data, field=None): super(Bucket, self).__init__(aggs, search, data) + class FieldBucket(Bucket): def __init__(self, aggs, search, data, field=None): if field: - data['key'] = field.deserialize(data['key']) + data["key"] = field.deserialize(data["key"]) super(FieldBucket, self).__init__(aggs, search, data, field) + class BucketData(AggResponse): _bucket_class = Bucket + def _wrap_bucket(self, data): - return self._bucket_class(self._meta['aggs'], self._meta['search'], - data, field=self._meta.get('field')) + return self._bucket_class( + self._meta["aggs"], + self._meta["search"], + data, + field=self._meta.get("field"), + ) def __iter__(self): return iter(self.buckets) @@ -30,22 +55,26 @@ def __getitem__(self, key): @property def buckets(self): - if not hasattr(self, '_buckets'): - field = getattr(self._meta['aggs'], 'field', None) + if not hasattr(self, "_buckets"): + field = getattr(self._meta["aggs"], "field", None) if field: - self._meta['field'] = self._meta['search']._resolve_field(field) - bs = self._d_['buckets'] + self._meta["field"] = self._meta["search"]._resolve_field(field) + bs = self._d_["buckets"] if isinstance(bs, list): bs = AttrList(bs, obj_wrapper=self._wrap_bucket) else: bs = AttrDict({k: self._wrap_bucket(bs[k]) for k in bs}) - super(AttrDict, self).__setattr__('_buckets', bs) + super(AttrDict, self).__setattr__("_buckets", bs) return self._buckets + class FieldBucketData(BucketData): _bucket_class = FieldBucket + class TopHitsData(Response): def __init__(self, agg, search, data): - super(AttrDict, self).__setattr__('meta', AttrDict({'agg': agg, 'search': search})) + super(AttrDict, self).__setattr__( + "meta", AttrDict({"agg": agg, "search": search}) + ) super(TopHitsData, self).__init__(search, data) diff --git a/elasticsearch_dsl/response/hit.py b/elasticsearch_dsl/response/hit.py index 15d289f32..ffbf40aae 100644 --- a/elasticsearch_dsl/response/hit.py +++ b/elasticsearch_dsl/response/hit.py @@ -1,34 +1,51 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
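# A short sketch of how the bucket classes above surface when reading a terms
# aggregation off a response; it assumes a default connection and a
# "blog-posts" index with a "tags" keyword field (both illustrative only).
from elasticsearch_dsl import Search

s = Search(index="blog-posts")
s.aggs.bucket("per_tag", "terms", field="tags")
response = s.execute()

# response.aggs.per_tag is wrapped by the bucket-data classes above, so each
# entry supports attribute access and keys are deserialized by the mapped field
for tag in response.aggs.per_tag.buckets:
    print(tag.key, tag.doc_count)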
+ from ..utils import AttrDict, HitMeta + class Hit(AttrDict): def __init__(self, document): data = {} - if '_source' in document: - data = document['_source'] - if 'fields' in document: - data.update(document['fields']) + if "_source" in document: + data = document["_source"] + if "fields" in document: + data.update(document["fields"]) super(Hit, self).__init__(data) # assign meta as attribute and not as key in self._d_ - super(AttrDict, self).__setattr__('meta', HitMeta(document)) + super(AttrDict, self).__setattr__("meta", HitMeta(document)) def __getstate__(self): # add self.meta since it is not in self.__dict__ - return super(Hit, self).__getstate__() + (self.meta, ) + return super(Hit, self).__getstate__() + (self.meta,) def __setstate__(self, state): - super(AttrDict, self).__setattr__('meta', state[-1]) + super(AttrDict, self).__setattr__("meta", state[-1]) super(Hit, self).__setstate__(state[:-1]) def __dir__(self): # be sure to expose meta in dir(self) - return super(Hit, self).__dir__() + ['meta'] + return super(Hit, self).__dir__() + ["meta"] def __repr__(self): - return ''.format( - '/'.join( - getattr(self.meta, key) - for key in ('index', 'id') - if key in self.meta), - super(Hit, self).__repr__() + return "".format( + "/".join( + getattr(self.meta, key) for key in ("index", "id") if key in self.meta + ), + super(Hit, self).__repr__(), ) diff --git a/elasticsearch_dsl/search.py b/elasticsearch_dsl/search.py index 5b0196c1d..47e1916ad 100644 --- a/elasticsearch_dsl/search.py +++ b/elasticsearch_dsl/search.py @@ -1,3 +1,20 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import copy try: @@ -5,17 +22,16 @@ except ImportError: import collections as collections_abc -from six import iteritems, string_types - -from elasticsearch.helpers import scan from elasticsearch.exceptions import TransportError +from elasticsearch.helpers import scan +from six import iteritems, string_types -from .query import Q, Bool from .aggs import A, AggBase -from .utils import DslBase, AttrDict -from .response import Response, Hit -from .connections import get_connection +from .connections import CLIENT_HAS_NAMED_BODY_PARAMS, get_connection from .exceptions import IllegalOperation +from .query import Bool, Q +from .response import Hit, Response +from .utils import AttrDict, DslBase, recursive_to_dict class QueryProxy(object): @@ -24,6 +40,7 @@ class QueryProxy(object): (to add query/post_filter) and also allows attribute access which is proxied to the wrapped query. 
""" + def __init__(self, search, attr_name): self._search = search self._proxied = None @@ -31,6 +48,7 @@ def __init__(self, search, attr_name): def __nonzero__(self): return self._proxied is not None + __bool__ = __nonzero__ def __call__(self, *args, **kwargs): @@ -51,7 +69,7 @@ def __getattr__(self, attr_name): return getattr(self._proxied, attr_name) def __setattr__(self, attr_name, value): - if not attr_name.startswith('_'): + if not attr_name.startswith("_"): self._proxied = Q(self._proxied.to_dict()) setattr(self._proxied, attr_name, value) super(QueryProxy, self).__setattr__(attr_name, value) @@ -71,8 +89,9 @@ class ProxyDescriptor(object): s.query = Q(...) """ + def __init__(self, name): - self._attr_name = '_%s_proxy' % name + self._attr_name = "_%s_proxy" % name def __get__(self, instance, owner): return getattr(instance, self._attr_name) @@ -83,18 +102,19 @@ def __set__(self, instance, value): class AggsProxy(AggBase, DslBase): - name = 'aggs' + name = "aggs" + def __init__(self, search): self._base = self self._search = search - self._params = {'aggs': {}} + self._params = {"aggs": {}} def to_dict(self): - return super(AggsProxy, self).to_dict().get('aggs', {}) + return super(AggsProxy, self).to_dict().get("aggs", {}) class Request(object): - def __init__(self, using='default', index=None, doc_type=None, extra=None): + def __init__(self, using="default", index=None, doc_type=None, extra=None): self._using = using self._index = None @@ -118,11 +138,11 @@ def __init__(self, using='default', index=None, doc_type=None, extra=None): def __eq__(self, other): return ( - isinstance(other, Request) and - other._params == self._params and - other._index == self._index and - other._doc_type == self._doc_type and - other.to_dict() == self.to_dict() + isinstance(other, Request) + and other._params == self._params + and other._index == self._index + and other._doc_type == self._doc_type + and other.to_dict() == self.to_dict() ) def __copy__(self): @@ -174,7 +194,7 @@ def index(self, *index): def _resolve_field(self, path): for dt in self._doc_type: - if not hasattr(dt, '_index'): + if not hasattr(dt, "_index"): continue field = dt._index.resolve_field(path) if field is not None: @@ -184,13 +204,13 @@ def _resolve_nested(self, hit, parent_class=None): doc_class = Hit nested_path = [] - nesting = hit['_nested'] - while nesting and 'field' in nesting: - nested_path.append(nesting['field']) - nesting = nesting.get('_nested') - nested_path = '.'.join(nested_path) + nesting = hit["_nested"] + while nesting and "field" in nesting: + nested_path.append(nesting["field"]) + nesting = nesting.get("_nested") + nested_path = ".".join(nested_path) - if hasattr(parent_class, '_index'): + if hasattr(parent_class, "_index"): nested_field = parent_class._index.resolve_field(nested_path) else: nested_field = self._resolve_field(nested_path) @@ -202,9 +222,9 @@ def _resolve_nested(self, hit, parent_class=None): def _get_result(self, hit, parent_class=None): doc_class = Hit - dt = hit.get('_type') + dt = hit.get("_type") - if '_nested' in hit: + if "_nested" in hit: doc_class = self._resolve_nested(hit, parent_class) elif dt in self._doc_type_map: @@ -212,14 +232,16 @@ def _get_result(self, hit, parent_class=None): else: for doc_type in self._doc_type: - if hasattr(doc_type, '_matches') and doc_type._matches(hit): + if hasattr(doc_type, "_matches") and doc_type._matches(hit): doc_class = doc_type break - for t in hit.get('inner_hits', ()): - hit['inner_hits'][t] = Response(self, hit['inner_hits'][t], 
doc_class=doc_class) + for t in hit.get("inner_hits", ()): + hit["inner_hits"][t] = Response( + self, hit["inner_hits"][t], doc_class=doc_class + ) - callback = getattr(doc_class, 'from_es', doc_class) + callback = getattr(doc_class, "from_es", doc_class) return callback(hit) def doc_type(self, *doc_type, **kwargs): @@ -267,14 +289,15 @@ def extra(self, **kwargs): compatibility. """ s = self._clone() - if 'from_' in kwargs: - kwargs['from'] = kwargs.pop('from_') + if "from_" in kwargs: + kwargs["from"] = kwargs.pop("from_") s._extra.update(kwargs) return s def _clone(self): - s = self.__class__(using=self._using, index=self._index, - doc_type=self._doc_type) + s = self.__class__( + using=self._using, index=self._index, doc_type=self._doc_type + ) s._doc_type_map = self._doc_type_map.copy() s._extra = self._extra.copy() s._params = self._params.copy() @@ -282,8 +305,8 @@ def _clone(self): class Search(Request): - query = ProxyDescriptor('query') - post_filter = ProxyDescriptor('post_filter') + query = ProxyDescriptor("query") + post_filter = ProxyDescriptor("post_filter") def __init__(self, **kwargs): """ @@ -307,8 +330,8 @@ def __init__(self, **kwargs): self._script_fields = {} self._response_class = Response - self._query_proxy = QueryProxy(self, 'query') - self._post_filter_proxy = QueryProxy(self, 'post_filter') + self._query_proxy = QueryProxy(self, "query") + self._post_filter_proxy = QueryProxy(self, "post_filter") def filter(self, *args, **kwargs): return self.query(Bool(filter=[Q(*args, **kwargs)])) @@ -343,15 +366,17 @@ def __getitem__(self, n): raise ValueError("Search does not support negative slicing.") # Elasticsearch won't get all results so we default to size: 10 if # stop not given. - s._extra['from'] = n.start or 0 - s._extra['size'] = n.stop - (n.start or 0) if n.stop is not None else 10 + s._extra["from"] = n.start or 0 + s._extra["size"] = max( + 0, n.stop - (n.start or 0) if n.stop is not None else 10 + ) return s else: # This is an index lookup, equivalent to slicing by [n:n+1]. # If negative index, abort. if n < 0: raise ValueError("Search does not support negative indexing.") - s._extra['from'] = n - s._extra['size'] = 1 + s._extra["from"] = n + s._extra["size"] = 1 return s @classmethod @@ -386,18 +411,17 @@ def _clone(self): s._response_class = self._response_class s._sort = self._sort[:] - s._source = copy.copy(self._source) \ - if self._source is not None else None + s._source = copy.copy(self._source) if self._source is not None else None s._highlight = self._highlight.copy() s._highlight_opts = self._highlight_opts.copy() s._suggest = self._suggest.copy() s._script_fields = self._script_fields.copy() - for x in ('query', 'post_filter'): + for x in ("query", "post_filter"): getattr(s, x)._proxied = getattr(self, x)._proxied # copy top-level bucket definitions - if self.aggs._params.get('aggs'): - s.aggs._params = {'aggs': self.aggs._params['aggs'].copy()} + if self.aggs._params.get("aggs"): + s.aggs._params = {"aggs": self.aggs._params["aggs"].copy()} return s def response_class(self, cls): @@ -414,33 +438,32 @@ def update_from_dict(self, d): the object in-place. Used mostly by ``from_dict``. 
""" d = d.copy() - if 'query' in d: - self.query._proxied = Q(d.pop('query')) - if 'post_filter' in d: - self.post_filter._proxied = Q(d.pop('post_filter')) + if "query" in d: + self.query._proxied = Q(d.pop("query")) + if "post_filter" in d: + self.post_filter._proxied = Q(d.pop("post_filter")) - aggs = d.pop('aggs', d.pop('aggregations', {})) + aggs = d.pop("aggs", d.pop("aggregations", {})) if aggs: self.aggs._params = { - 'aggs': { - name: A(value) for (name, value) in iteritems(aggs)} + "aggs": {name: A(value) for (name, value) in iteritems(aggs)} } - if 'sort' in d: - self._sort = d.pop('sort') - if '_source' in d: - self._source = d.pop('_source') - if 'highlight' in d: - high = d.pop('highlight').copy() - self._highlight = high.pop('fields') + if "sort" in d: + self._sort = d.pop("sort") + if "_source" in d: + self._source = d.pop("_source") + if "highlight" in d: + high = d.pop("highlight").copy() + self._highlight = high.pop("fields") self._highlight_opts = high - if 'suggest' in d: - self._suggest = d.pop('suggest') - if 'text' in self._suggest: - text = self._suggest.pop('text') + if "suggest" in d: + self._suggest = d.pop("suggest") + if "text" in self._suggest: + text = self._suggest.pop("text") for s in self._suggest.values(): - s.setdefault('text', text) - if 'script_fields' in d: - self._script_fields = d.pop('script_fields') + s.setdefault("text", text) + if "script_fields" in d: + self._script_fields = d.pop("script_fields") self._extra.update(d) return self @@ -457,7 +480,8 @@ def script_fields(self, **kwargs): s = s.script_fields( times_three={ 'script': { - 'inline': "doc['field'].value * params.n", + 'lang': 'painless', + 'source': "doc['field'].value * params.n", 'params': {'n': 3} } } @@ -467,7 +491,7 @@ def script_fields(self, **kwargs): s = self._clone() for name in kwargs: if isinstance(kwargs[name], string_types): - kwargs[name] = {'script': kwargs[name]} + kwargs[name] = {"script": kwargs[name]} s._script_fields.update(kwargs) return s @@ -542,9 +566,9 @@ def sort(self, *keys): s = self._clone() s._sort = [] for k in keys: - if isinstance(k, string_types) and k.startswith('-'): - if k[1:] == '_score': - raise IllegalOperation('Sorting by `-_score` is not allowed.') + if isinstance(k, string_types) and k.startswith("-"): + if k[1:] == "_score": + raise IllegalOperation("Sorting by `-_score` is not allowed.") k = {k[1:]: {"order": "desc"}} s._sort.append(k) return s @@ -579,7 +603,8 @@ def highlight(self, *fields, **kwargs): } } - If you want to have different options for different fields you can call ``highlight`` twice:: + If you want to have different options for different fields + you can call ``highlight`` twice:: Search().highlight('title', fragment_size=50).highlight('body', fragment_size=100) @@ -613,7 +638,7 @@ def suggest(self, name, text, **kwargs): s = s.suggest('suggestion-1', 'Elasticsearch', term={'field': 'body'}) """ s = self._clone() - s._suggest[name] = {'text': text} + s._suggest[name] = {"text": text} s._suggest[name].update(kwargs) return s @@ -635,30 +660,30 @@ def to_dict(self, count=False, **kwargs): # count request doesn't care for sorting and other things if not count: if self.post_filter: - d['post_filter'] = self.post_filter.to_dict() + d["post_filter"] = self.post_filter.to_dict() if self.aggs.aggs: d.update(self.aggs.to_dict()) if self._sort: - d['sort'] = self._sort + d["sort"] = self._sort - d.update(self._extra) + d.update(recursive_to_dict(self._extra)) if self._source not in (None, {}): - d['_source'] = self._source + 
d["_source"] = self._source if self._highlight: - d['highlight'] = {'fields': self._highlight} - d['highlight'].update(self._highlight_opts) + d["highlight"] = {"fields": self._highlight} + d["highlight"].update(self._highlight_opts) if self._suggest: - d['suggest'] = self._suggest + d["suggest"] = self._suggest if self._script_fields: - d['script_fields'] = self._script_fields + d["script_fields"] = self._script_fields - d.update(kwargs) + d.update(recursive_to_dict(kwargs)) return d def count(self): @@ -666,18 +691,14 @@ def count(self): Return the number of hits matching the query and filters. Note that only the actual number is returned. """ - if hasattr(self, '_response') and self._response.hits.total.relation == 'eq': + if hasattr(self, "_response") and self._response.hits.total.relation == "eq": return self._response.hits.total.value es = get_connection(self._using) d = self.to_dict(count=True) # TODO: failed shards detection - return es.count( - index=self._index, - body=d, - **self._params - )['count'] + return es.count(index=self._index, body=d, **self._params)["count"] def execute(self, ignore_cache=False): """ @@ -687,16 +708,19 @@ def execute(self, ignore_cache=False): :arg ignore_cache: if set to ``True``, consecutive calls will hit ES, while cached result will be ignored. Defaults to `False` """ - if ignore_cache or not hasattr(self, '_response'): + if ignore_cache or not hasattr(self, "_response"): es = get_connection(self._using) + if CLIENT_HAS_NAMED_BODY_PARAMS: + params = self.to_dict() + if "from" in params: + params["from_"] = params.pop("from") + else: + params = {"body": self.to_dict()} + params.update(self._params) + self._response = self._response_class( - self, - es.search( - index=self._index, - body=self.to_dict(), - **self._params - ) + self, es.search(index=self._index, **params) ) return self._response @@ -712,12 +736,7 @@ def scan(self): """ es = get_connection(self._using) - for hit in scan( - es, - query=self.to_dict(), - index=self._index, - **self._params - ): + for hit in scan(es, query=self.to_dict(), index=self._index, **self._params): yield self._get_result(hit) def delete(self): @@ -728,11 +747,7 @@ def delete(self): es = get_connection(self._using) return AttrDict( - es.delete_by_query( - index=self._index, - body=self.to_dict(), - **self._params - ) + es.delete_by_query(index=self._index, body=self.to_dict(), **self._params) ) @@ -741,6 +756,7 @@ class MultiSearch(Request): Combine multiple :class:`~elasticsearch_dsl.Search` objects into a single request. """ + def __init__(self, **kwargs): super(MultiSearch, self).__init__(**kwargs) self._searches = [] @@ -773,7 +789,7 @@ def to_dict(self): for s in self._searches: meta = {} if s._index: - meta['index'] = s._index + meta["index"] = s._index meta.update(s._params) out.append(meta) @@ -785,20 +801,18 @@ def execute(self, ignore_cache=False, raise_on_error=True): """ Execute the multi search request and return a list of search results. 
""" - if ignore_cache or not hasattr(self, '_response'): + if ignore_cache or not hasattr(self, "_response"): es = get_connection(self._using) responses = es.msearch( - index=self._index, - body=self.to_dict(), - **self._params + index=self._index, body=self.to_dict(), **self._params ) out = [] - for s, r in zip(self._searches, responses['responses']): - if r.get('error', False): + for s, r in zip(self._searches, responses["responses"]): + if r.get("error", False): if raise_on_error: - raise TransportError('N/A', r['error']['type'], r['error']) + raise TransportError("N/A", r["error"]["type"], r["error"]) r = None else: r = Response(s, r) diff --git a/elasticsearch_dsl/serializer.py b/elasticsearch_dsl/serializer.py index ed0956bed..1f9832251 100644 --- a/elasticsearch_dsl/serializer.py +++ b/elasticsearch_dsl/serializer.py @@ -1,13 +1,32 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from elasticsearch.serializer import JSONSerializer from .utils import AttrList + class AttrJSONSerializer(JSONSerializer): def default(self, data): if isinstance(data, AttrList): return data._l_ - if hasattr(data, 'to_dict'): + if hasattr(data, "to_dict"): return data.to_dict() return super(AttrJSONSerializer, self).default(data) + serializer = AttrJSONSerializer() diff --git a/elasticsearch_dsl/update_by_query.py b/elasticsearch_dsl/update_by_query.py index 01ac97652..36e2c698b 100644 --- a/elasticsearch_dsl/update_by_query.py +++ b/elasticsearch_dsl/update_by_query.py @@ -1,12 +1,30 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from .connections import get_connection from .query import Bool, Q from .response import UpdateByQueryResponse from .search import ProxyDescriptor, QueryProxy, Request +from .utils import recursive_to_dict class UpdateByQuery(Request): - query = ProxyDescriptor('query') + query = ProxyDescriptor("query") def __init__(self, **kwargs): """ @@ -17,13 +35,13 @@ def __init__(self, **kwargs): :arg doc_type: only query this type. 
All the parameters supplied (or omitted) at creation type can be later - overriden by methods (`using`, `index` and `doc_type` respectively). + overridden by methods (`using`, `index` and `doc_type` respectively). """ super(UpdateByQuery, self).__init__(**kwargs) self._response_class = UpdateByQueryResponse self._script = {} - self._query_proxy = QueryProxy(self, 'query') + self._query_proxy = QueryProxy(self, "query") def filter(self, *args, **kwargs): return self.query(Bool(filter=[Q(*args, **kwargs)])) @@ -80,10 +98,10 @@ def update_from_dict(self, d): the object in-place. Used mostly by ``from_dict``. """ d = d.copy() - if 'query' in d: - self.query._proxied = Q(d.pop('query')) - if 'script' in d: - self._script = d.pop('script') + if "query" in d: + self.query._proxied = Q(d.pop("query")) + if "script" in d: + self._script = d.pop("script") self._extra.update(d) return self @@ -93,7 +111,8 @@ def script(self, **kwargs): https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting-using.html for more details. - Note: the API only accepts a single script, so calling the script multiple times will overwrite. + Note: the API only accepts a single script, so + calling the script multiple times will overwrite. Example:: @@ -121,11 +140,10 @@ def to_dict(self, **kwargs): d["query"] = self.query.to_dict() if self._script: - d['script'] = self._script - - d.update(self._extra) + d["script"] = self._script - d.update(kwargs) + d.update(recursive_to_dict(self._extra)) + d.update(recursive_to_dict(kwargs)) return d def execute(self): @@ -137,10 +155,6 @@ def execute(self): self._response = self._response_class( self, - es.update_by_query( - index=self._index, - body=self.to_dict(), - **self._params - ) + es.update_by_query(index=self._index, body=self.to_dict(), **self._params), ) return self._response diff --git a/elasticsearch_dsl/utils.py b/elasticsearch_dsl/utils.py index 2352a560e..1f28b29fd 100644 --- a/elasticsearch_dsl/utils.py +++ b/elasticsearch_dsl/utils.py @@ -1,3 +1,20 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
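# A brief end-to-end sketch of the UpdateByQuery pieces above: filter(),
# script() and execute(), plus the success() helper added on
# UpdateByQueryResponse; the index name and fields are illustrative only.
from elasticsearch_dsl import UpdateByQuery, connections

connections.create_connection(hosts=["localhost"])

ubq = (
    UpdateByQuery(index="blog-posts")
    .filter("term", published=False)
    .script(source="ctx._source.views = 0", lang="painless")
)
response = ubq.execute()
print(response.success())  # True when the request neither timed out nor reported failures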
+ from __future__ import unicode_literals try: @@ -7,22 +24,33 @@ from copy import copy -from six import iteritems, add_metaclass +from six import add_metaclass, iteritems from six.moves import map from .exceptions import UnknownDslObject, ValidationException -SKIP_VALUES = ('', None) +SKIP_VALUES = ("", None) EXPAND__TO_DOT = True -DOC_META_FIELDS = frozenset(( - 'id', 'routing', -)) +DOC_META_FIELDS = frozenset( + ( + "id", + "routing", + ) +) + +META_FIELDS = frozenset( + ( + # Elasticsearch metadata fields, except 'type' + "index", + "using", + "score", + "version", + "seq_no", + "primary_term", + ) +).union(DOC_META_FIELDS) -META_FIELDS = frozenset(( - # Elasticsearch metadata fields, except 'type' - 'index', 'using', 'score', 'version', 'seq_no', 'primary_term' -)).union(DOC_META_FIELDS) def _wrap(val, obj_wrapper=None): if isinstance(val, collections_abc.Mapping): @@ -31,6 +59,7 @@ def _wrap(val, obj_wrapper=None): return AttrList(val) return val + class AttrList(object): def __init__(self, l, obj_wrapper=None): # make iterables into lists @@ -68,6 +97,7 @@ def __len__(self): def __nonzero__(self): return bool(self._l_) + __bool__ = __nonzero__ def __getattr__(self, name): @@ -86,15 +116,17 @@ class AttrDict(object): dictionaries. Used to provide a convenient way to access both results and nested dsl dicts. """ + def __init__(self, d): # assign the inner dict manually to prevent __setattr__ from firing - super(AttrDict, self).__setattr__('_d_', d) + super(AttrDict, self).__setattr__("_d_", d) def __contains__(self, key): return key in self._d_ def __nonzero__(self): return bool(self._d_) + __bool__ = __nonzero__ def __dir__(self): @@ -113,28 +145,34 @@ def __ne__(self, other): def __repr__(self): r = repr(self._d_) if len(r) > 60: - r = r[:60] + '...}' + r = r[:60] + "...}" return r def __getstate__(self): - return self._d_, + return (self._d_,) def __setstate__(self, state): - super(AttrDict, self).__setattr__('_d_', state[0]) + super(AttrDict, self).__setattr__("_d_", state[0]) def __getattr__(self, attr_name): try: return self.__getitem__(attr_name) except KeyError: raise AttributeError( - '{!r} object has no attribute {!r}'.format(self.__class__.__name__, attr_name)) + "{!r} object has no attribute {!r}".format( + self.__class__.__name__, attr_name + ) + ) def __delattr__(self, attr_name): try: del self._d_[attr_name] except KeyError: raise AttributeError( - '{!r} object has no attribute {!r}'.format(self.__class__.__name__, attr_name)) + "{!r} object has no attribute {!r}".format( + self.__class__.__name__, attr_name + ) + ) def __getitem__(self, key): return _wrap(self._d_[key]) @@ -171,17 +209,19 @@ class DslMeta(type): For typical use see `QueryMeta` and `Query` in `elasticsearch_dsl.query`. """ + _types = {} + def __init__(cls, name, bases, attrs): super(DslMeta, cls).__init__(name, bases, attrs) # skip for DslBase - if not hasattr(cls, '_type_shortcut'): + if not hasattr(cls, "_type_shortcut"): return if cls.name is None: # abstract base class, register it's shortcut cls._types[cls._type_name] = cls._type_shortcut # and create a registry for subclasses - if not hasattr(cls, '_classes'): + if not hasattr(cls, "_classes"): cls._classes = {} elif cls.name not in cls._classes: # normal class, register it @@ -192,7 +232,7 @@ def get_dsl_type(cls, name): try: return cls._types[name] except KeyError: - raise UnknownDslObject('DSL type %s does not exist.' % name) + raise UnknownDslObject("DSL type %s does not exist." 
% name) @add_metaclass(DslMeta) @@ -211,6 +251,7 @@ class DslBase(object): attributes based on the `_param_defs` definition (for example turning all values in the `must` attribute into Query objects) """ + _param_defs = {} @classmethod @@ -220,29 +261,28 @@ def get_dsl_class(cls, name, default=None): except KeyError: if default is not None: return cls._classes[default] - raise UnknownDslObject('DSL class `{}` does not exist in {}.'.format(name, cls._type_name)) + raise UnknownDslObject( + "DSL class `{}` does not exist in {}.".format(name, cls._type_name) + ) def __init__(self, _expand__to_dot=EXPAND__TO_DOT, **params): self._params = {} for pname, pvalue in iteritems(params): - if '__' in pname and _expand__to_dot: - pname = pname.replace('__', '.') + if "__" in pname and _expand__to_dot: + pname = pname.replace("__", ".") self._setattr(pname, pvalue) def _repr_params(self): - """ Produce a repr of all our parameters to be used in __repr__. """ - return ', '.join( - '{}={!r}'.format(n.replace('.', '__'), v) + """Produce a repr of all our parameters to be used in __repr__.""" + return ", ".join( + "{}={!r}".format(n.replace(".", "__"), v) for (n, v) in sorted(iteritems(self._params)) # make sure we don't include empty typed params - if 'type' not in self._param_defs.get(n, {}) or v + if "type" not in self._param_defs.get(n, {}) or v ) def __repr__(self): - return '{}({})'.format( - self.__class__.__name__, - self._repr_params() - ) + return "{}({})".format(self.__class__.__name__, self._repr_params()) def __eq__(self, other): return isinstance(other, self.__class__) and other.to_dict() == self.to_dict() @@ -251,7 +291,7 @@ def __ne__(self, other): return not self == other def __setattr__(self, name, value): - if name.startswith('_'): + if name.startswith("_"): return super(DslBase, self).__setattr__(name, value) return self._setattr(name, value) @@ -260,22 +300,24 @@ def _setattr(self, name, value): if name in self._param_defs: pinfo = self._param_defs[name] - if 'type' in pinfo: + if "type" in pinfo: # get the shortcut used to construct this type (query.Q, aggs.A, etc) - shortcut = self.__class__.get_dsl_type(pinfo['type']) + shortcut = self.__class__.get_dsl_type(pinfo["type"]) # list of dict(name -> DslBase) - if pinfo.get('multi') and pinfo.get('hash'): + if pinfo.get("multi") and pinfo.get("hash"): if not isinstance(value, (tuple, list)): - value = (value, ) - value = list({k: shortcut(v) for (k, v) in iteritems(obj)} for obj in value) - elif pinfo.get('multi'): + value = (value,) + value = list( + {k: shortcut(v) for (k, v) in iteritems(obj)} for obj in value + ) + elif pinfo.get("multi"): if not isinstance(value, (tuple, list)): - value = (value, ) + value = (value,) value = list(map(shortcut, value)) # dict(name -> DslBase), make sure we pickup all the objs - elif pinfo.get('hash'): + elif pinfo.get("hash"): value = {k: shortcut(v) for (k, v) in iteritems(value)} # single value object, just convert @@ -284,9 +326,12 @@ def _setattr(self, name, value): self._params[name] = value def __getattr__(self, name): - if name.startswith('_'): + if name.startswith("_"): raise AttributeError( - '{!r} object has no attribute {!r}'.format(self.__class__.__name__, name)) + "{!r} object has no attribute {!r}".format( + self.__class__.__name__, name + ) + ) value = None try: @@ -296,13 +341,16 @@ def __getattr__(self, name): # container instead if name in self._param_defs: pinfo = self._param_defs[name] - if pinfo.get('multi'): + if pinfo.get("multi"): value = self._params.setdefault(name, []) - 
elif pinfo.get('hash'): + elif pinfo.get("hash"): value = self._params.setdefault(name, {}) if value is None: raise AttributeError( - '{!r} object has no attribute {!r}'.format(self.__class__.__name__, name)) + "{!r} object has no attribute {!r}".format( + self.__class__.__name__, name + ) + ) # wrap nested dicts in AttrDict for convenient access if isinstance(value, collections_abc.Mapping): @@ -318,24 +366,23 @@ def to_dict(self): pinfo = self._param_defs.get(pname) # typed param - if pinfo and 'type' in pinfo: + if pinfo and "type" in pinfo: # don't serialize empty lists and dicts for typed fields if value in ({}, []): continue # list of dict(name -> DslBase) - if pinfo.get('multi') and pinfo.get('hash'): + if pinfo.get("multi") and pinfo.get("hash"): value = list( - {k: v.to_dict() for k, v in iteritems(obj)} - for obj in value + {k: v.to_dict() for k, v in iteritems(obj)} for obj in value ) # multi-values are serialized as list of dicts - elif pinfo.get('multi'): + elif pinfo.get("multi"): value = list(map(lambda x: x.to_dict(), value)) # squash all the hash values into one dict - elif pinfo.get('hash'): + elif pinfo.get("hash"): value = {k: v.to_dict() for k, v in iteritems(value)} # serialize single values @@ -343,7 +390,7 @@ def to_dict(self): value = value.to_dict() # serialize anything with to_dict method - elif hasattr(value, 'to_dict'): + elif hasattr(value, "to_dict"): value = value.to_dict() d[pname] = value @@ -355,22 +402,28 @@ def _clone(self): c._params[attr] = copy(self._params[attr]) return c + class HitMeta(AttrDict): - def __init__(self, document, exclude=('_source', '_fields')): - d = {k[1:] if k.startswith('_') else k: v for (k, v) in iteritems(document) if k not in exclude} - if 'type' in d: + def __init__(self, document, exclude=("_source", "_fields")): + d = { + k[1:] if k.startswith("_") else k: v + for (k, v) in iteritems(document) + if k not in exclude + } + if "type" in d: # make sure we are consistent everywhere in python - d['doc_type'] = d.pop('type') + d["doc_type"] = d.pop("type") super(HitMeta, self).__init__(d) + class ObjectBase(AttrDict): def __init__(self, meta=None, **kwargs): meta = meta or {} for k in list(kwargs): - if k.startswith('_') and k[1:] in META_FIELDS: + if k.startswith("_") and k[1:] in META_FIELDS: meta[k] = kwargs.pop(k) - super(AttrDict, self).__setattr__('meta', HitMeta(meta)) + super(AttrDict, self).__setattr__("meta", HitMeta(meta)) super(ObjectBase, self).__init__(kwargs) @@ -385,7 +438,7 @@ def __list_fields(cls): field = cls._doc_type.mapping[name] yield name, field, False - if hasattr(cls.__class__, '_index'): + if hasattr(cls.__class__, "_index"): if not cls._index._mapping: return for name in cls._index._mapping: @@ -401,7 +454,7 @@ def __get_field(cls, name): return cls._doc_type.mapping[name] except KeyError: # fallback to fields on the Index - if hasattr(cls, '_index') and cls._index._mapping: + if hasattr(cls, "_index") and cls._index._mapping: try: return cls._index._mapping[name] except KeyError: @@ -410,7 +463,7 @@ def __get_field(cls, name): @classmethod def from_es(cls, hit): meta = hit.copy() - data = meta.pop('_source', {}) + data = meta.pop("_source", {}) doc = cls(meta=meta) doc._from_dict(data) return doc @@ -427,8 +480,8 @@ def __getstate__(self): def __setstate__(self, state): data, meta = state - super(AttrDict, self).__setattr__('_d_', {}) - super(AttrDict, self).__setattr__('meta', HitMeta(meta)) + super(AttrDict, self).__setattr__("_d_", {}) + super(AttrDict, self).__setattr__("meta", HitMeta(meta)) 
self._from_dict(data) def __getattr__(self, name): @@ -436,7 +489,7 @@ def __getattr__(self, name): return super(ObjectBase, self).__getattr__(name) except AttributeError: f = self.__get_field(name) - if hasattr(f, 'empty'): + if hasattr(f, "empty"): value = f.empty() if value not in SKIP_VALUES: setattr(self, name, value) @@ -490,16 +543,42 @@ def full_clean(self): self.clean_fields() self.clean() + def merge(data, new_data, raise_on_conflict=False): - if not (isinstance(data, (AttrDict, collections_abc.Mapping)) - and isinstance(new_data, (AttrDict, collections_abc.Mapping))): - raise ValueError('You can only merge two dicts! Got {!r} and {!r} instead.'.format(data, new_data)) + if not ( + isinstance(data, (AttrDict, collections_abc.Mapping)) + and isinstance(new_data, (AttrDict, collections_abc.Mapping)) + ): + raise ValueError( + "You can only merge two dicts! Got {!r} and {!r} instead.".format( + data, new_data + ) + ) for key, value in iteritems(new_data): - if key in data and isinstance(data[key], (AttrDict, collections_abc.Mapping)) and \ - isinstance(value, (AttrDict, collections_abc.Mapping)): + if ( + key in data + and isinstance(data[key], (AttrDict, collections_abc.Mapping)) + and isinstance(value, (AttrDict, collections_abc.Mapping)) + ): merge(data[key], value, raise_on_conflict) elif key in data and data[key] != value and raise_on_conflict: - raise ValueError('Incompatible data for key %r, cannot be merged.' % key) + raise ValueError("Incompatible data for key %r, cannot be merged." % key) else: data[key] = value + + +def recursive_to_dict(data): + """Recursively transform objects that potentially have .to_dict() + into dictionary literals by traversing AttrList, AttrDict, list, + tuple, and Mapping types. + """ + if isinstance(data, AttrList): + data = list(data._l_) + elif hasattr(data, "to_dict"): + data = data.to_dict() + if isinstance(data, (list, tuple)): + return type(data)(recursive_to_dict(inner) for inner in data) + elif isinstance(data, collections_abc.Mapping): + return {key: recursive_to_dict(val) for key, val in data.items()} + return data diff --git a/elasticsearch_dsl/wrappers.py b/elasticsearch_dsl/wrappers.py index 50666a22c..18d8a80ad 100644 --- a/elasticsearch_dsl/wrappers.py +++ b/elasticsearch_dsl/wrappers.py @@ -1,40 +1,58 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
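# A small illustration of recursive_to_dict() defined above: nested DSL
# objects (anything exposing to_dict()) and AttrList/AttrDict wrappers are
# converted to plain dicts and lists; Search.to_dict() now runs _extra and
# **kwargs through it. The values below are made up.
from elasticsearch_dsl import Q
from elasticsearch_dsl.utils import recursive_to_dict

extra = {
    "collapse": {"field": "user.id"},
    "rescore": {"query": {"rescore_query": Q("match", title="python")}},
}
recursive_to_dict(extra)
# {'collapse': {'field': 'user.id'},
#  'rescore': {'query': {'rescore_query': {'match': {'title': 'python'}}}}}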
+ import operator -from six import string_types, iteritems +from six import iteritems, string_types from .utils import AttrDict -__all__ = ['Range'] +__all__ = ["Range"] + class Range(AttrDict): OPS = { - 'lt': operator.lt, 'lte': operator.le, - 'gt': operator.gt, 'gte': operator.ge, + "lt": operator.lt, + "lte": operator.le, + "gt": operator.gt, + "gte": operator.ge, } def __init__(self, *args, **kwargs): - if args and ( - len(args) > 1 or - kwargs or - not isinstance(args[0], dict) - ): - raise ValueError('Range accepts a single dictionary or a set of keyword arguments.') + if args and (len(args) > 1 or kwargs or not isinstance(args[0], dict)): + raise ValueError( + "Range accepts a single dictionary or a set of keyword arguments." + ) data = args[0] if args else kwargs for k in data: if k not in self.OPS: - raise ValueError('Range received an unknown operator %r' % k) + raise ValueError("Range received an unknown operator %r" % k) - if 'gt' in data and 'gte' in data: - raise ValueError('You cannot specify both gt and gte for Range.') + if "gt" in data and "gte" in data: + raise ValueError("You cannot specify both gt and gte for Range.") - if 'lt' in data and 'lte' in data: - raise ValueError('You cannot specify both lt and lte for Range.') + if "lt" in data and "lte" in data: + raise ValueError("You cannot specify both lt and lte for Range.") super(Range, self).__init__(args[0] if args else kwargs) def __repr__(self): - return 'Range(%s)' % ', '.join('%s=%r' % op for op in iteritems(self._d_)) + return "Range(%s)" % ", ".join("%s=%r" % op for op in iteritems(self._d_)) def __contains__(self, item): if isinstance(item, string_types): @@ -47,16 +65,16 @@ def __contains__(self, item): @property def upper(self): - if 'lt' in self._d_: - return self._d_['lt'], False - if 'lte' in self._d_: - return self._d_['lte'], True + if "lt" in self._d_: + return self._d_["lt"], False + if "lte" in self._d_: + return self._d_["lte"], True return None, False @property def lower(self): - if 'gt' in self._d_: - return self._d_['gt'], False - if 'gte' in self._d_: - return self._d_['gte'], True + if "gt" in self._d_: + return self._d_["gt"], False + if "gte" in self._d_: + return self._d_["gte"], True return None, False diff --git a/examples/alias_migration.py b/examples/alias_migration.py index 725376e11..e0a44da30 100644 --- a/examples/alias_migration.py +++ b/examples/alias_migration.py @@ -1,3 +1,20 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + """ Simple example with a single Document demonstrating how schema can be managed, including upgrading with reindexing. 
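# A quick sketch of the Range wrapper refactored above: it validates the
# bounds at construction time and supports "in" checks against scalar values.
from elasticsearch_dsl.wrappers import Range

r = Range(gte=10, lt=20)
assert 10 in r and 19 in r
assert 20 not in r
print(r.lower, r.upper)   # (10, True) (20, False)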
@@ -21,10 +38,11 @@ from datetime import datetime from fnmatch import fnmatch -from elasticsearch_dsl import Document, Date, Text, Keyword, connections +from elasticsearch_dsl import Date, Document, Keyword, Text, connections + +ALIAS = "test-blog" +PATTERN = ALIAS + "-*" -ALIAS = 'test-blog' -PATTERN = ALIAS + '-*' class BlogPost(Document): title = Text() @@ -39,17 +57,15 @@ def is_published(self): def _matches(cls, hit): # override _matches to match indices in a pattern instead of just ALIAS # hit is the raw dict as returned by elasticsearch - return fnmatch(hit['_index'], PATTERN) + return fnmatch(hit["_index"], PATTERN) class Index: # we will use an alias instead of the index name = ALIAS # set settings and possibly other attributes of the index like # analyzers - settings = { - 'number_of_shards': 1, - 'number_of_replicas': 0 - } + settings = {"number_of_shards": 1, "number_of_replicas": 0} + def setup(): """ @@ -67,6 +83,7 @@ def setup(): if not BlogPost._index.exists(): migrate(move_data=False) + def migrate(move_data=True, update_alias=True): """ Upgrade function that creates a new index for the data. Optionally it also can @@ -79,7 +96,7 @@ def migrate(move_data=True, update_alias=True): not perform any writes at this time as those might be lost. """ # construct a new index name by appending current timestamp - next_index = PATTERN.replace('*', datetime.now().strftime('%Y%m%d%H%M%S%f')) + next_index = PATTERN.replace("*", datetime.now().strftime("%Y%m%d%H%M%S%f")) # get the low level connection es = connections.get_connection() @@ -91,21 +108,24 @@ def migrate(move_data=True, update_alias=True): # move data from current alias to the new index es.reindex( body={"source": {"index": ALIAS}, "dest": {"index": next_index}}, - request_timeout=3600 + request_timeout=3600, ) # refresh the index to make the changes visible es.indices.refresh(index=next_index) if update_alias: # repoint the alias to point to the newly created index - es.indices.update_aliases(body={ - 'actions': [ - {"remove": {"alias": ALIAS, "index": PATTERN}}, - {"add": {"alias": ALIAS, "index": next_index}}, - ] - }) - -if __name__ == '__main__': + es.indices.update_aliases( + body={ + "actions": [ + {"remove": {"alias": ALIAS, "index": PATTERN}}, + {"add": {"alias": ALIAS, "index": next_index}}, + ] + } + ) + + +if __name__ == "__main__": # initiate the default connection to elasticsearch connections.create_connection() @@ -115,9 +135,9 @@ def migrate(move_data=True, update_alias=True): # create a new document bp = BlogPost( _id=0, - title='Hello World!', - tags = ['testing', 'dummy'], - content=open(__file__).read() + title="Hello World!", + tags=["testing", "dummy"], + content=open(__file__).read(), ) bp.save(refresh=True) diff --git a/examples/completion.py b/examples/completion.py index ded295b3f..d815a218b 100644 --- a/examples/completion.py +++ b/examples/completion.py @@ -1,4 +1,21 @@ # -*- coding: utf-8 -*- +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + """ Example ``Document`` with completion suggester. @@ -13,23 +30,28 @@ from itertools import permutations -from elasticsearch_dsl import connections, Document, Completion, Text, Long, \ - Keyword, analyzer, token_filter +from elasticsearch_dsl import ( + Completion, + Document, + Keyword, + Long, + Text, + analyzer, + connections, + token_filter, +) # custom analyzer for names ascii_fold = analyzer( - 'ascii_fold', + "ascii_fold", # we don't want to split O'Brian or Toulouse-Lautrec - tokenizer='whitespace', - filter=[ - 'lowercase', - token_filter('ascii_fold', 'asciifolding') - ] + tokenizer="whitespace", + filter=["lowercase", token_filter("ascii_fold", "asciifolding")], ) class Person(Document): - name = Text(fields={'keyword': Keyword()}) + name = Text(fields={"keyword": Keyword()}) popularity = Long() # copletion field with a custom analyzer @@ -42,19 +64,16 @@ def clean(self): popularity as ``weight``. """ self.suggest = { - 'input': [' '.join(p) for p in permutations(self.name.split())], - 'weight': self.popularity + "input": [" ".join(p) for p in permutations(self.name.split())], + "weight": self.popularity, } class Index: - name = 'test-suggest' - settings = { - 'number_of_shards': 1, - 'number_of_replicas': 0 - } + name = "test-suggest" + settings = {"number_of_shards": 1, "number_of_replicas": 0} -if __name__ == '__main__': +if __name__ == "__main__": # initiate the default connection to elasticsearch connections.create_connection() @@ -62,21 +81,20 @@ class Index: Person.init() # index some sample data - for id, (name, popularity) in enumerate([ - ('Henri de Toulouse-Lautrec', 42), - ('Jára Cimrman', 124), - ]): + for id, (name, popularity) in enumerate( + [("Henri de Toulouse-Lautrec", 42), ("Jára Cimrman", 124)] + ): Person(_id=id, name=name, popularity=popularity).save() # refresh index manually to make changes live Person._index.refresh() # run some suggestions - for text in ('já', 'Jara Cimr', 'tou', 'de hen'): + for text in ("já", "Jara Cimr", "tou", "de hen"): s = Person.search() - s = s.suggest('auto_complete', text, completion={'field': 'suggest'}) + s = s.suggest("auto_complete", text, completion={"field": "suggest"}) response = s.execute() # print out all the options we got for option in response.suggest.auto_complete[0].options: - print('%10s: %25s (%d)' % (text, option._source.name, option._score)) + print("%10s: %25s (%d)" % (text, option._source.name, option._score)) diff --git a/examples/composite_agg.py b/examples/composite_agg.py index 9e035a457..b9d22b555 100644 --- a/examples/composite_agg.py +++ b/examples/composite_agg.py @@ -1,6 +1,24 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from __future__ import print_function -from elasticsearch_dsl import connections, Search, A +from elasticsearch_dsl import A, Search, connections + def scan_aggs(search, source_aggs, inner_aggs={}, size=10): """ @@ -8,31 +26,35 @@ def scan_aggs(search, source_aggs, inner_aggs={}, size=10): ``source_aggs``, returning results of ``inner_aggs`` for each. Uses the ``composite`` aggregation under the hood to perform this. """ + def run_search(**kwargs): s = search[:0] - s.aggs.bucket('comp', 'composite', sources=source_aggs, size=size, **kwargs) + s.aggs.bucket("comp", "composite", sources=source_aggs, size=size, **kwargs) for agg_name, agg in inner_aggs.items(): - s.aggs['comp'][agg_name] = agg + s.aggs["comp"][agg_name] = agg return s.execute() response = run_search() while response.aggregations.comp.buckets: for b in response.aggregations.comp.buckets: yield b - if 'after_key' in response.aggregations.comp: + if "after_key" in response.aggregations.comp: after = response.aggregations.comp.after_key else: - after= response.aggregations.comp.buckets[-1].key + after = response.aggregations.comp.buckets[-1].key response = run_search(after=after) -if __name__ == '__main__': +if __name__ == "__main__": # initiate the default connection to elasticsearch connections.create_connection() - for b in scan_aggs(Search(index='git'), - {'files': A('terms', field='files')}, - {'first_seen': A('min', field='committed_date')}): - print('File %s has been modified %d times, first seen at %s.' % ( - b.key.files, b.doc_count, b.first_seen.value_as_string - )) + for b in scan_aggs( + Search(index="git"), + {"files": A("terms", field="files")}, + {"first_seen": A("min", field="committed_date")}, + ): + print( + "File %s has been modified %d times, first seen at %s." + % (b.key.files, b.doc_count, b.first_seen.value_as_string) + ) diff --git a/examples/parent_child.py b/examples/parent_child.py index 0bff5b458..3bd40964a 100644 --- a/examples/parent_child.py +++ b/examples/parent_child.py @@ -1,3 +1,20 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + """ Complex data model example modeling stackoverflow-like data. 
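scan_aggs() in examples/composite_agg.py above pages through buckets by re-running the search with the previous response's after_key. The first request it issues is roughly equivalent to the following sketch; the index and field names come from the example and are otherwise illustrative:

    from elasticsearch_dsl import A, Search

    s = Search(index="git")[:0]          # size 0, aggregations only
    comp = s.aggs.bucket(
        "comp", "composite",
        sources={"files": A("terms", field="files")},
        size=10,
    )
    comp.metric("first_seen", "min", field="committed_date")
    # later pages pass after=<after_key of the previous response>
    # to the same composite aggregation.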
@@ -24,36 +41,53 @@ """ from datetime import datetime -from elasticsearch_dsl import Document, Date, Text, Keyword, Boolean, InnerDoc, \ - Nested, Object, Join, Long, connections +from elasticsearch_dsl import ( + Boolean, + Date, + Document, + InnerDoc, + Join, + Keyword, + Long, + Nested, + Object, + Text, + connections, +) + class User(InnerDoc): """ Class used to represent a denormalized user stored on other objects. """ + id = Long(required=True) signed_up = Date() - username = Text(fields={'keyword': Keyword()}, required=True) - email = Text(fields={'keyword': Keyword()}) - location = Text(fields={'keyword': Keyword()}) + username = Text(fields={"keyword": Keyword()}, required=True) + email = Text(fields={"keyword": Keyword()}) + location = Text(fields={"keyword": Keyword()}) + class Comment(InnerDoc): """ Class wrapper for nested comment objects. """ + author = Object(User, required=True) created = Date(required=True) content = Text(required=True) + class Post(Document): """ Base class for Question and Answer containing the common fields. """ + author = Object(User, required=True) created = Date(required=True) body = Text(required=True) comments = Nested(Comment) - question_answer = Join(relations={'question': 'answer'}) + question_answer = Join(relations={"question": "answer"}) @classmethod def _matches(cls, hit): @@ -62,18 +96,14 @@ def _matches(cls, hit): return False class Index: - name = 'test-qa-site' + name = "test-qa-site" settings = { "number_of_shards": 1, "number_of_replicas": 0, } def add_comment(self, user, content, created=None, commit=True): - c = Comment( - author=user, - content=content, - created=created or datetime.now() - ) + c = Comment(author=user, content=content, created=created or datetime.now()) self.comments.append(c) if commit: self.save() @@ -85,19 +115,20 @@ def save(self, **kwargs): self.created = datetime.now() return super(Post, self).save(**kwargs) + class Question(Post): # use multi True so that .tags will return empty list if not present tags = Keyword(multi=True) - title = Text(fields={'keyword': Keyword()}) + title = Text(fields={"keyword": Keyword()}) @classmethod def _matches(cls, hit): - """ Use Question class for parent documents """ - return hit['_source']['question_answer'] == 'question' + """Use Question class for parent documents""" + return hit["_source"]["question_answer"] == "question" @classmethod def search(cls, **kwargs): - return cls._index.search(**kwargs).filter('term', question_answer='question') + return cls._index.search(**kwargs).filter("term", question_answer="question") def add_answer(self, user, body, created=None, accepted=False, commit=True): answer = Answer( @@ -106,13 +137,12 @@ def add_answer(self, user, body, created=None, accepted=False, commit=True): # since we don't have explicit index, ensure same index as self _index=self.meta.index, # set up the parent/child mapping - question_answer={'name': 'answer', 'parent': self.meta.id}, - + question_answer={"name": "answer", "parent": self.meta.id}, # pass in the field values author=user, created=created, body=body, - accepted=accepted + accepted=accepted, ) if commit: answer.save() @@ -122,7 +152,7 @@ def search_answers(self): # search only our index s = Answer.search() # filter for answers belonging to us - s = s.filter('parent_id', type="answer", id=self.meta.id) + s = s.filter("parent_id", type="answer", id=self.meta.id) # add routing to only go to specific shard s = s.params(routing=self.meta.id) return s @@ -132,12 +162,12 @@ def get_answers(self): Get answers 
either from inner_hits already present or by searching elasticsearch. """ - if 'inner_hits' in self.meta and 'answer' in self.meta.inner_hits: + if "inner_hits" in self.meta and "answer" in self.meta.inner_hits: return self.meta.inner_hits.answer.hits return list(self.search_answers()) def save(self, **kwargs): - self.question_answer = 'question' + self.question_answer = "question" return super(Question, self).save(**kwargs) @@ -146,21 +176,24 @@ class Answer(Post): @classmethod def _matches(cls, hit): - """ Use Answer class for child documents with child name 'answer' """ - return isinstance(hit['_source']['question_answer'], dict) \ - and hit['_source']['question_answer'].get('name') == 'answer' + """Use Answer class for child documents with child name 'answer'""" + return ( + isinstance(hit["_source"]["question_answer"], dict) + and hit["_source"]["question_answer"].get("name") == "answer" + ) @classmethod def search(cls, **kwargs): - return cls._index.search(**kwargs).exclude('term', question_answer='question') + return cls._index.search(**kwargs).exclude("term", question_answer="question") @property def question(self): # cache question in self.meta # any attributes set on self would be interpretted as fields - if 'question' not in self.meta: + if "question" not in self.meta: self.meta.question = Question.get( - id=self.question_answer.parent, index=self.meta.index) + id=self.question_answer.parent, index=self.meta.index + ) return self.meta.question def save(self, **kwargs): @@ -170,11 +203,12 @@ def save(self, **kwargs): def setup(): - """ Create an IndexTemplate and save it into elasticsearch. """ - index_template = Post._index.as_template('base') + """Create an IndexTemplate and save it into elasticsearch.""" + index_template = Post._index.as_template("base") index_template.save() -if __name__ == '__main__': + +if __name__ == "__main__": # initiate the default connection to elasticsearch connections.create_connection() @@ -182,21 +216,30 @@ def setup(): setup() # user objects to use - nick = User(id=47, signed_up=datetime(2017, 4, 3), username='fxdgear', - email='nick.lang@elastic.co', location='Colorado') - honza = User(id=42, signed_up=datetime(2013, 4, 3), username='honzakral', - email='honza@elastic.co', location='Prague') + nick = User( + id=47, + signed_up=datetime(2017, 4, 3), + username="fxdgear", + email="nick.lang@elastic.co", + location="Colorado", + ) + honza = User( + id=42, + signed_up=datetime(2013, 4, 3), + username="honzakral", + email="honza@elastic.co", + location="Prague", + ) # create a question object question = Question( _id=1, author=nick, - tags=['elasticsearch', 'python'], - title='How do I use elasticsearch from Python?', - body=''' + tags=["elasticsearch", "python"], + title="How do I use elasticsearch from Python?", + body=""" I want to use elasticsearch, how do I do it from Python? - ''', + """, ) question.save() answer = question.add_answer(honza, "Just use `elasticsearch-py`!") - diff --git a/examples/percolate.py b/examples/percolate.py index 8a74faae4..7ca49b635 100644 --- a/examples/percolate.py +++ b/examples/percolate.py @@ -1,23 +1,48 @@ -from elasticsearch_dsl import Document, Percolator, Text, Keyword, \ - connections, Q, Search +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. 
licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from elasticsearch_dsl import ( + Document, + Keyword, + Percolator, + Q, + Search, + Text, + connections, +) + class BlogPost(Document): """ Blog posts that will be automatically tagged based on percolation queries. """ + content = Text() tags = Keyword(multi=True) class Index: - name = 'test-blogpost' + name = "test-blogpost" def add_tags(self): # run a percolation to automatically tag the blog post. - s = Search(index='test-percolator') - s = s.query('percolate', - field='query', - index=self._get_index(), - document=self.to_dict()) + s = Search(index="test-percolator") + s = s.query( + "percolate", field="query", index=self._get_index(), document=self.to_dict() + ) # collect all the tags from matched percolators for percolator in s: @@ -30,10 +55,12 @@ def save(self, **kwargs): self.add_tags() return super(BlogPost, self).save(**kwargs) + class PercolatorDoc(Document): """ Document class used for storing the percolation queries. """ + # relevant fields from BlogPost must be also present here for the queries # to be able to use them. Another option would be to use document # inheritance but save() would have to be reset to normal behavior. @@ -45,11 +72,9 @@ class PercolatorDoc(Document): tags = Keyword(multi=True) class Index: - name = 'test-percolator' - settings = { - "number_of_shards": 1, - "number_of_replicas": 0 - } + name = "test-percolator" + settings = {"number_of_shards": 1, "number_of_replicas": 0} + def setup(): # create the percolator index if it doesn't exist @@ -58,13 +83,13 @@ def setup(): # register a percolation query looking for documents about python PercolatorDoc( - _id='python', - tags=['programming', 'development', 'python'], - query=Q('match', content='python') + _id="python", + tags=["programming", "development", "python"], + query=Q("match", content="python"), ).save(refresh=True) -if __name__ == '__main__': +if __name__ == "__main__": # initiate the default connection to elasticsearch connections.create_connection() diff --git a/examples/search_as_you_type.py b/examples/search_as_you_type.py index 4668c43d0..1c41cb73d 100644 --- a/examples/search_as_you_type.py +++ b/examples/search_as_you_type.py @@ -1,26 +1,47 @@ # -*- coding: utf-8 -*- +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + """ Example ``Document`` with search_as_you_type field datatype and how to search it. -When creating a field with search_as_you_type datatype ElasticSearch creates additional subfields to enable efficient -as-you-type completion, matching terms at any position within the input. +When creating a field with search_as_you_type datatype ElasticSearch creates additional +subfields to enable efficient as-you-type completion, matching terms at any position +within the input. To custom analyzer with ascii folding allow search to work in different languages. """ from __future__ import print_function, unicode_literals -from elasticsearch_dsl import connections, Document, analyzer, token_filter, SearchAsYouType +from elasticsearch_dsl import ( + Document, + SearchAsYouType, + analyzer, + connections, + token_filter, +) from elasticsearch_dsl.query import MultiMatch # custom analyzer for names ascii_fold = analyzer( - 'ascii_fold', + "ascii_fold", # we don't want to split O'Brian or Toulouse-Lautrec - tokenizer='whitespace', - filter=[ - 'lowercase', - token_filter('ascii_fold', 'asciifolding') - ] + tokenizer="whitespace", + filter=["lowercase", token_filter("ascii_fold", "asciifolding")], ) @@ -28,14 +49,11 @@ class Person(Document): name = SearchAsYouType(max_shingle_size=3) class Index: - name = 'test-search-as-you-type' - settings = { - 'number_of_shards': 1, - 'number_of_replicas': 0 - } + name = "test-search-as-you-type" + settings = {"number_of_shards": 1, "number_of_replicas": 0} -if __name__ == '__main__': +if __name__ == "__main__": # initiate the default connection to elasticsearch connections.create_connection() @@ -43,14 +61,15 @@ class Index: Person.init() import pprint + pprint.pprint(Person().to_dict(), indent=2) # index some sample data names = [ - 'Andy Warhol', - 'Alphonse Mucha', - 'Henri de Toulouse-Lautrec', - 'Jára Cimrman', + "Andy Warhol", + "Alphonse Mucha", + "Henri de Toulouse-Lautrec", + "Jára Cimrman", ] for id, name in enumerate(names): Person(_id=id, name=name).save() @@ -59,21 +78,17 @@ class Index: Person._index.refresh() # run some suggestions - for text in ('já', 'Cimr', 'toulouse', 'Henri Tou', 'a'): + for text in ("já", "Cimr", "toulouse", "Henri Tou", "a"): s = Person.search() s.query = MultiMatch( query=text, type="bool_prefix", - fields=[ - "name", - "name._2gram", - "name._3gram" - ] + fields=["name", "name._2gram", "name._3gram"], ) response = s.execute() # print out all the options we got for h in response: - print('%15s: %25s' % (text, h.name)) + print("%15s: %25s" % (text, h.name)) diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 000000000..12ff627cb --- /dev/null +++ b/noxfile.py @@ -0,0 +1,78 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +import nox + +SOURCE_FILES = ( + "setup.py", + "noxfile.py", + "docs/", + "elasticsearch_dsl/", + "examples/", + "tests/", + "utils/", +) + + +@nox.session(python=["2.7", "3.4", "3.5", "3.6", "3.7", "3.8", "3.9"]) +def test(session): + session.install(".[develop]") + + if session.posargs: + argv = session.posargs + else: + argv = ( + "-vvv", + "--cov=elasticsearch_dsl", + "--cov=tests.test_integration.test_examples", + "tests/", + ) + session.run("pytest", *argv) + + +@nox.session() +def format(session): + session.install("black==21.12b0", "click==8.0.4", "isort") + session.run( + "black", "--target-version=py27", "--target-version=py37", *SOURCE_FILES + ) + session.run("isort", *SOURCE_FILES) + session.run("python", "utils/license-headers.py", "fix", *SOURCE_FILES) + + lint(session) + + +@nox.session +def lint(session): + session.install("flake8", "black==21.12b0", "click==8.0.4", "isort") + session.run( + "black", + "--check", + "--target-version=py27", + "--target-version=py37", + *SOURCE_FILES + ) + session.run("isort", "--check", *SOURCE_FILES) + session.run("flake8", "--ignore=E501,E741,W503", *SOURCE_FILES) + session.run("python", "utils/license-headers.py", "check", *SOURCE_FILES) + + +@nox.session() +def docs(session): + session.install(".[develop]", "sphinx-rtd-theme") + + session.run("sphinx-build", "docs/", "docs/_build", "-b", "html") diff --git a/setup.cfg b/setup.cfg index 7d5a6f764..38d0aee11 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,3 +5,6 @@ all_files = 1 [wheel] universal = 1 + +[isort] +profile = black diff --git a/setup.py b/setup.py index 6f7adb6aa..6f87b7aff 100644 --- a/setup.py +++ b/setup.py @@ -1,49 +1,67 @@ # -*- coding: utf-8 -*- -from os.path import join, dirname -from setuptools import setup, find_packages +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
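The new noxfile.py above drives linting, formatting, testing and docs builds. Its sessions are typically run from the command line, for example (exact invocation may vary with the installed nox version):

    python -m pip install nox
    python -m nox -s lint        # black --check, isort --check, flake8, license-header check
    python -m nox -s format      # rewrite files with black/isort and fix license headers
    python -m nox -s test-3.8    # pytest with coverage on Python 3.8
    python -m nox -s docs        # sphinx-build docs/ docs/_build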
-VERSION = (7, 2, 0) +from os.path import dirname, join + +from setuptools import find_packages, setup + +VERSION = (7, 4, 1) __version__ = VERSION -__versionstr__ = '.'.join(map(str, VERSION)) +__versionstr__ = ".".join(map(str, VERSION)) -f = open(join(dirname(__file__), 'README')) +f = open(join(dirname(__file__), "README")) long_description = f.read().strip() f.close() install_requires = [ - 'six', - 'python-dateutil', - 'elasticsearch>=7.0.0,<8.0.0', + "six", + "python-dateutil", + "elasticsearch>=7.0.0,<8.0.0", # ipaddress is included in stdlib since python 3.3 - 'ipaddress; python_version<"3.3"' + 'ipaddress; python_version<"3.3"', ] -tests_require = [ +develop_requires = [ "mock", "pytest>=3.0.0", "pytest-cov", "pytest-mock<3.0.0", "pytz", - "coverage<5.0.0" + "coverage<5.0.0", + "sphinx", + "sphinx_rtd_theme", ] setup( - name = "elasticsearch-dsl", - description = "Python client for Elasticsearch", + name="elasticsearch-dsl", + description="Python client for Elasticsearch", license="Apache-2.0", - url = "https://github.com/elasticsearch/elasticsearch-dsl-py", - long_description = long_description, - version = __versionstr__, - author = "Honza Král", - author_email = "honza.kral@gmail.com", - maintainer = "Seth Michael Larson", - maintainer_email = "seth.larson@elastic.co", - packages=find_packages( - where='.', - exclude=('test_elasticsearch_dsl*', ) - ), - python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*', - classifiers = [ + url="https://github.com/elasticsearch/elasticsearch-dsl-py", + long_description=long_description, + long_description_content_type="text/x-rst", + version=__versionstr__, + author="Honza Král", + author_email="honza.kral@gmail.com", + maintainer="Seth Michael Larson", + maintainer_email="seth.larson@elastic.co", + packages=find_packages(where=".", exclude=("tests*",)), + python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*", + classifiers=[ "Development Status :: 4 - Beta", "License :: OSI Approved :: Apache Software License", "Intended Audience :: Developers", @@ -57,15 +75,10 @@ "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ], install_requires=install_requires, - - test_suite = "test_elasticsearch_dsl.run_tests.run_all", - tests_require=tests_require, - - extras_require={ - 'develop': tests_require + ["sphinx", "sphinx_rtd_theme"] - }, + extras_require={"develop": develop_requires}, ) diff --git a/test_elasticsearch_dsl/__init__.py b/test_elasticsearch_dsl/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/test_elasticsearch_dsl/conftest.py b/test_elasticsearch_dsl/conftest.py deleted file mode 100644 index 4abc1571a..000000000 --- a/test_elasticsearch_dsl/conftest.py +++ /dev/null @@ -1,248 +0,0 @@ -# -*- coding: utf-8 -*- - -import os -from datetime import datetime - -from elasticsearch.helpers import bulk -from elasticsearch.helpers.test import SkipTest, get_test_client -from mock import Mock -from pytest import fixture, skip - -from elasticsearch_dsl.connections import connections, add_connection -from .test_integration.test_data import DATA, FLAT_DATA, TEST_GIT_DATA, \ - create_git_index, create_flat_git_index -from .test_integration.test_document import PullRequest, Comment, User, History - - -@fixture(scope='session') -def client(): - try: - connection = 
get_test_client(nowait='WAIT_FOR_ES' not in os.environ) - add_connection('default', connection) - return connection - except SkipTest: - skip() - -@fixture -def write_client(client): - yield client - client.indices.delete('test-*', ignore=404) - client.indices.delete_template('test-template', ignore=404) - -@fixture -def mock_client(dummy_response): - client = Mock() - client.search.return_value = dummy_response - add_connection('mock', client) - yield client - connections._conn = {} - connections._kwargs = {} - -@fixture(scope='session') -def data_client(client): - # create mappings - create_git_index(client, 'git') - create_flat_git_index(client, 'flat-git') - # load data - bulk(client, DATA, raise_on_error=True, refresh=True) - bulk(client, FLAT_DATA, raise_on_error=True, refresh=True) - yield client - client.indices.delete('git') - client.indices.delete('flat-git') - -@fixture -def dummy_response(): - return { - "_shards": { - "failed": 0, - "successful": 10, - "total": 10 - }, - "hits": { - "hits": [ - { - "_index": "test-index", - "_type": "company", - "_id": "elasticsearch", - "_score": 12.0, - - "_source": { - "city": "Amsterdam", - "name": "Elasticsearch", - }, - }, - { - "_index": "test-index", - "_type": "employee", - "_id": "42", - "_score": 11.123, - "_routing": "elasticsearch", - - "_source": { - "name": { - "first": "Shay", - "last": "Bannon" - }, - "lang": "java", - "twitter": "kimchy", - }, - }, - { - "_index": "test-index", - "_type": "employee", - "_id": "47", - "_score": 1, - "_routing": "elasticsearch", - - "_source": { - "name": { - "first": "Honza", - "last": "Král" - }, - "lang": "python", - "twitter": "honzakral", - }, - }, - { - "_index": "test-index", - "_type": "employee", - "_id": "53", - "_score": 16.0, - "_routing": "elasticsearch", - }, - ], - "max_score": 12.0, - "total": 123 - }, - "timed_out": False, - "took": 123 - } - -@fixture -def aggs_search(): - from elasticsearch_dsl import Search - s = Search(index='flat-git') - s.aggs\ - .bucket('popular_files', 'terms', field='files', size=2)\ - .metric('line_stats', 'stats', field='stats.lines')\ - .metric('top_commits', 'top_hits', size=2, _source=["stats.*", "committed_date"]) - s.aggs.bucket('per_month', 'date_histogram', interval='month', field='info.committed_date') - s.aggs.metric('sum_lines', 'sum', field='stats.lines') - return s - -@fixture -def aggs_data(): - return { - 'took': 4, - 'timed_out': False, - '_shards': {'total': 1, 'successful': 1, 'failed': 0}, - 'hits': {'total': 52, 'hits': [], 'max_score': 0.0}, - 'aggregations': { - 'sum_lines': {'value': 25052.0}, - 'per_month': { - 'buckets': [ - {'doc_count': 38, 'key': 1393632000000, 'key_as_string': '2014-03-01T00:00:00.000Z'}, - {'doc_count': 11, 'key': 1396310400000, 'key_as_string': '2014-04-01T00:00:00.000Z'}, - {'doc_count': 3, 'key': 1398902400000, 'key_as_string': '2014-05-01T00:00:00.000Z'}, - ] - }, - 'popular_files': { - 'buckets': [ - { - 'key': 'elasticsearch_dsl', - 'line_stats': {'count': 40, 'max': 228.0, 'min': 2.0, 'sum': 2151.0, 'avg': 53.775}, - 'doc_count': 40, - 'top_commits': { - 'hits': { - 'total': 40, - 'hits': [ - { - '_id': '3ca6e1e73a071a705b4babd2f581c91a2a3e5037', - '_type': 'doc', - '_source': { - 'stats': {'files': 4, 'deletions': 7, 'lines': 30, 'insertions': 23}, - 'committed_date': '2014-05-02T13:47:19' - }, - '_score': 1.0, - '_index': 'flat-git' - }, - { - '_id': 'eb3e543323f189fd7b698e66295427204fff5755', - '_type': 'doc', - '_source': { - 'stats': {'files': 1, 'deletions': 0, 'lines': 18, 'insertions': 18}, 
- 'committed_date': '2014-05-01T13:32:14' - }, - '_score': 1.0, - '_index': 'flat-git' - } - ], - 'max_score': 1.0 - } - } - }, - { - 'key': 'test_elasticsearch_dsl', - 'line_stats': {'count': 35, 'max': 228.0, 'min': 2.0, 'sum': 1939.0, 'avg': 55.4}, - 'doc_count': 35, - 'top_commits': { - 'hits': { - 'total': 35, - 'hits': [ - { - '_id': '3ca6e1e73a071a705b4babd2f581c91a2a3e5037', - '_type': 'doc', - '_source': { - 'stats': {'files': 4, 'deletions': 7, 'lines': 30, 'insertions': 23}, - 'committed_date': '2014-05-02T13:47:19' - }, - '_score': 1.0, - '_index': 'flat-git' - }, { - '_id': 'dd15b6ba17dd9ba16363a51f85b31f66f1fb1157', - '_type': 'doc', - '_source': { - 'stats': {'files': 3, 'deletions': 18, 'lines': 62, 'insertions': 44}, - 'committed_date': '2014-05-01T13:30:44' - }, - '_score': 1.0, - '_index': 'flat-git' - } - ], - 'max_score': 1.0 - } - } - } - ], - 'doc_count_error_upper_bound': 0, - 'sum_other_doc_count': 120 - } - } - } - -@fixture -def pull_request(write_client): - PullRequest.init() - pr = PullRequest(_id=42, - comments=[ - Comment(content='Hello World!', - author=User(name='honzakral'), - created_at=datetime(2018, 1, 9, 10, 17, 3, 21184), - history=[ - History( - timestamp=datetime(2012, 1, 1), - diff='-Ahoj Svete!\n+Hello World!' - ) - ] - ), - ], - created_at=datetime(2018, 1, 9, 9, 17, 3, 21184)) - pr.save(refresh=True) - return pr - -@fixture -def setup_ubq_tests(client): - index = 'test-git' - create_git_index(client, index) - bulk(client, TEST_GIT_DATA, raise_on_error=True, refresh=True) - return index diff --git a/test_elasticsearch_dsl/run_tests.py b/test_elasticsearch_dsl/run_tests.py deleted file mode 100755 index 1c9232014..000000000 --- a/test_elasticsearch_dsl/run_tests.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python -import sys - -import pytest - -def run_all(argv=None): - # always insert coverage when running tests through setup.py - if argv is None: - argv = ['--cov', 'elasticsearch_dsl', '--cov', 'test_elasticsearch_dsl.test_integration.test_examples', '--verbose', '--junitxml', 'junit.xml', '--cov-report', 'xml'] - else: - argv = argv[1:] - - sys.exit(pytest.main(argv)) - -if __name__ == '__main__': - run_all(sys.argv) - - diff --git a/test_elasticsearch_dsl/test_aggs.py b/test_elasticsearch_dsl/test_aggs.py deleted file mode 100644 index 5c8ab9a32..000000000 --- a/test_elasticsearch_dsl/test_aggs.py +++ /dev/null @@ -1,274 +0,0 @@ -from elasticsearch_dsl import aggs, query - -from pytest import raises - -def test_repr(): - max_score = aggs.Max(field='score') - a = aggs.A('terms', field='tags', aggs={'max_score': max_score}) - - assert "Terms(aggs={'max_score': Max(field='score')}, field='tags')" == repr(a) - -def test_meta(): - max_score = aggs.Max(field='score') - a = aggs.A('terms', field='tags', aggs={'max_score': max_score}, meta={'some': 'metadata'}) - - assert { - 'terms': {'field': 'tags'}, - 'aggs': {'max_score': {'max': {'field': 'score'}}}, - 'meta': {'some': 'metadata'} - } == a.to_dict() - -def test_meta_from_dict(): - max_score = aggs.Max(field='score') - a = aggs.A('terms', field='tags', aggs={'max_score': max_score}, meta={'some': 'metadata'}) - - assert aggs.A(a.to_dict()) == a - -def test_A_creates_proper_agg(): - a = aggs.A('terms', field='tags') - - assert isinstance(a, aggs.Terms) - assert a._params == {'field': 'tags'} - -def test_A_handles_nested_aggs_properly(): - max_score = aggs.Max(field='score') - a = aggs.A('terms', field='tags', aggs={'max_score': max_score}) - - assert isinstance(a, aggs.Terms) - assert 
a._params == {'field': 'tags', 'aggs': {'max_score': max_score}} - -def test_A_passes_aggs_through(): - a = aggs.A('terms', field='tags') - assert aggs.A(a) is a - -def test_A_from_dict(): - d = { - 'terms': {'field': 'tags'}, - 'aggs': {'per_author': {'terms': {'field': 'author.raw'}}}, - } - a = aggs.A(d) - - assert isinstance(a, aggs.Terms) - assert a._params == {'field': 'tags', 'aggs': {'per_author': aggs.A('terms', field='author.raw')}} - assert a['per_author'] == aggs.A('terms', field='author.raw') - assert a.aggs.per_author == aggs.A('terms', field='author.raw') - -def test_A_fails_with_incorrect_dict(): - correct_d = { - 'terms': {'field': 'tags'}, - 'aggs': {'per_author': {'terms': {'field': 'author.raw'}}}, - } - - with raises(Exception): - aggs.A(correct_d, field='f') - - d = correct_d.copy() - del d['terms'] - with raises(Exception): - aggs.A(d) - - d = correct_d.copy() - d['xx'] = {} - with raises(Exception): - aggs.A(d) - -def test_A_fails_with_agg_and_params(): - a = aggs.A('terms', field='tags') - - with raises(Exception): - aggs.A(a, field='score') - -def test_buckets_are_nestable(): - a = aggs.Terms(field='tags') - b = a.bucket('per_author', 'terms', field='author.raw') - - assert isinstance(b, aggs.Terms) - assert b._params == {'field': 'author.raw'} - assert a.aggs == {'per_author': b} - -def test_metric_inside_buckets(): - a = aggs.Terms(field='tags') - b = a.metric('max_score', 'max', field='score') - - # returns bucket so it's chainable - assert a is b - assert a.aggs['max_score'] == aggs.Max(field='score') - -def test_buckets_equals_counts_subaggs(): - a = aggs.Terms(field='tags') - a.bucket('per_author', 'terms', field='author.raw') - b = aggs.Terms(field='tags') - - assert a != b - -def test_buckets_to_dict(): - a = aggs.Terms(field='tags') - a.bucket('per_author', 'terms', field='author.raw') - - assert { - 'terms': {'field': 'tags'}, - 'aggs': {'per_author': {'terms': {'field': 'author.raw'}}}, - } == a.to_dict() - - a = aggs.Terms(field='tags') - a.metric('max_score', 'max', field='score') - - assert { - 'terms': {'field': 'tags'}, - 'aggs': {'max_score': {'max': {'field': 'score'}}}, - } == a.to_dict() - -def test_nested_buckets_are_reachable_as_getitem(): - a = aggs.Terms(field='tags') - b = a.bucket('per_author', 'terms', field='author.raw') - - assert a['per_author'] is not b - assert a['per_author'] == b - -def test_nested_buckets_are_settable_as_getitem(): - a = aggs.Terms(field='tags') - b = a['per_author'] = aggs.A('terms', field='author.raw') - - assert a.aggs['per_author'] is b - -def test_filter_can_be_instantiated_using_positional_args(): - a = aggs.Filter(query.Q('term', f=42)) - - assert { - 'filter': { - 'term': {'f': 42} - } - } == a.to_dict() - - assert a == aggs.A('filter', query.Q('term', f=42)) - -def test_filter_aggregation_as_nested_agg(): - a = aggs.Terms(field='tags') - a.bucket('filtered', 'filter', query.Q('term', f=42)) - - assert { - 'terms': {'field': 'tags'}, - 'aggs': { - 'filtered': { - 'filter': { - 'term': {'f': 42} - }, - } - } - } == a.to_dict() - -def test_filter_aggregation_with_nested_aggs(): - a = aggs.Filter(query.Q('term', f=42)) - a.bucket('testing', 'terms', field='tags') - - assert { - 'filter': { - 'term': {'f': 42} - }, - 'aggs': { - 'testing': {'terms': {'field': 'tags'}} - } - } == a.to_dict() - -def test_filters_correctly_identifies_the_hash(): - a = aggs.A('filters', filters={'group_a': {'term': {'group': 'a'}}, 'group_b': {'term': {'group': 'b'}}}) - - assert { - 'filters': { - 'filters': { - 'group_a': 
{'term': {'group': 'a'}}, - 'group_b': {'term': {'group': 'b'}} - } - } - } == a.to_dict() - assert a.filters.group_a == query.Q('term', group='a') - -def test_bucket_sort_agg(): - bucket_sort_agg = aggs.BucketSort( - sort=[{"total_sales": {"order": "desc"}}], - size=3 - ) - assert bucket_sort_agg.to_dict() == { - "bucket_sort": { - "sort": [ - {"total_sales": {"order": "desc"}} - ], - "size": 3 - } - } - - a = aggs.DateHistogram(field='date', interval='month') - a.bucket('total_sales', 'sum', field='price') - a.bucket( - 'sales_bucket_sort', - 'bucket_sort', - sort=[{"total_sales": {"order": "desc"}}], - size=3 - ) - assert { - "date_histogram": { - "field": "date", - "interval": "month" - }, - "aggs": { - "total_sales": { - "sum": { - "field": "price" - } - }, - "sales_bucket_sort": { - "bucket_sort": { - "sort": [ - {"total_sales": {"order": "desc"}} - ], - "size": 3 - } - } - } - } == a.to_dict() - -def test_bucket_sort_agg_only_trnunc(): - bucket_sort_agg = aggs.BucketSort(**{'from': 1, 'size': 1}) - assert bucket_sort_agg.to_dict() == { - "bucket_sort": { - "from": 1, - "size": 1 - } - } - - a = aggs.DateHistogram(field='date', interval='month') - a.bucket('bucket_truncate', 'bucket_sort', **{'from': 1, 'size': 1}) - assert { - "date_histogram": { - "field": "date", - "interval": "month" - }, - "aggs": { - "bucket_truncate": { - "bucket_sort": { - "from": 1, - "size": 1 - } - } - } - } == a.to_dict() - -def test_geohash_grid_aggregation(): - a = aggs.GeohashGrid(**{'field': 'centroid', 'precision': 3}) - - assert { - 'geohash_grid': { - 'field': 'centroid', - 'precision': 3 - } - } == a.to_dict() - -def test_geotile_grid_aggregation(): - a = aggs.GeotileGrid(**{'field': 'centroid', 'precision': 3}) - - assert { - 'geotile_grid': { - 'field': 'centroid', - 'precision': 3 - } - } == a.to_dict() diff --git a/test_elasticsearch_dsl/test_analysis.py b/test_elasticsearch_dsl/test_analysis.py deleted file mode 100644 index 34efe8bdc..000000000 --- a/test_elasticsearch_dsl/test_analysis.py +++ /dev/null @@ -1,251 +0,0 @@ -# coding: utf-8 -from elasticsearch_dsl import analysis - -from pytest import raises - -def test_analyzer_serializes_as_name(): - a = analysis.analyzer('my_analyzer') - - assert 'my_analyzer' == a.to_dict() - -def test_analyzer_has_definition(): - a = analysis.CustomAnalyzer( - 'my_analyzer', - tokenizer='keyword', - filter=['lowercase'] - ) - - assert { - 'type': 'custom', - 'tokenizer': 'keyword', - 'filter': ["lowercase"], - } == a.get_definition() - -def test_simple_multiplexer_filter(): - a = analysis.analyzer( - 'my_analyzer', - tokenizer='keyword', - filter=[ - analysis.token_filter( - 'my_multi', - 'multiplexer', - filters=['lowercase', 'lowercase, stop'] - ) - ] - ) - - assert { - "analyzer": { - "my_analyzer": { - "filter": [ - "my_multi" - ], - "tokenizer": "keyword", - "type": "custom" - } - }, - "filter": { - "my_multi": { - "filters": [ - "lowercase", - "lowercase, stop" - ], - "type": "multiplexer" - } - } - } == a.get_analysis_definition() - -def test_multiplexer_with_custom_filter(): - a = analysis.analyzer( - 'my_analyzer', - tokenizer='keyword', - filter=[ - analysis.token_filter( - 'my_multi', - 'multiplexer', - filters=[ - [ - analysis.token_filter( - 'en', - 'snowball', - language='English' - ) - ], - 'lowercase, stop' - ] - ) - ] - ) - - assert { - "analyzer": { - "my_analyzer": { - "filter": [ - "my_multi" - ], - "tokenizer": "keyword", - "type": "custom" - } - }, - "filter": { - "en": { - "type": "snowball", - "language": "English" - }, - 
"my_multi": { - "filters": [ - "en", - "lowercase, stop" - ], - "type": "multiplexer" - } - } - } == a.get_analysis_definition() - -def test_conditional_token_filter(): - a = analysis.analyzer( - 'my_cond', - tokenizer=analysis.tokenizer('keyword'), - filter=[ - analysis.token_filter( - 'testing', - 'condition', - script={'source': 'return true'}, - filter=[ - 'lowercase', - analysis.token_filter( - 'en', - 'snowball', - language='English' - ) - ] - ), - 'stop' - ] - ) - - assert { - "analyzer": { - "my_cond": { - "filter": [ - "testing", - "stop" - ], - "tokenizer": "keyword", - "type": "custom" - } - }, - "filter": { - "en": { - "language": "English", - "type": "snowball" - }, - "testing": { - "script": {"source": "return true"}, - "filter": [ - "lowercase", - "en" - ], - "type": "condition" - } - } - } == a.get_analysis_definition() - -def test_conflicting_nested_filters_cause_error(): - a = analysis.analyzer( - 'my_cond', - tokenizer=analysis.tokenizer('keyword'), - filter=[ - analysis.token_filter( - 'en', - 'stemmer', - language='english' - ), - analysis.token_filter( - 'testing', - 'condition', - script={'source': 'return true'}, - filter=[ - 'lowercase', - analysis.token_filter( - 'en', - 'snowball', - language='English' - ) - ] - ) - ] - ) - - with raises(ValueError): - a.get_analysis_definition() - - -def test_normalizer_serializes_as_name(): - n = analysis.normalizer('my_normalizer') - - assert 'my_normalizer' == n.to_dict() - -def test_normalizer_has_definition(): - n = analysis.CustomNormalizer( - 'my_normalizer', - filter=['lowercase', 'asciifolding'], - char_filter=['quote'] - ) - - assert { - 'type': 'custom', - 'filter': ['lowercase', 'asciifolding'], - 'char_filter': ['quote'] - } == n.get_definition() - -def test_tokenizer(): - t = analysis.tokenizer('trigram', 'nGram', min_gram=3, max_gram=3) - - assert t.to_dict() == 'trigram' - assert { - 'type': 'nGram', - 'min_gram': 3, - 'max_gram': 3 - } == t.get_definition() - -def test_custom_analyzer_can_collect_custom_items(): - trigram = analysis.tokenizer('trigram', 'nGram', min_gram=3, max_gram=3) - my_stop = analysis.token_filter('my_stop', 'stop', stopwords=['a', 'b']) - umlauts = analysis.char_filter('umlauts', 'pattern_replace', mappings=['ü=>ue']) - a = analysis.analyzer( - 'my_analyzer', - tokenizer=trigram, - filter=['lowercase', my_stop], - char_filter=['html_strip', umlauts] - ) - - assert a.to_dict() == 'my_analyzer' - assert { - 'analyzer': { - 'my_analyzer': { - 'type': 'custom', - 'tokenizer': 'trigram', - 'filter': ['lowercase', 'my_stop'], - 'char_filter': ['html_strip', 'umlauts'] - } - }, - 'tokenizer': { - 'trigram': trigram.get_definition() - }, - 'filter': { - 'my_stop': my_stop.get_definition() - }, - 'char_filter': { - 'umlauts': umlauts.get_definition() - } - } == a.get_analysis_definition() - -def test_stemmer_analyzer_can_pass_name(): - t = analysis.token_filter('my_english_filter', name="minimal_english", type="stemmer") - assert t.to_dict() == 'my_english_filter' - assert { - "type" : "stemmer", - "name" : "minimal_english" - } == t.get_definition() - diff --git a/test_elasticsearch_dsl/test_connections.py b/test_elasticsearch_dsl/test_connections.py deleted file mode 100644 index 639022564..000000000 --- a/test_elasticsearch_dsl/test_connections.py +++ /dev/null @@ -1,69 +0,0 @@ -from elasticsearch import Elasticsearch - -from elasticsearch_dsl import connections, serializer - -from pytest import raises - -def test_default_connection_is_returned_by_default(): - c = connections.Connections() - - 
con, con2 = object(), object() - c.add_connection('default', con) - - c.add_connection('not-default', con2) - - assert c.get_connection() is con - -def test_get_connection_created_connection_if_needed(): - c = connections.Connections() - c.configure(default={'hosts': ['es.com']}, local={'hosts': ['localhost']}) - - default = c.get_connection() - local = c.get_connection('local') - - assert isinstance(default, Elasticsearch) - assert isinstance(local, Elasticsearch) - - assert [{'host': 'es.com'}] == default.transport.hosts - assert [{'host': 'localhost'}] == local.transport.hosts - -def test_configure_preserves_unchanged_connections(): - c = connections.Connections() - - c.configure(default={'hosts': ['es.com']}, local={'hosts': ['localhost']}) - default = c.get_connection() - local = c.get_connection('local') - - c.configure(default={'hosts': ['not-es.com']}, local={'hosts': ['localhost']}) - new_default = c.get_connection() - new_local = c.get_connection('local') - - assert new_local is local - assert new_default is not default - -def test_remove_connection_removes_both_conn_and_conf(): - c = connections.Connections() - - c.configure(default={'hosts': ['es.com']}, local={'hosts': ['localhost']}) - c.add_connection('local2', object()) - - c.remove_connection('default') - c.get_connection('local2') - c.remove_connection('local2') - - with raises(Exception): - c.get_connection('local2') - c.get_connection('default') - -def test_create_connection_constructs_client(): - c = connections.Connections() - c.create_connection('testing', hosts=['es.com']) - - con = c.get_connection('testing') - assert [{'host': 'es.com'}] == con.transport.hosts - -def test_create_connection_adds_our_serializer(): - c = connections.Connections() - c.create_connection('testing', hosts=['es.com']) - - assert c.get_connection('testing').transport.serializer is serializer.serializer diff --git a/test_elasticsearch_dsl/test_document.py b/test_elasticsearch_dsl/test_document.py deleted file mode 100644 index 4b5073a9a..000000000 --- a/test_elasticsearch_dsl/test_document.py +++ /dev/null @@ -1,584 +0,0 @@ -import pickle -import codecs -from hashlib import md5 -from datetime import datetime -import ipaddress - -from elasticsearch_dsl import document, field, Mapping, utils, InnerDoc, analyzer, Index, Range -from elasticsearch_dsl.exceptions import ValidationException, IllegalOperation - -from pytest import raises - -class MyInner(InnerDoc): - old_field = field.Text() - -class MyDoc(document.Document): - title = field.Keyword() - name = field.Text() - created_at = field.Date() - inner = field.Object(MyInner) - -class MySubDoc(MyDoc): - name = field.Keyword() - - class Index: - name = 'default-index' - -class MyDoc2(document.Document): - extra = field.Long() - -class MyMultiSubDoc(MyDoc2, MySubDoc): - pass - -class Comment(document.InnerDoc): - title = field.Text() - tags = field.Keyword(multi=True) - -class DocWithNested(document.Document): - comments = field.Nested(Comment) - - class Index: - name = 'test-doc-with-nested' - -class SimpleCommit(document.Document): - files = field.Text(multi=True) - - class Index: - name = 'test-git' - -class Secret(str): pass - -class SecretField(field.CustomField): - builtin_type = 'text' - - def _serialize(self, data): - return codecs.encode(data, 'rot_13') - - def _deserialize(self, data): - if isinstance(data, Secret): - return data - return Secret(codecs.decode(data, 'rot_13')) - -class SecretDoc(document.Document): - title = SecretField(index='no') - - class Index: - name = 
'test-secret-doc' - -class NestedSecret(document.Document): - secrets = field.Nested(SecretDoc) - - class Index: - name = 'test-nested-secret' - -class OptionalObjectWithRequiredField(document.Document): - comments = field.Nested(properties={'title': field.Keyword(required=True)}) - - class Index: - name = 'test-required' - -class Host(document.Document): - ip = field.Ip() - - class Index: - name = 'test-host' - -def test_range_serializes_properly(): - class D(document.Document): - lr = field.LongRange() - - d = D(lr=Range(lt=42)) - assert 40 in d.lr - assert 47 not in d.lr - assert { - 'lr': {'lt': 42} - } == d.to_dict() - - d = D(lr={'lt': 42}) - assert { - 'lr': {'lt': 42} - } == d.to_dict() - -def test_range_deserializes_properly(): - class D(document.InnerDoc): - lr = field.LongRange() - - d = D.from_es({'lr': {'lt': 42}}, True) - assert isinstance(d.lr, Range) - assert 40 in d.lr - assert 47 not in d.lr - -def test_resolve_nested(): - nested, field = NestedSecret._index.resolve_nested('secrets.title') - assert nested == ['secrets'] - assert field is NestedSecret._doc_type.mapping['secrets']['title'] - -def test_conflicting_mapping_raises_error_in_index_to_dict(): - class A(document.Document): - name = field.Text() - - class B(document.Document): - name = field.Keyword() - - i = Index('i') - i.document(A) - i.document(B) - - with raises(ValueError): - i.to_dict() - -def test_ip_address_serializes_properly(): - host = Host(ip=ipaddress.IPv4Address(u'10.0.0.1')) - - assert {'ip': '10.0.0.1'} == host.to_dict() - -def test_matches_uses_index(): - assert SimpleCommit._matches({ - '_index': 'test-git' - }) - assert not SimpleCommit._matches({ - '_index': 'not-test-git' - }) - -def test_matches_with_no_name_always_matches(): - class D(document.Document): - pass - - assert D._matches({}) - assert D._matches({'_index': 'whatever'}) - -def test_matches_accepts_wildcards(): - class MyDoc(document.Document): - class Index: - name = 'my-*' - - assert MyDoc._matches({ - '_index': 'my-index' - }) - assert not MyDoc._matches({ - '_index': 'not-my-index' - }) - -def test_assigning_attrlist_to_field(): - sc = SimpleCommit() - l = ['README', 'README.rst'] - sc.files = utils.AttrList(l) - - assert sc.to_dict()['files'] is l - -def test_optional_inner_objects_are_not_validated_if_missing(): - d = OptionalObjectWithRequiredField() - - assert d.full_clean() is None - -def test_custom_field(): - s = SecretDoc(title=Secret('Hello')) - - assert {'title': 'Uryyb'} == s.to_dict() - assert s.title == 'Hello' - - s = SecretDoc.from_es({'_source': {'title': 'Uryyb'}}) - assert s.title == 'Hello' - assert isinstance(s.title, Secret) - -def test_custom_field_mapping(): - assert { - 'properties': { - 'title': {'index': 'no', 'type': 'text'} - } - } == SecretDoc._doc_type.mapping.to_dict() - -def test_custom_field_in_nested(): - s = NestedSecret() - s.secrets.append(SecretDoc(title=Secret('Hello'))) - - assert {'secrets': [{'title': 'Uryyb'}]} == s.to_dict() - assert s.secrets[0].title == 'Hello' - -def test_multi_works_after_doc_has_been_saved(): - c = SimpleCommit() - c.full_clean() - c.files.append('setup.py') - - assert c.to_dict() == {'files': ['setup.py']} - -def test_multi_works_in_nested_after_doc_has_been_serialized(): - # Issue #359 - c = DocWithNested(comments=[Comment(title='First!')]) - - assert [] == c.comments[0].tags - assert {'comments': [{'title': 'First!'}]} == c.to_dict() - assert [] == c.comments[0].tags - -def test_null_value_for_object(): - d = MyDoc(inner=None) - - assert d.inner is None - -def 
test_inherited_doc_types_can_override_index(): - class MyDocDifferentIndex(MySubDoc): - class Index: - name = 'not-default-index' - settings = { - 'number_of_replicas': 0 - } - aliases = {'a': {}} - analyzers = [analyzer('my_analizer', tokenizer='keyword')] - - assert MyDocDifferentIndex._index._name == 'not-default-index' - assert MyDocDifferentIndex()._get_index() == 'not-default-index' - assert MyDocDifferentIndex._index.to_dict() == { - 'aliases': {'a': {}}, - 'mappings': { - 'properties': { - 'created_at': {'type': 'date'}, - 'inner': { - 'type': 'object', - 'properties': { - 'old_field': {'type': 'text'} - }, - }, - 'name': {'type': 'keyword'}, - 'title': {'type': 'keyword'} - } - }, - 'settings': { - 'analysis': { - 'analyzer': { - 'my_analizer': {'tokenizer': 'keyword', 'type': 'custom'} - } - }, - 'number_of_replicas': 0 - } - } - - - -def test_to_dict_with_meta(): - d = MySubDoc(title='hello') - d.meta.routing = 'some-parent' - - assert { - '_index': 'default-index', - '_routing': 'some-parent', - '_source': {'title': 'hello'}, - } == d.to_dict(True) - -def test_to_dict_with_meta_includes_custom_index(): - d = MySubDoc(title='hello') - d.meta.index = 'other-index' - - assert { - '_index': 'other-index', - '_source': {'title': 'hello'}, - } == d.to_dict(True) - -def test_to_dict_without_skip_empty_will_include_empty_fields(): - d = MySubDoc(tags=[], title=None, inner={}) - - assert {} == d.to_dict() - assert { - "tags": [], - "title": None, - "inner": {} - } == d.to_dict(skip_empty=False) - -def test_attribute_can_be_removed(): - d = MyDoc(title='hello') - - del d.title - assert 'title' not in d._d_ - -def test_doc_type_can_be_correctly_pickled(): - d = DocWithNested(title='Hello World!', comments=[Comment(title='hellp')], meta={'id': 42}) - s = pickle.dumps(d) - - d2 = pickle.loads(s) - - assert d2 == d - assert 42 == d2.meta.id - assert 'Hello World!' 
== d2.title - assert [{'title': 'hellp'}] == d2.comments - assert isinstance(d2.comments[0], Comment) - -def test_meta_is_accessible_even_on_empty_doc(): - d = MyDoc() - d.meta - - d = MyDoc(title='aaa') - d.meta - -def test_meta_field_mapping(): - class User(document.Document): - username = field.Text() - class Meta: - all = document.MetaField(enabled=False) - _index = document.MetaField(enabled=True) - dynamic = document.MetaField('strict') - dynamic_templates = document.MetaField([42]) - - assert { - 'properties': { - 'username': {'type': 'text'} - }, - '_all': {'enabled': False}, - '_index': {'enabled': True}, - 'dynamic': 'strict', - 'dynamic_templates': [42] - } == User._doc_type.mapping.to_dict() - -def test_multi_value_fields(): - class Blog(document.Document): - tags = field.Keyword(multi=True) - - b = Blog() - assert [] == b.tags - b.tags.append('search') - b.tags.append('python') - assert ['search', 'python'] == b.tags - -def test_docs_with_properties(): - class User(document.Document): - pwd_hash = field.Text() - - def check_password(self, pwd): - return md5(pwd).hexdigest() == self.pwd_hash - - @property - def password(self): - raise AttributeError('readonly') - - @password.setter - def password(self, pwd): - self.pwd_hash = md5(pwd).hexdigest() - - u = User(pwd_hash=md5(b'secret').hexdigest()) - assert u.check_password(b'secret') - assert not u.check_password(b'not-secret') - - u.password = b'not-secret' - assert 'password' not in u._d_ - assert not u.check_password(b'secret') - assert u.check_password(b'not-secret') - - with raises(AttributeError): - u.password - -def test_nested_can_be_assigned_to(): - d1 = DocWithNested(comments=[Comment(title='First!')]) - d2 = DocWithNested() - - d2.comments = d1.comments - assert isinstance(d1.comments[0], Comment) - assert d2.comments == [{'title': 'First!'}] - assert {'comments': [{'title': 'First!'}]} == d2.to_dict() - assert isinstance(d2.comments[0], Comment) - -def test_nested_can_be_none(): - d = DocWithNested(comments=None, title='Hello World!') - - assert {"title": 'Hello World!'} == d.to_dict() - -def test_nested_defaults_to_list_and_can_be_updated(): - md = DocWithNested() - - assert [] == md.comments - - md.comments.append({'title': 'hello World!'}) - assert {'comments': [{'title': 'hello World!'}]} == md.to_dict() - -def test_to_dict_is_recursive_and_can_cope_with_multi_values(): - md = MyDoc(name=['a', 'b', 'c']) - md.inner = [MyInner(old_field='of1'), MyInner(old_field='of2')] - - assert isinstance(md.inner[0], MyInner) - - assert { - 'name': ['a', 'b', 'c'], - 'inner': [{'old_field': 'of1'}, {'old_field': 'of2'}], - } == md.to_dict() - -def test_to_dict_ignores_empty_collections(): - md = MySubDoc(name='', address={}, count=0, valid=False, tags=[]) - - assert {'name': '', 'count': 0, 'valid': False} == md.to_dict() - - -def test_declarative_mapping_definition(): - assert issubclass(MyDoc, document.Document) - assert hasattr(MyDoc, '_doc_type') - assert { - 'properties': { - 'created_at': {'type': 'date'}, - 'name': {'type': 'text'}, - 'title': {'type': 'keyword'}, - 'inner': { - 'type': 'object', - 'properties': {'old_field': {'type': 'text'}} - } - } - } == MyDoc._doc_type.mapping.to_dict() - -def test_you_can_supply_own_mapping_instance(): - class MyD(document.Document): - title = field.Text() - - class Meta: - mapping = Mapping() - mapping.meta('_all', enabled=False) - - assert { - '_all': {'enabled': False}, - 'properties': {'title': {'type': 'text'}} - } == MyD._doc_type.mapping.to_dict() - -def 
test_document_can_be_created_dynamically(): - n = datetime.now() - md = MyDoc(title='hello') - md.name = 'My Fancy Document!' - md.created_at = n - - inner = md.inner - # consistent returns - assert inner is md.inner - inner.old_field = 'Already defined.' - - md.inner.new_field = ['undefined', 'field'] - - assert { - 'title': 'hello', - 'name': 'My Fancy Document!', - 'created_at': n, - 'inner': { - 'old_field': 'Already defined.', - 'new_field': ['undefined', 'field'] - } - } == md.to_dict() - -def test_invalid_date_will_raise_exception(): - md = MyDoc() - md.created_at = 'not-a-date' - with raises(ValidationException): - md.full_clean() - -def test_document_inheritance(): - assert issubclass(MySubDoc, MyDoc) - assert issubclass(MySubDoc, document.Document) - assert hasattr(MySubDoc, '_doc_type') - assert { - 'properties': { - 'created_at': {'type': 'date'}, - 'name': {'type': 'keyword'}, - 'title': {'type': 'keyword'}, - 'inner': { - 'type': 'object', - 'properties': {'old_field': {'type': 'text'}} - } - } - } == MySubDoc._doc_type.mapping.to_dict() - -def test_child_class_can_override_parent(): - class A(document.Document): - o = field.Object(dynamic=False, properties={'a': field.Text()}) - class B(A): - o = field.Object(dynamic='strict', properties={'b': field.Text()}) - - assert { - 'properties': { - 'o': { - 'dynamic': 'strict', - 'properties': { - 'a': {'type': 'text'}, - 'b': {'type': 'text'} - }, - 'type': 'object' - } - } - } == B._doc_type.mapping.to_dict() - -def test_meta_fields_are_stored_in_meta_and_ignored_by_to_dict(): - md = MySubDoc(meta={'id': 42}, name='My First doc!') - - md.meta.index = 'my-index' - assert md.meta.index == 'my-index' - assert md.meta.id == 42 - assert {'name': 'My First doc!'} == md.to_dict() - assert {'id': 42, 'index': 'my-index'} == md.meta.to_dict() - -def test_index_inheritance(): - assert issubclass(MyMultiSubDoc, MySubDoc) - assert issubclass(MyMultiSubDoc, MyDoc2) - assert issubclass(MyMultiSubDoc, document.Document) - assert hasattr(MyMultiSubDoc, '_doc_type') - assert hasattr(MyMultiSubDoc, '_index') - assert { - 'properties': { - 'created_at': {'type': 'date'}, - 'name': {'type': 'keyword'}, - 'title': {'type': 'keyword'}, - 'inner': { - 'type': 'object', - 'properties': {'old_field': {'type': 'text'}} - }, - 'extra': {'type': 'long'} - } - } == MyMultiSubDoc._doc_type.mapping.to_dict() - -def test_meta_fields_can_be_set_directly_in_init(): - p = object() - md = MyDoc(_id=p, title='Hello World!') - - assert md.meta.id is p - -def test_save_no_index(mock_client): - md = MyDoc() - with raises(ValidationException): - md.save(using='mock') - -def test_delete_no_index(mock_client): - md = MyDoc() - with raises(ValidationException): - md.delete(using='mock') - -def test_update_no_fields(): - md = MyDoc() - with raises(IllegalOperation): - md.update() - -def test_search_with_custom_alias_and_index(mock_client): - search_object = MyDoc.search( - using="staging", - index=["custom_index1", "custom_index2"]) - - assert search_object._using == "staging" - assert search_object._index == ["custom_index1", "custom_index2"] - -def test_from_es_respects_underscored_non_meta_fields(): - doc = { - "_index": "test-index", - "_id": "elasticsearch", - "_score": 12.0, - - "fields": { - "hello": "world", - "_routing": "es", - "_tags": ["search"] - - }, - - "_source": { - "city": "Amsterdam", - "name": "Elasticsearch", - "_tagline": "You know, for search" - } - } - - class Company(document.Document): - class Index: - name = 'test-company' - - c = 
Company.from_es(doc) - - assert c.meta.fields._tags == ['search'] - assert c.meta.fields._routing == 'es' - assert c._tagline == 'You know, for search' diff --git a/test_elasticsearch_dsl/test_faceted_search.py b/test_elasticsearch_dsl/test_faceted_search.py deleted file mode 100644 index 717ac0fbc..000000000 --- a/test_elasticsearch_dsl/test_faceted_search.py +++ /dev/null @@ -1,136 +0,0 @@ -from datetime import datetime - -from elasticsearch_dsl.faceted_search import (FacetedSearch, TermsFacet, - DateHistogramFacet) - - -class BlogSearch(FacetedSearch): - doc_types = ['user', 'post'] - fields = ('title^5', 'body', ) - - facets = { - 'category': TermsFacet(field='category.raw'), - 'tags': TermsFacet(field='tags'), - } - - -def test_query_is_created_properly(): - bs = BlogSearch('python search') - s = bs.build_search() - - assert s._doc_type == ['user', 'post'] - assert { - 'aggs': { - '_filter_tags': { - 'filter': { - 'match_all': {}, - }, - 'aggs': {'tags': {'terms': {'field': 'tags'}}}, - }, - '_filter_category': { - 'filter': { - 'match_all': {}, - }, - 'aggs': {'category': {'terms': {'field': 'category.raw'}}}, - }, - }, - 'query': { - 'multi_match': {'fields': ('title^5', 'body'), 'query': 'python search'} - }, - 'highlight': {'fields': {'body': {}, 'title': {}}} - } == s.to_dict() - -def test_query_is_created_properly_with_sort_tuple(): - bs = BlogSearch('python search', sort=('category', '-title')) - s = bs.build_search() - - assert s._doc_type == ['user', 'post'] - assert { - 'aggs': { - '_filter_tags': { - 'filter': { - 'match_all': {}, - }, - 'aggs': {'tags': {'terms': {'field': 'tags'}}}, - }, - '_filter_category': { - 'filter': { - 'match_all': {}, - }, - 'aggs': {'category': {'terms': {'field': 'category.raw'}}}, - }, - }, - 'query': { - 'multi_match': {'fields': ('title^5', 'body'), 'query': 'python search'} - }, - 'highlight': {'fields': {'body': {}, 'title': {}}}, - 'sort': ['category', {'title': {'order': 'desc'}}] - } == s.to_dict() - -def test_filter_is_applied_to_search_but_not_relevant_facet(): - bs = BlogSearch('python search', filters={'category': 'elastic'}) - s = bs.build_search() - - assert { - 'aggs': { - '_filter_tags': { - 'filter': {'terms': {'category.raw': ['elastic']}}, - 'aggs': {'tags': {'terms': {'field': 'tags'}}}, - }, - '_filter_category': { - 'filter': { - 'match_all': {}, - }, - 'aggs': {'category': {'terms': {'field': 'category.raw'}}}, - } - }, - 'post_filter': {'terms': {'category.raw': ['elastic']}}, - 'query': { - 'multi_match': {'fields': ('title^5', 'body'), 'query': 'python search'} - }, - 'highlight': {'fields': {'body': {}, 'title': {}}} - } == s.to_dict() - -def test_filters_are_applied_to_search_ant_relevant_facets(): - bs = BlogSearch('python search', filters={'category': 'elastic', 'tags': ['python', 'django']}) - s = bs.build_search() - - d = s.to_dict() - - - # we need to test post_filter without relying on order - f = d['post_filter']['bool'].pop('must') - assert len(f) == 2 - assert {'terms': {'category.raw': ['elastic']}} in f - assert {'terms': {'tags': ['python', 'django']}} in f - - assert { - 'aggs': { - '_filter_tags': { - 'filter': { - 'terms': {'category.raw': ['elastic']}, - }, - 'aggs': {'tags': {'terms': {'field': 'tags'}}}, - }, - '_filter_category': { - 'filter': { - 'terms': {'tags': ['python', 'django']}, - }, - 'aggs': {'category': {'terms': {'field': 'category.raw'}}}, - } - }, - 'query': { - 'multi_match': {'fields': ('title^5', 'body'), 'query': 'python search'} - }, - 'post_filter': { - 'bool': { - } - }, - 
'highlight': {'fields': {'body': {}, 'title': {}}} - } == d - - -def test_date_histogram_facet_with_1970_01_01_date(): - dhf = DateHistogramFacet() - assert dhf.get_value({'key': None}) == datetime(1970, 1, 1, 0, 0) - assert dhf.get_value({'key': 0}) == datetime(1970, 1, 1, 0, 0) diff --git a/test_elasticsearch_dsl/test_field.py b/test_elasticsearch_dsl/test_field.py deleted file mode 100644 index f9d4e197b..000000000 --- a/test_elasticsearch_dsl/test_field.py +++ /dev/null @@ -1,178 +0,0 @@ -import base64 -import ipaddress -from datetime import datetime -from dateutil import tz - -import pytest - -from elasticsearch_dsl import field, InnerDoc, ValidationException, Range - -def test_date_range_deserialization(): - data = { - 'lt': '2018-01-01T00:30:10' - } - - r = field.DateRange().deserialize(data) - - assert isinstance(r, Range) - assert r.lt == datetime(2018, 1, 1, 0, 30, 10) - -def test_boolean_deserialization(): - bf = field.Boolean() - - assert not bf.deserialize("false") - assert not bf.deserialize(False) - assert not bf.deserialize("") - assert not bf.deserialize(0) - - assert bf.deserialize(True) - assert bf.deserialize("true") - assert bf.deserialize(1) - -def test_date_field_can_have_default_tz(): - f = field.Date(default_timezone='UTC') - now = datetime.now() - - now_with_tz = f._deserialize(now) - - assert now_with_tz.tzinfo == tz.gettz('UTC') - assert now.isoformat() + '+00:00' == now_with_tz.isoformat() - - now_with_tz = f._deserialize(now.isoformat()) - - assert now_with_tz.tzinfo == tz.gettz('UTC') - assert now.isoformat() + '+00:00' == now_with_tz.isoformat() - -def test_custom_field_car_wrap_other_field(): - class MyField(field.CustomField): - @property - def builtin_type(self): - return field.Text(**self._params) - - assert {'type': 'text', 'index': 'not_analyzed'} == MyField(index='not_analyzed').to_dict() - -def test_field_from_dict(): - f = field.construct_field({'type': 'text', 'index': 'not_analyzed'}) - - assert isinstance(f, field.Text) - assert {'type': 'text', 'index': 'not_analyzed'} == f.to_dict() - - -def test_multi_fields_are_accepted_and_parsed(): - f = field.construct_field( - 'text', - fields={ - 'raw': {'type': 'keyword'}, - 'eng': field.Text(analyzer='english'), - } - ) - - assert isinstance(f, field.Text) - assert { - 'type': 'text', - 'fields': { - 'raw': {'type': 'keyword'}, - 'eng': {'type': 'text', 'analyzer': 'english'}, - } - } == f.to_dict() - -def test_nested_provides_direct_access_to_its_fields(): - f = field.Nested(properties={'name': {'type': 'text', 'index': 'not_analyzed'}}) - - assert 'name' in f - assert f['name'] == field.Text(index='not_analyzed') - - -def test_field_supports_multiple_analyzers(): - f = field.Text(analyzer='snowball', search_analyzer='keyword') - assert {'analyzer': 'snowball', 'search_analyzer': 'keyword', 'type': 'text'} == f.to_dict() - - -def test_multifield_supports_multiple_analyzers(): - f = field.Text(fields={ - 'f1': field.Text(search_analyzer='keyword', analyzer='snowball'), - 'f2': field.Text(analyzer='keyword') - }) - assert { - 'fields': { - 'f1': {'analyzer': 'snowball', - 'search_analyzer': 'keyword', - 'type': 'text' - }, - 'f2': { - 'analyzer': 'keyword', 'type': 'text'} - }, - 'type': 'text' - } == f.to_dict() - - -def test_scaled_float(): - with pytest.raises(TypeError): - field.ScaledFloat() - f = field.ScaledFloat(123) - assert f.to_dict() == {'scaling_factor': 123, 'type': 'scaled_float'} - - -def test_ipaddress(): - f = field.Ip() - assert f.deserialize('127.0.0.1') == 
ipaddress.ip_address(u'127.0.0.1') - assert f.deserialize(u'::1') == ipaddress.ip_address(u'::1') - assert f.serialize(f.deserialize('::1')) == '::1' - assert f.deserialize(None) is None - with pytest.raises(ValueError): - assert f.deserialize('not_an_ipaddress') - - -def test_float(): - f = field.Float() - assert f.deserialize('42') == 42.0 - assert f.deserialize(None) is None - with pytest.raises(ValueError): - assert f.deserialize('not_a_float') - - -def test_integer(): - f = field.Integer() - assert f.deserialize('42') == 42 - assert f.deserialize(None) is None - with pytest.raises(ValueError): - assert f.deserialize('not_an_integer') - - -def test_binary(): - f = field.Binary() - assert f.deserialize(base64.b64encode(b'42')) == b'42' - assert f.deserialize(f.serialize(b'42')) == b'42' - assert f.deserialize(None) is None - - -def test_object_dynamic_values(): - for dynamic in True, False, 'strict': - f = field.Object(dynamic=dynamic) - assert f.to_dict()['dynamic'] == dynamic - -def test_object_disabled(): - f = field.Object(enabled=False) - assert f.to_dict() == { - "type": "object", - "enabled": False - } - - -def test_object_constructor(): - expected = {'type': 'object', 'properties': {'inner_int': {'type': 'integer'}}} - - class Inner(InnerDoc): - inner_int = field.Integer() - - obj_from_doc = field.Object(doc_class=Inner) - assert obj_from_doc.to_dict() == expected - - obj_from_props = field.Object(properties={'inner_int': field.Integer()}) - assert obj_from_props.to_dict() == expected - - with pytest.raises(ValidationException): - field.Object(doc_class=Inner, properties={'inner_int': field.Integer()}) - - with pytest.raises(ValidationException): - field.Object(doc_class=Inner, dynamic=False) diff --git a/test_elasticsearch_dsl/test_index.py b/test_elasticsearch_dsl/test_index.py deleted file mode 100644 index 00ad27082..000000000 --- a/test_elasticsearch_dsl/test_index.py +++ /dev/null @@ -1,166 +0,0 @@ -import string -from random import choice - -from pytest import raises - -from elasticsearch_dsl import Date, Document, Index, IndexTemplate, Text, analyzer - - -class Post(Document): - title = Text() - published_from = Date() - - -def test_multiple_doc_types_will_combine_mappings(): - class User(Document): - username = Text() - - i = Index('i') - i.document(Post) - i.document(User) - assert { - 'mappings': { - 'properties': { - 'title': {'type': 'text'}, - 'username': {'type': 'text'}, - 'published_from': {'type': 'date'} - } - } - } == i.to_dict() - - -def test_search_is_limited_to_index_name(): - i = Index('my-index') - s = i.search() - - assert s._index == ['my-index'] - - -def test_cloned_index_has_copied_settings_and_using(): - client = object() - i = Index('my-index', using=client) - i.settings(number_of_shards=1) - - i2 = i.clone('my-other-index') - - assert 'my-other-index' == i2._name - assert client is i2._using - assert i._settings == i2._settings - assert i._settings is not i2._settings - - -def test_cloned_index_has_analysis_attribute(): - """ - Regression test for Issue #582 in which `Index.clone()` was not copying - over the `_analysis` attribute. 
- """ - client = object() - i = Index('my-index', using=client) - - random_analyzer_name = ''.join((choice(string.ascii_letters) for _ in range(100))) - random_analyzer = analyzer(random_analyzer_name, tokenizer="standard", filter="standard") - - i.analyzer(random_analyzer) - - i2 = i.clone('my-clone-index') - - assert i.to_dict()['settings']['analysis'] == i2.to_dict()['settings']['analysis'] - - -def test_settings_are_saved(): - i = Index('i') - i.settings(number_of_replicas=0) - i.settings(number_of_shards=1) - - assert { - 'settings': { - 'number_of_shards': 1, - 'number_of_replicas': 0, - } - } == i.to_dict() - - -def test_registered_doc_type_included_in_to_dict(): - i = Index('i', using='alias') - i.document(Post) - - assert { - 'mappings': { - 'properties': { - 'title': {'type': 'text'}, - 'published_from': {'type': 'date'}, - } - } - } == i.to_dict() - - -def test_registered_doc_type_included_in_search(): - i = Index('i', using='alias') - i.document(Post) - - s = i.search() - - assert s._doc_type == [Post] - - -def test_aliases_add_to_object(): - random_alias = ''.join((choice(string.ascii_letters) for _ in range(100))) - alias_dict = {random_alias: {}} - - index = Index('i', using='alias') - index.aliases(**alias_dict) - - assert index._aliases == alias_dict - - -def test_aliases_returned_from_to_dict(): - random_alias = ''.join((choice(string.ascii_letters) for _ in range(100))) - alias_dict = {random_alias: {}} - - index = Index('i', using='alias') - index.aliases(**alias_dict) - - assert index._aliases == index.to_dict()['aliases'] == alias_dict - - -def test_analyzers_added_to_object(): - random_analyzer_name = ''.join((choice(string.ascii_letters) for _ in range(100))) - random_analyzer = analyzer(random_analyzer_name, tokenizer="standard", filter="standard") - - index = Index('i', using='alias') - index.analyzer(random_analyzer) - - assert index._analysis["analyzer"][random_analyzer_name] == {"filter": ["standard"], "type": "custom", "tokenizer": "standard"} - - -def test_analyzers_returned_from_to_dict(): - random_analyzer_name = ''.join((choice(string.ascii_letters) for _ in range(100))) - random_analyzer = analyzer(random_analyzer_name, tokenizer="standard", filter="standard") - index = Index('i', using='alias') - index.analyzer(random_analyzer) - - assert index.to_dict()["settings"]["analysis"]["analyzer"][random_analyzer_name] == {"filter": ["standard"], "type": "custom", "tokenizer": "standard"} - - -def test_conflicting_analyzer_raises_error(): - i = Index('i') - i.analyzer('my_analyzer', tokenizer='whitespace', filter=['lowercase', 'stop']) - - with raises(ValueError): - i.analyzer('my_analyzer', tokenizer='keyword', filter=['lowercase', 'stop']) - - -def test_index_template_can_have_order(): - i = Index('i-*') - it = i.as_template('i', order=2) - - assert { - "index_patterns": ["i-*"], - "order": 2 - } == it.to_dict() - - -def test_index_template_save_result(mock_client): - it = IndexTemplate('test-template', 'test-*') - - assert it.save(using='mock') == mock_client.indices.put_template() diff --git a/test_elasticsearch_dsl/test_integration/__init__.py b/test_elasticsearch_dsl/test_integration/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/test_elasticsearch_dsl/test_integration/test_analysis.py b/test_elasticsearch_dsl/test_integration/test_analysis.py deleted file mode 100644 index 19d47d070..000000000 --- a/test_elasticsearch_dsl/test_integration/test_analysis.py +++ /dev/null @@ -1,24 +0,0 @@ -from elasticsearch_dsl import analyzer, 
tokenizer, token_filter - -def test_simulate_with_just__builtin_tokenizer(client): - a = analyzer('my-analyzer', tokenizer='keyword') - tokens = a.simulate('Hello World!', using=client).tokens - - assert len(tokens) == 1 - assert tokens[0].token == 'Hello World!' - -def test_simulate_complex(client): - a = analyzer('my-analyzer', - tokenizer=tokenizer('split_words', 'simple_pattern_split', pattern=':'), - filter=['lowercase', token_filter('no-ifs', 'stop', stopwords=['if'])]) - - tokens = a.simulate('if:this:works', using=client).tokens - - assert len(tokens) == 2 - assert ['this', 'works'] == [t.token for t in tokens] - -def test_simulate_builtin(client): - a = analyzer('my-analyzer', 'english') - tokens = a.simulate('fixes running').tokens - - assert ['fix', 'run'] == [t.token for t in tokens] diff --git a/test_elasticsearch_dsl/test_integration/test_count.py b/test_elasticsearch_dsl/test_integration/test_count.py deleted file mode 100644 index 2326b174a..000000000 --- a/test_elasticsearch_dsl/test_integration/test_count.py +++ /dev/null @@ -1,25 +0,0 @@ -from elasticsearch_dsl.search import Search, Q - - -def test_count_all(data_client): - s = Search(using=data_client).index('git') - assert 53 == s.count() - - -def test_count_prefetch(data_client, mocker): - mocker.spy(data_client, 'count') - - search = Search(using=data_client).index('git') - search.execute() - assert search.count() == 53 - assert data_client.count.call_count == 0 - - search._response.hits.total.relation = 'gte' - assert search.count() == 53 - assert data_client.count.call_count == 1 - - -def test_count_filter(data_client): - s = Search(using=data_client).index('git').filter(~Q('exists', field='parent_shas')) - # initial commit + repo document - assert 2 == s.count() diff --git a/test_elasticsearch_dsl/test_integration/test_data.py b/test_elasticsearch_dsl/test_integration/test_data.py deleted file mode 100644 index 288b80635..000000000 --- a/test_elasticsearch_dsl/test_integration/test_data.py +++ /dev/null @@ -1,189 +0,0 @@ -from __future__ import unicode_literals - -def create_flat_git_index(client, index): - # we will use user on several places - user_mapping = { - 'properties': { - 'name': { - 'type': 'text', - 'fields': { - 'raw': {'type' : 'keyword'}, - } - } - } - } - - client.indices.create( - index=index, - body={ - 'settings': { - # just one shard, no replicas for testing - 'number_of_shards': 1, - 'number_of_replicas': 0, - - # custom analyzer for analyzing file paths - 'analysis': { - 'analyzer': { - 'file_path': { - 'type': 'custom', - 'tokenizer': 'path_hierarchy', - 'filter': ['lowercase'] - } - } - } - }, - 'mappings': { - 'properties': { - 'description': {'type': 'text', 'analyzer': 'snowball'}, - 'author': user_mapping, - 'authored_date': {'type': 'date'}, - 'committer': user_mapping, - 'committed_date': {'type': 'date'}, - 'parent_shas': {'type': 'keyword'}, - 'files': {'type': 'text', 'analyzer': 'file_path', 'fielddata': True}, - } - } - } - ) - -def create_git_index(client, index): - # we will use user on several places - user_mapping = { - 'properties': { - 'name': { - 'type': 'text', - 'fields': { - 'raw': {'type' : 'keyword'}, - } - } - } - } - - client.indices.create( - index=index, - body={ - 'settings': { - # just one shard, no replicas for testing - 'number_of_shards': 1, - 'number_of_replicas': 0, - - # custom analyzer for analyzing file paths - 'analysis': { - 'analyzer': { - 'file_path': { - 'type': 'custom', - 'tokenizer': 'path_hierarchy', - 'filter': ['lowercase'] - } - } - } - }, - 
'mappings': { - 'properties': { - # common fields - 'description': {'type': 'text', 'analyzer': 'snowball'}, - 'commit_repo': {'type': 'join', 'relations': {'repo': 'commit'}}, - - # COMMIT mappings - 'author': user_mapping, - 'authored_date': {'type': 'date'}, - 'committer': user_mapping, - 'committed_date': {'type': 'date'}, - 'parent_shas': {'type': 'keyword'}, - 'files': {'type': 'text', 'analyzer': 'file_path', 'fielddata': True}, - - # REPO mappings - 'is_public': {'type': 'boolean'}, - 'owner': user_mapping, - 'created_at': {'type': 'date'}, - 'tags': {'type': 'keyword'} - } - } - } - ) - - -DATA = [ - # repository - {'_id': 'elasticsearch-dsl-py', '_source': {'commit_repo': 'repo', 'organization': 'elasticsearch', 'created_at': '2014-03-03', 'owner': {'name': 'elasticsearch'}, 'is_public': True}, '_index': 'git'}, - # documents - {'_id': '3ca6e1e73a071a705b4babd2f581c91a2a3e5037', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/aggs.py', 'elasticsearch_dsl/search.py', 'test_elasticsearch_dsl/test_aggs.py', 'test_elasticsearch_dsl/test_search.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 7, 'insertions': 23, 'lines': 30, 'files': 4}, 'description': "Make sure buckets aren't modified in-place", 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['eb3e543323f189fd7b698e66295427204fff5755'], 'committed_date': '2014-05-02T13:47:19', 'authored_date': '2014-05-02T13:47:19.123+02:00'}, '_index': 'git'}, - {'_id': 'eb3e543323f189fd7b698e66295427204fff5755', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/search.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 18, 'lines': 18, 'files': 1}, 'description': 'Add communication with ES server', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['dd15b6ba17dd9ba16363a51f85b31f66f1fb1157'], 'committed_date': '2014-05-01T13:32:14', 'authored_date': '2014-05-01T13:32:14'}, '_index': 'git'}, - {'_id': 'dd15b6ba17dd9ba16363a51f85b31f66f1fb1157', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/utils.py', 'test_elasticsearch_dsl/test_result.py', 'elasticsearch_dsl/result.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 18, 'insertions': 44, 'lines': 62, 'files': 3}, 'description': 'Minor cleanup and adding helpers for interactive python', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['ed19caf25abd25300e707fadf3f81b05c5673446'], 'committed_date': '2014-05-01T13:30:44', 'authored_date': '2014-05-01T13:30:44'}, '_index': 'git'}, - {'_id': 'ed19caf25abd25300e707fadf3f81b05c5673446', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/aggs.py', 'elasticsearch_dsl/search.py', 'test_elasticsearch_dsl/test_search.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 28, 'lines': 28, 'files': 3}, 'description': 'Make sure aggs do copy-on-write', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': 
['583e52c71e9a72c1b291ec5843683d8fa8f1ce2d'], 'committed_date': '2014-04-27T16:28:09', 'authored_date': '2014-04-27T16:28:09'}, '_index': 'git'}, - {'_id': '583e52c71e9a72c1b291ec5843683d8fa8f1ce2d', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/aggs.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 1, 'insertions': 1, 'lines': 2, 'files': 1}, 'description': 'Use __setitem__ from DslBase in AggsBase', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['1dd19210b5be92b960f7db6f66ae526288edccc3'], 'committed_date': '2014-04-27T15:51:53', 'authored_date': '2014-04-27T15:51:53'}, '_index': 'git'}, - {'_id': '1dd19210b5be92b960f7db6f66ae526288edccc3', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/aggs.py', 'elasticsearch_dsl/query.py', 'test_elasticsearch_dsl/test_search.py', 'elasticsearch_dsl/search.py', 'elasticsearch_dsl/filter.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 21, 'insertions': 98, 'lines': 119, 'files': 5}, 'description': 'Have Search clone itself on any change besides aggs', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['b4c9e29376af2e42a4e6dc153f0f293b1a18bac3'], 'committed_date': '2014-04-26T14:49:43', 'authored_date': '2014-04-26T14:49:43'}, '_index': 'git'}, - {'_id': 'b4c9e29376af2e42a4e6dc153f0f293b1a18bac3', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['test_elasticsearch_dsl/test_result.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 5, 'lines': 5, 'files': 1}, 'description': 'Add tests for [] on response', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['a64a54181b232bb5943bd16960be9416e402f5f5'], 'committed_date': '2014-04-26T13:56:52', 'authored_date': '2014-04-26T13:56:52'}, '_index': 'git'}, - {'_id': 'a64a54181b232bb5943bd16960be9416e402f5f5', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['test_elasticsearch_dsl/test_result.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 1, 'insertions': 7, 'lines': 8, 'files': 1}, 'description': 'Test access to missing fields raises appropriate exceptions', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['df3f778a3d37b170bde6979a4ef2d9e3e6400778'], 'committed_date': '2014-04-25T16:01:07', 'authored_date': '2014-04-25T16:01:07'}, '_index': 'git'}, - {'_id': 'df3f778a3d37b170bde6979a4ef2d9e3e6400778', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/utils.py', 'test_elasticsearch_dsl/test_result.py', 'elasticsearch_dsl/result.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 8, 'insertions': 31, 'lines': 39, 'files': 3}, 'description': 'Support attribute access even for inner/nested objects', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['7e599e116b5ff5d271ce3fe1ebc80e82ab3d5925'], 'committed_date': 
'2014-04-25T15:59:02', 'authored_date': '2014-04-25T15:59:02'}, '_index': 'git'}, - {'_id': '7e599e116b5ff5d271ce3fe1ebc80e82ab3d5925', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['test_elasticsearch_dsl/test_result.py', 'elasticsearch_dsl/result.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 149, 'lines': 149, 'files': 2}, 'description': 'Added a prototype of a Respose and Result classes', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['e2882d28cb8077eaa3e5d8ae76543482d4d90f7e'], 'committed_date': '2014-04-25T15:12:15', 'authored_date': '2014-04-25T15:12:15'}, '_index': 'git'}, - {'_id': 'e2882d28cb8077eaa3e5d8ae76543482d4d90f7e', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['docs/index.rst'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 6, 'lines': 6, 'files': 1}, 'description': 'add warning to the docs', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['51f94d83d1c47d3b81207736ca97a1ec6302678f'], 'committed_date': '2014-04-22T19:16:21', 'authored_date': '2014-04-22T19:16:21'}, '_index': 'git'}, - {'_id': '51f94d83d1c47d3b81207736ca97a1ec6302678f', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/utils.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 3, 'insertions': 29, 'lines': 32, 'files': 1}, 'description': 'Add some comments to the code', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['0950f6c600b49e2bf012d03b02250fb71c848555'], 'committed_date': '2014-04-22T19:12:06', 'authored_date': '2014-04-22T19:12:06'}, '_index': 'git'}, - {'_id': '0950f6c600b49e2bf012d03b02250fb71c848555', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['README.rst'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 6, 'lines': 6, 'files': 1}, 'description': 'Added a WIP warning', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['54d058f5ac6be8225ef61d5529772aada42ec6c8'], 'committed_date': '2014-04-20T00:19:25', 'authored_date': '2014-04-20T00:19:25'}, '_index': 'git'}, - {'_id': '54d058f5ac6be8225ef61d5529772aada42ec6c8', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/__init__.py', 'elasticsearch_dsl/search.py', 'test_elasticsearch_dsl/test_search.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 36, 'insertions': 7, 'lines': 43, 'files': 3}, 'description': 'Remove the operator kwarg from .query', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['4cb07845e45787abc1f850c0b561e487e0034424'], 'committed_date': '2014-04-20T00:17:25', 'authored_date': '2014-04-20T00:17:25'}, '_index': 'git'}, - {'_id': '4cb07845e45787abc1f850c0b561e487e0034424', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/aggs.py', 
'test_elasticsearch_dsl/test_search.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 35, 'insertions': 49, 'lines': 84, 'files': 2}, 'description': 'Complex example', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['578abe80f76aafd7e81fe46a44403e601733a938'], 'committed_date': '2014-03-24T20:48:45', 'authored_date': '2014-03-24T20:48:45'}, '_index': 'git'}, - {'_id': '578abe80f76aafd7e81fe46a44403e601733a938', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['test_elasticsearch_dsl/test_search.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 2, 'insertions': 0, 'lines': 2, 'files': 1}, 'description': 'removing extra whitespace', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['ecb84f03565940c7d294dbc80723420dcfbab340'], 'committed_date': '2014-03-24T20:42:23', 'authored_date': '2014-03-24T20:42:23'}, '_index': 'git'}, - {'_id': 'ecb84f03565940c7d294dbc80723420dcfbab340', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['test_elasticsearch_dsl/test_search.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 1, 'insertions': 3, 'lines': 4, 'files': 1}, 'description': 'Make sure attribute access works for .query on Search', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['9a247c876ab66e2bca56b25f392d054e613b1b2a'], 'committed_date': '2014-03-24T20:35:02', 'authored_date': '2014-03-24T20:34:46'}, '_index': 'git'}, - {'_id': '9a247c876ab66e2bca56b25f392d054e613b1b2a', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/search.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 2, 'lines': 2, 'files': 1}, 'description': 'Make sure .index and .doc_type methods are chainable', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['cee5e46947d510a49edd3609ff91aab7b1f3ac89'], 'committed_date': '2014-03-24T20:27:46', 'authored_date': '2014-03-24T20:27:46'}, '_index': 'git'}, - {'_id': 'cee5e46947d510a49edd3609ff91aab7b1f3ac89', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/search.py', 'test_elasticsearch_dsl/test_search.py', 'elasticsearch_dsl/filter.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 13, 'insertions': 128, 'lines': 141, 'files': 3}, 'description': 'Added .filter and .post_filter to Search', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['1d6857182b09a556d58c6bc5bdcb243092812ba3'], 'committed_date': '2014-03-24T20:26:57', 'authored_date': '2014-03-24T20:26:57'}, '_index': 'git'}, - {'_id': '1d6857182b09a556d58c6bc5bdcb243092812ba3', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/utils.py', 'elasticsearch_dsl/query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 24, 'insertions': 29, 'lines': 53, 'files': 2}, 'description': 'Extracted combination 
logic into DslBase', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['4ad92f15a1955846c01642318303a821e8435b75'], 'committed_date': '2014-03-24T20:03:51', 'authored_date': '2014-03-24T20:03:51'}, '_index': 'git'}, - {'_id': '4ad92f15a1955846c01642318303a821e8435b75', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/utils.py', 'elasticsearch_dsl/query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 43, 'insertions': 45, 'lines': 88, 'files': 2}, 'description': 'Extracted bool-related logic to a mixin to be reused by filters', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['6eb39dc2825605543ac1ed0b45b9b6baeecc44c2'], 'committed_date': '2014-03-24T19:16:16', 'authored_date': '2014-03-24T19:16:16'}, '_index': 'git'}, - {'_id': '6eb39dc2825605543ac1ed0b45b9b6baeecc44c2', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/search.py', 'test_elasticsearch_dsl/test_search.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 1, 'insertions': 32, 'lines': 33, 'files': 2}, 'description': 'Enable otheroperators when querying on Search object', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['be094c7b307332cb6039bf9a7c984d2c7593ddff'], 'committed_date': '2014-03-24T18:25:10', 'authored_date': '2014-03-24T18:25:10'}, '_index': 'git'}, - {'_id': 'be094c7b307332cb6039bf9a7c984d2c7593ddff', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/utils.py', 'elasticsearch_dsl/query.py', 'test_elasticsearch_dsl/test_query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 23, 'insertions': 35, 'lines': 58, 'files': 3}, 'description': 'make sure query operations always return copies', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['b2576e3b6437e2cb9d8971fee4ead60df91fd75b'], 'committed_date': '2014-03-24T18:10:37', 'authored_date': '2014-03-24T18:03:13'}, '_index': 'git'}, - {'_id': 'b2576e3b6437e2cb9d8971fee4ead60df91fd75b', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/query.py', 'test_elasticsearch_dsl/test_query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 1, 'insertions': 53, 'lines': 54, 'files': 2}, 'description': 'Adding or operator for queries', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['1be002170ac3cd59d2e97824b83b88bb3c9c60ed'], 'committed_date': '2014-03-24T17:53:38', 'authored_date': '2014-03-24T17:53:38'}, '_index': 'git'}, - {'_id': '1be002170ac3cd59d2e97824b83b88bb3c9c60ed', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/query.py', 'test_elasticsearch_dsl/test_query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 35, 'lines': 35, 'files': 2}, 'description': 'Added inverting of queries', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 
'parent_shas': ['24e1e38b2f704f65440d96c290b7c6cd54c2e00e'], 'committed_date': '2014-03-23T17:44:36', 'authored_date': '2014-03-23T17:44:36'}, '_index': 'git'}, - {'_id': '24e1e38b2f704f65440d96c290b7c6cd54c2e00e', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/aggs.py', 'elasticsearch_dsl/utils.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 5, 'insertions': 1, 'lines': 6, 'files': 2}, 'description': 'Change equality checks to use .to_dict()', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['277cfaedbaf3705ed74ad6296227e1172c97a63f'], 'committed_date': '2014-03-23T17:43:01', 'authored_date': '2014-03-23T17:43:01'}, '_index': 'git'}, - {'_id': '277cfaedbaf3705ed74ad6296227e1172c97a63f', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/query.py', 'test_elasticsearch_dsl/test_query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 1, 'insertions': 11, 'lines': 12, 'files': 2}, 'description': 'Test combining of bool queries', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['6aa3868a6a9f35f71553ce96f9d3d63c74d054fd'], 'committed_date': '2014-03-21T15:15:06', 'authored_date': '2014-03-21T15:15:06'}, '_index': 'git'}, - {'_id': '6aa3868a6a9f35f71553ce96f9d3d63c74d054fd', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/query.py', 'test_elasticsearch_dsl/test_query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 1, 'insertions': 23, 'lines': 24, 'files': 2}, 'description': 'Adding & operator for queries', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['bb311eb35e7eb53fb5ae01e3f80336866c7e3e37'], 'committed_date': '2014-03-21T15:10:08', 'authored_date': '2014-03-21T15:10:08'}, '_index': 'git'}, - {'_id': 'bb311eb35e7eb53fb5ae01e3f80336866c7e3e37', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/utils.py', 'test_elasticsearch_dsl/test_query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 1, 'insertions': 4, 'lines': 5, 'files': 2}, 'description': "Don't serialize empty typed fields into dict", 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['aea8ea9e421bd53a5b058495e68c3fd57bb1dacc'], 'committed_date': '2014-03-15T16:29:37', 'authored_date': '2014-03-15T16:29:37'}, '_index': 'git'}, - {'_id': 'aea8ea9e421bd53a5b058495e68c3fd57bb1dacc', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/utils.py', 'elasticsearch_dsl/query.py', 'test_elasticsearch_dsl/test_query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 3, 'insertions': 37, 'lines': 40, 'files': 3}, 'description': 'Bool queries, when combining just adds their params together', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['a8819a510b919be43ff3011b904f257798fb8916'], 'committed_date': '2014-03-15T16:16:40', 
'authored_date': '2014-03-15T16:16:40'}, '_index': 'git'}, - {'_id': 'a8819a510b919be43ff3011b904f257798fb8916', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['test_elasticsearch_dsl/run_tests.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 6, 'insertions': 2, 'lines': 8, 'files': 1}, 'description': 'Simpler run_tests.py', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['e35792a725be2325fc54d3fcb95a7d38d8075a99'], 'committed_date': '2014-03-15T16:02:21', 'authored_date': '2014-03-15T16:02:21'}, '_index': 'git'}, - {'_id': 'e35792a725be2325fc54d3fcb95a7d38d8075a99', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/aggs.py', 'elasticsearch_dsl/query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 2, 'insertions': 2, 'lines': 4, 'files': 2}, 'description': "Maku we don't treat shortcuts as methods.", 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['3179d778dc9e3f2883d5f7ffa63b9ae0399c16bc'], 'committed_date': '2014-03-15T15:59:21', 'authored_date': '2014-03-15T15:59:21'}, '_index': 'git'}, - {'_id': '3179d778dc9e3f2883d5f7ffa63b9ae0399c16bc', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/aggs.py', 'elasticsearch_dsl/query.py', 'elasticsearch_dsl/utils.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 9, 'insertions': 5, 'lines': 14, 'files': 3}, 'description': 'Centralize == of Dsl objects', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['b5e7d0c4b284211df8f7b464fcece93a27a802fb'], 'committed_date': '2014-03-10T21:37:24', 'authored_date': '2014-03-10T21:37:24'}, '_index': 'git'}, - {'_id': 'b5e7d0c4b284211df8f7b464fcece93a27a802fb', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/aggs.py', 'elasticsearch_dsl/search.py', 'test_elasticsearch_dsl/test_search.py', 'elasticsearch_dsl/utils.py', 'elasticsearch_dsl/query.py', 'test_elasticsearch_dsl/test_aggs.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 75, 'insertions': 115, 'lines': 190, 'files': 6}, 'description': 'Experimental draft with more declarative DSL', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['0fe741b43adee5ca1424584ddd3f35fa33f8733c'], 'committed_date': '2014-03-10T21:34:39', 'authored_date': '2014-03-10T21:34:39'}, '_index': 'git'}, - {'_id': '0fe741b43adee5ca1424584ddd3f35fa33f8733c', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['test_elasticsearch_dsl/test_search.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 2, 'insertions': 2, 'lines': 4, 'files': 1}, 'description': 'Make sure .query is chainable', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['a22be5933d4b022cbacee867b1aece120208edf3'], 'committed_date': '2014-03-07T17:41:59', 'authored_date': '2014-03-07T17:41:59'}, '_index': 'git'}, - {'_id': 
'a22be5933d4b022cbacee867b1aece120208edf3', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/aggs.py', 'elasticsearch_dsl/search.py', 'test_elasticsearch_dsl/test_search.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 14, 'insertions': 44, 'lines': 58, 'files': 3}, 'description': 'Search now does aggregations', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['e823686aacfc4bdcb34ffdab337a26fa09659a9a'], 'committed_date': '2014-03-07T17:29:55', 'authored_date': '2014-03-07T17:29:55'}, '_index': 'git'}, - {'_id': 'e823686aacfc4bdcb34ffdab337a26fa09659a9a', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['.gitignore'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 1, 'lines': 1, 'files': 1}, 'description': 'Ignore html coverage report', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['e0aedb3011c71d704deec03a8f32b2b360d6e364'], 'committed_date': '2014-03-07T17:03:23', 'authored_date': '2014-03-07T17:03:23'}, '_index': 'git'}, - {'_id': 'e0aedb3011c71d704deec03a8f32b2b360d6e364', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/aggs.py', 'test_elasticsearch_dsl/test_aggs.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 228, 'lines': 228, 'files': 2}, 'description': 'Added aggregation DSL objects', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['61cbc0aa62a0b776ae5e333406659dbb2f5cfbbd'], 'committed_date': '2014-03-07T16:25:55', 'authored_date': '2014-03-07T16:25:55'}, '_index': 'git'}, - {'_id': '61cbc0aa62a0b776ae5e333406659dbb2f5cfbbd', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/utils.py', 'elasticsearch_dsl/query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 12, 'insertions': 7, 'lines': 19, 'files': 2}, 'description': 'Only retrieve DslClass, leave the instantiation to the caller', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['647f1017a7b17a913e07af70a3b03202f6adbdfd'], 'committed_date': '2014-03-07T15:27:43', 'authored_date': '2014-03-07T15:27:43'}, '_index': 'git'}, - {'_id': '647f1017a7b17a913e07af70a3b03202f6adbdfd', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['test_elasticsearch_dsl/test_search.py', 'elasticsearch_dsl/query.py', 'test_elasticsearch_dsl/test_query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 19, 'insertions': 19, 'lines': 38, 'files': 3}, 'description': 'No need to replicate Query suffix when in query namespace', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['7c4f94ecdb38f0e91c7ee52f579c0ea148afcc7d'], 'committed_date': '2014-03-07T15:19:01', 'authored_date': '2014-03-07T15:19:01'}, '_index': 'git'}, - {'_id': '7c4f94ecdb38f0e91c7ee52f579c0ea148afcc7d', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 
'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/utils.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 2, 'insertions': 3, 'lines': 5, 'files': 1}, 'description': 'Ask forgiveness, not permission', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['c10793c2ca43688195e415b25b674ff34d58eaff'], 'committed_date': '2014-03-07T15:13:22', 'authored_date': '2014-03-07T15:13:22'}, '_index': 'git'}, - {'_id': 'c10793c2ca43688195e415b25b674ff34d58eaff', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/utils.py', 'elasticsearch_dsl/query.py', 'test_elasticsearch_dsl/test_query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 24, 'insertions': 27, 'lines': 51, 'files': 3}, 'description': 'Extract DSL object registration to DslMeta', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['d8867fdb17fcf4c696657740fa08d29c36adc6ec'], 'committed_date': '2014-03-07T15:12:13', 'authored_date': '2014-03-07T15:10:31'}, '_index': 'git'}, - {'_id': 'd8867fdb17fcf4c696657740fa08d29c36adc6ec', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/search.py', 'test_elasticsearch_dsl/test_search.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 13, 'lines': 13, 'files': 2}, 'description': 'Search.to_dict', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['2eb7cd980d917ed6f4a4dd8e246804f710ec5082'], 'committed_date': '2014-03-07T02:58:33', 'authored_date': '2014-03-07T02:58:33'}, '_index': 'git'}, - {'_id': '2eb7cd980d917ed6f4a4dd8e246804f710ec5082', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/search.py', 'test_elasticsearch_dsl/test_search.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 113, 'lines': 113, 'files': 2}, 'description': 'Basic Search object', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['11708576f9118e0dbf27ae1f8a7b799cf281b511'], 'committed_date': '2014-03-06T21:02:03', 'authored_date': '2014-03-06T21:01:05'}, '_index': 'git'}, - {'_id': '11708576f9118e0dbf27ae1f8a7b799cf281b511', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/query.py', 'test_elasticsearch_dsl/test_query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 13, 'lines': 13, 'files': 2}, 'description': 'MatchAll query + anything is anything', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['1dc496e5c7c1b2caf290df477fca2db61ebe37e0'], 'committed_date': '2014-03-06T20:40:39', 'authored_date': '2014-03-06T20:39:52'}, '_index': 'git'}, - {'_id': '1dc496e5c7c1b2caf290df477fca2db61ebe37e0', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/query.py', 'test_elasticsearch_dsl/test_query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 
'stats': {'deletions': 0, 'insertions': 53, 'lines': 53, 'files': 2}, 'description': "From_dict, Q(dict) and bool query parses it's subqueries", 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['d407f99d1959b7b862a541c066d9fd737ce913f3'], 'committed_date': '2014-03-06T20:24:30', 'authored_date': '2014-03-06T20:24:30'}, '_index': 'git'}, - {'_id': 'd407f99d1959b7b862a541c066d9fd737ce913f3', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['CONTRIBUTING.md', 'README.rst'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 6, 'insertions': 21, 'lines': 27, 'files': 2}, 'description': 'Housekeeping - licence and updated generic CONTRIBUTING.md', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['277e8ecc7395754d1ba1f2411ec32337a3e9d73f'], 'committed_date': '2014-03-05T16:21:44', 'authored_date': '2014-03-05T16:21:44'}, '_index': 'git'}, - {'_id': '277e8ecc7395754d1ba1f2411ec32337a3e9d73f', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/query.py', 'setup.py', 'test_elasticsearch_dsl/test_query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 59, 'lines': 59, 'files': 3}, 'description': 'Automatic query registration and Q function', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['8f1e34bd8f462fec50bcc10971df2d57e2986604'], 'committed_date': '2014-03-05T16:18:52', 'authored_date': '2014-03-05T16:18:52'}, '_index': 'git'}, - {'_id': '8f1e34bd8f462fec50bcc10971df2d57e2986604', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/query.py', 'test_elasticsearch_dsl/test_query.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 54, 'lines': 54, 'files': 2}, 'description': 'Initial implementation of match and bool queries', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['fcff47ddcc6d08be5739d03dd30f504fb9db2608'], 'committed_date': '2014-03-05T15:55:06', 'authored_date': '2014-03-05T15:55:06'}, '_index': 'git'}, - {'_id': 'fcff47ddcc6d08be5739d03dd30f504fb9db2608', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['docs/Makefile', 'CONTRIBUTING.md', 'docs/conf.py', 'LICENSE', 'Changelog.rst', 'docs/index.rst', 'docs/Changelog.rst'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 692, 'lines': 692, 'files': 7}, 'description': 'Docs template', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['febe8127ae48fcc81778c0fb2d628f1bcc0a0350'], 'committed_date': '2014-03-04T01:42:31', 'authored_date': '2014-03-04T01:42:31'}, '_index': 'git'}, - {'_id': 'febe8127ae48fcc81778c0fb2d628f1bcc0a0350', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['elasticsearch_dsl/__init__.py', 'test_elasticsearch_dsl/run_tests.py', 'setup.py', 'README.rst', 'test_elasticsearch_dsl/__init__.py'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 
0, 'insertions': 82, 'lines': 82, 'files': 5}, 'description': 'Empty project structure', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': ['2a8f1ce89760bfc72808f3945b539eae650acac9'], 'committed_date': '2014-03-04T01:37:49', 'authored_date': '2014-03-03T18:23:55'}, '_index': 'git'}, - {'_id': '2a8f1ce89760bfc72808f3945b539eae650acac9', 'routing': 'elasticsearch-dsl-py', '_source': {'commit_repo': {'name': 'commit', 'parent': 'elasticsearch-dsl-py'}, 'files': ['.gitignore'], 'committer': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'stats': {'deletions': 0, 'insertions': 9, 'lines': 9, 'files': 1}, 'description': 'Initial commit, .gitignore', 'author': {'name': 'Honza Kr\xe1l', 'email': 'honza.kral@gmail.com'}, 'parent_shas': [], 'committed_date': '2014-03-03T18:15:05', 'authored_date': '2014-03-03T18:15:05'}, '_index': 'git'}, -] - -def flatten_doc(d): - src = d['_source'].copy() - del src['commit_repo'] - return { - '_index': 'flat-git', - '_id': d['_id'], - '_source': src - } - - -FLAT_DATA = [ - flatten_doc(d) for d in DATA if 'routing' in d -] - -def create_test_git_data(d): - src = d['_source'].copy() - return { - '_index': 'test-git', - 'routing': 'elasticsearch-dsl-py', - '_id': d['_id'], - '_source': src - } - -TEST_GIT_DATA = [ - create_test_git_data(d) for d in DATA -] diff --git a/test_elasticsearch_dsl/test_integration/test_document.py b/test_elasticsearch_dsl/test_integration/test_document.py deleted file mode 100644 index edfb09fdb..000000000 --- a/test_elasticsearch_dsl/test_integration/test_document.py +++ /dev/null @@ -1,379 +0,0 @@ -from datetime import datetime -from pytz import timezone -from ipaddress import ip_address - -from elasticsearch import ConflictError, NotFoundError - -from elasticsearch_dsl import Document, Date, Text, Keyword, Mapping, InnerDoc, \ - Object, Nested, MetaField, Q, Long, Boolean, Double, Binary, Ip, analyzer -from elasticsearch_dsl.utils import AttrList - -from pytest import raises, fixture - -snowball = analyzer('my_snow', - tokenizer='standard', - filter=['standard', 'lowercase', 'snowball']) - -class User(InnerDoc): - name = Text(fields={'raw': Keyword()}) - -class Wiki(Document): - owner = Object(User) - views = Long() - - class Index: - name = 'test-wiki' - -class Repository(Document): - owner = Object(User) - created_at = Date() - description = Text(analyzer=snowball) - tags = Keyword() - - @classmethod - def search(cls): - return super(Repository, cls).search().filter('term', commit_repo='repo') - - class Index: - name = 'git' - -class Commit(Document): - committed_date = Date() - authored_date = Date() - description = Text(analyzer=snowball) - - class Index: - name = 'flat-git' - - class Meta: - mapping = Mapping() - -class History(InnerDoc): - timestamp = Date() - diff = Text() - -class Comment(InnerDoc): - content = Text() - created_at = Date() - author = Object(User) - history = Nested(History) - class Meta: - dynamic = MetaField(False) - -class PullRequest(Document): - comments = Nested(Comment) - created_at = Date() - class Index: - name = 'test-prs' - -class SerializationDoc(Document): - i = Long() - b = Boolean() - d = Double() - bin = Binary() - ip = Ip() - - class Index: - name = 'test-serialization' - -def test_serialization(write_client): - SerializationDoc.init() - write_client.index(index='test-serialization', id=42, - body={ - 'i': [1, 2, "3", None], - 'b': [True, False, "true", "false", None], - 'd': [0.1, "-0.1", None], - "bin": ['SGVsbG8gV29ybGQ=', None], - 'ip': 
['::1', '127.0.0.1', None] - }) - sd = SerializationDoc.get(id=42) - - assert sd.i == [1, 2, 3, None] - assert sd.b == [True, False, True, False, None] - assert sd.d == [0.1, -0.1, None] - assert sd.bin == [b'Hello World', None] - assert sd.ip == [ip_address(u'::1'), ip_address(u'127.0.0.1'), None] - - assert sd.to_dict() == { - 'b': [True, False, True, False, None], - 'bin': ['SGVsbG8gV29ybGQ=', None], - 'd': [0.1, -0.1, None], - 'i': [1, 2, 3, None], - 'ip': ['::1', '127.0.0.1', None] - } - - -def test_nested_inner_hits_are_wrapped_properly(pull_request): - history_query = Q('nested', path='comments.history', inner_hits={}, - query=Q('match', comments__history__diff='ahoj')) - s = PullRequest.search().query('nested', inner_hits={}, path='comments', - query=history_query) - - response = s.execute() - pr = response.hits[0] - assert isinstance(pr, PullRequest) - assert isinstance(pr.comments[0], Comment) - assert isinstance(pr.comments[0].history[0], History) - - comment = pr.meta.inner_hits.comments.hits[0] - assert isinstance(comment, Comment) - assert comment.author.name == 'honzakral' - assert isinstance(comment.history[0], History) - - history = comment.meta.inner_hits['comments.history'].hits[0] - assert isinstance(history, History) - assert history.timestamp == datetime(2012, 1, 1) - assert 'score' in history.meta - - -def test_nested_inner_hits_are_deserialized_properly(pull_request): - s = PullRequest.search().query('nested', inner_hits={}, path='comments', - query=Q('match', comments__content='hello')) - - response = s.execute() - pr = response.hits[0] - assert isinstance(pr.created_at, datetime) - assert isinstance(pr.comments[0], Comment) - assert isinstance(pr.comments[0].created_at, datetime) - - -def test_nested_top_hits_are_wrapped_properly(pull_request): - s = PullRequest.search() - s.aggs.bucket('comments', 'nested', path='comments').metric('hits', 'top_hits', size=1) - - r = s.execute() - - print(r._d_) - assert isinstance(r.aggregations.comments.hits.hits[0], Comment) - - -def test_update_object_field(write_client): - Wiki.init() - w = Wiki(owner=User(name='Honza Kral'), _id='elasticsearch-py') - w.save() - - assert 'updated' == w.update(owner=[{'name': 'Honza'}, {'name': 'Nick'}]) - assert w.owner[0].name == 'Honza' - assert w.owner[1].name == 'Nick' - - w = Wiki.get(id='elasticsearch-py') - assert w.owner[0].name == 'Honza' - assert w.owner[1].name == 'Nick' - -def test_update_script(write_client): - Wiki.init() - w = Wiki(owner=User(name='Honza Kral'), _id='elasticsearch-py', views=42) - w.save() - - w.update(script="ctx._source.views += params.inc", inc=5) - w = Wiki.get(id='elasticsearch-py') - assert w.views == 47 - -def test_init(write_client): - Repository.init(index='test-git') - - assert write_client.indices.exists(index='test-git') - -def test_get_raises_404_on_index_missing(data_client): - with raises(NotFoundError): - Repository.get('elasticsearch-dsl-php', index='not-there') - -def test_get_raises_404_on_non_existent_id(data_client): - with raises(NotFoundError): - Repository.get('elasticsearch-dsl-php') - -def test_get_returns_none_if_404_ignored(data_client): - assert None is Repository.get('elasticsearch-dsl-php', ignore=404) - -def test_get_returns_none_if_404_ignored_and_index_doesnt_exist(data_client): - assert None is Repository.get('42', index='not-there', ignore=404) - -def test_get(data_client): - elasticsearch_repo = Repository.get('elasticsearch-dsl-py') - - assert isinstance(elasticsearch_repo, Repository) - assert elasticsearch_repo.owner.name 
== 'elasticsearch' - assert datetime(2014, 3, 3) == elasticsearch_repo.created_at - -def test_get_with_tz_date(data_client): - first_commit = Commit.get(id='3ca6e1e73a071a705b4babd2f581c91a2a3e5037', routing='elasticsearch-dsl-py') - - tzinfo = timezone('Europe/Prague') - assert tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123000)) == first_commit.authored_date - -def test_save_with_tz_date(data_client): - tzinfo = timezone('Europe/Prague') - first_commit = Commit.get(id='3ca6e1e73a071a705b4babd2f581c91a2a3e5037', routing='elasticsearch-dsl-py') - first_commit.committed_date = tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123456)) - first_commit.save() - - first_commit = Commit.get(id='3ca6e1e73a071a705b4babd2f581c91a2a3e5037', routing='elasticsearch-dsl-py') - assert tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123456)) == first_commit.committed_date - -COMMIT_DOCS_WITH_MISSING = [ - {'_id': '0'}, # Missing - {'_id': '3ca6e1e73a071a705b4babd2f581c91a2a3e5037'}, # Existing - {'_id': 'f'}, # Missing - {'_id': 'eb3e543323f189fd7b698e66295427204fff5755'}, # Existing -] - -def test_mget(data_client): - commits = Commit.mget(COMMIT_DOCS_WITH_MISSING) - assert commits[0] is None - assert commits[1].meta.id == '3ca6e1e73a071a705b4babd2f581c91a2a3e5037' - assert commits[2] is None - assert commits[3].meta.id == 'eb3e543323f189fd7b698e66295427204fff5755' - -def test_mget_raises_exception_when_missing_param_is_invalid(data_client): - with raises(ValueError): - Commit.mget(COMMIT_DOCS_WITH_MISSING, missing='raj') - -def test_mget_raises_404_when_missing_param_is_raise(data_client): - with raises(NotFoundError): - Commit.mget(COMMIT_DOCS_WITH_MISSING, missing='raise') - -def test_mget_ignores_missing_docs_when_missing_param_is_skip(data_client): - commits = Commit.mget(COMMIT_DOCS_WITH_MISSING, missing='skip') - assert commits[0].meta.id == '3ca6e1e73a071a705b4babd2f581c91a2a3e5037' - assert commits[1].meta.id == 'eb3e543323f189fd7b698e66295427204fff5755' - -def test_update_works_from_search_response(data_client): - elasticsearch_repo = Repository.search().execute()[0] - - elasticsearch_repo.update(owner={'other_name': 'elastic'}) - assert 'elastic' == elasticsearch_repo.owner.other_name - - new_version = Repository.get('elasticsearch-dsl-py') - assert 'elastic' == new_version.owner.other_name - assert 'elasticsearch' == new_version.owner.name - -def test_update(data_client): - elasticsearch_repo = Repository.get('elasticsearch-dsl-py') - v = elasticsearch_repo.meta.version - - old_seq_no = elasticsearch_repo.meta.seq_no - elasticsearch_repo.update(owner={'new_name': 'elastic'}, new_field='testing-update') - - assert 'elastic' == elasticsearch_repo.owner.new_name - assert 'testing-update' == elasticsearch_repo.new_field - - # assert version has been updated - assert elasticsearch_repo.meta.version == v + 1 - - new_version = Repository.get('elasticsearch-dsl-py') - assert 'testing-update' == new_version.new_field - assert 'elastic' == new_version.owner.new_name - assert 'elasticsearch' == new_version.owner.name - assert 'seq_no' in new_version.meta - assert new_version.meta.seq_no != old_seq_no - assert 'primary_term' in new_version.meta - - -def test_save_updates_existing_doc(data_client): - elasticsearch_repo = Repository.get('elasticsearch-dsl-py') - - elasticsearch_repo.new_field = 'testing-save' - old_seq_no = elasticsearch_repo.meta.seq_no - assert 'updated' == elasticsearch_repo.save() - - new_repo = data_client.get(index='git', id='elasticsearch-dsl-py') - assert 'testing-save' == 
new_repo['_source']['new_field'] - assert new_repo['_seq_no'] != old_seq_no - assert new_repo['_seq_no'] == elasticsearch_repo.meta.seq_no - -def test_save_automatically_uses_seq_no_and_primary_term(data_client): - elasticsearch_repo = Repository.get('elasticsearch-dsl-py') - elasticsearch_repo.meta.seq_no += 1 - - with raises(ConflictError): - elasticsearch_repo.save() - -def test_delete_automatically_uses_seq_no_and_primary_term(data_client): - elasticsearch_repo = Repository.get('elasticsearch-dsl-py') - elasticsearch_repo.meta.seq_no += 1 - - with raises(ConflictError): - elasticsearch_repo.delete() - -def assert_doc_equals(expected, actual): - for f in expected: - assert f in actual - assert actual[f] == expected[f] - -def test_can_save_to_different_index(write_client): - test_repo = Repository(description='testing', meta={'id': 42}) - assert test_repo.save(index='test-document') - - assert_doc_equals({ - 'found': True, - '_index': 'test-document', - '_id': '42', - '_source': {'description': 'testing'}, - }, - write_client.get(index='test-document', id=42) - ) - -def test_save_without_skip_empty_will_include_empty_fields(write_client): - test_repo = Repository(field_1=[], field_2=None, field_3={}, meta={'id': 42}) - assert test_repo.save(index='test-document', skip_empty=False) - - assert_doc_equals({ - 'found': True, - '_index': 'test-document', - '_id': '42', - '_source': { - "field_1": [], - "field_2": None, - "field_3": {} - }, - }, - write_client.get(index='test-document', id=42) - ) - -def test_delete(write_client): - write_client.create( - index='test-document', - id='elasticsearch-dsl-py', - body={'organization': 'elasticsearch', 'created_at': '2014-03-03', 'owner': {'name': 'elasticsearch'}} - ) - - test_repo = Repository(meta={'id': 'elasticsearch-dsl-py'}) - test_repo.meta.index = 'test-document' - test_repo.delete() - - assert not write_client.exists( - index='test-document', - id='elasticsearch-dsl-py', - ) - -def test_search(data_client): - assert Repository.search().count() == 1 - -def test_search_returns_proper_doc_classes(data_client): - result = Repository.search().execute() - - elasticsearch_repo = result.hits[0] - - assert isinstance(elasticsearch_repo, Repository) - assert elasticsearch_repo.owner.name == 'elasticsearch' - -def test_refresh_mapping(data_client): - class Commit(Document): - class Index: - name = 'git' - - Commit._index.load_mappings() - - assert 'stats' in Commit._index._mapping - assert 'committer' in Commit._index._mapping - assert 'description' in Commit._index._mapping - assert 'committed_date' in Commit._index._mapping - assert isinstance(Commit._index._mapping['committed_date'], Date) - -def test_highlight_in_meta(data_client): - commit = Commit.search().query('match', description='inverting').highlight('description').execute()[0] - - assert isinstance(commit, Commit) - assert 'description' in commit.meta.highlight - assert isinstance(commit.meta.highlight['description'], AttrList) - assert len(commit.meta.highlight['description']) > 0 diff --git a/test_elasticsearch_dsl/test_integration/test_examples/__init__.py b/test_elasticsearch_dsl/test_integration/test_examples/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/test_elasticsearch_dsl/test_integration/test_examples/test_completion.py b/test_elasticsearch_dsl/test_integration/test_examples/test_completion.py deleted file mode 100644 index 2565b844c..000000000 --- a/test_elasticsearch_dsl/test_integration/test_examples/test_completion.py +++ /dev/null @@ -1,18 
+0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals - -from .completion import Person - -def test_person_suggests_on_all_variants_of_name(write_client): - Person.init(using=write_client) - - Person(name='Honza Král', popularity=42).save(refresh=True) - - s = Person.search().suggest('t', 'kra', completion={'field': 'suggest'}) - response = s.execute() - - opts = response.suggest.t[0].options - - assert 1 == len(opts) - assert opts[0]._score == 42 - assert opts[0]._source.name == 'Honza Král' diff --git a/test_elasticsearch_dsl/test_integration/test_examples/test_composite_aggs.py b/test_elasticsearch_dsl/test_integration/test_examples/test_composite_aggs.py deleted file mode 100644 index e94c53b9c..000000000 --- a/test_elasticsearch_dsl/test_integration/test_examples/test_composite_aggs.py +++ /dev/null @@ -1,20 +0,0 @@ -from elasticsearch_dsl import Search, A - -from .composite_agg import scan_aggs - -def test_scan_aggs_exhausts_all_files(data_client): - s = Search(index='flat-git') - key_aggs = {'files': A('terms', field='files')} - file_list = list(scan_aggs(s, key_aggs)) - - assert len(file_list) == 26 - -def test_scan_aggs_with_multiple_aggs(data_client): - s = Search(index='flat-git') - key_aggs = [ - {'files': A('terms', field='files')}, - {'months': {'date_histogram': {'field': 'committed_date', 'interval': 'month'}}}, - ] - file_list = list(scan_aggs(s, key_aggs)) - - assert len(file_list) == 47 diff --git a/test_elasticsearch_dsl/test_integration/test_examples/test_parent_child.py b/test_elasticsearch_dsl/test_integration/test_examples/test_parent_child.py deleted file mode 100644 index a882a232b..000000000 --- a/test_elasticsearch_dsl/test_integration/test_examples/test_parent_child.py +++ /dev/null @@ -1,76 +0,0 @@ -from datetime import datetime - -from pytest import fixture - -from elasticsearch_dsl import Q - -from .parent_child import User, Question, Answer, setup, Comment - -honza = User(id=42, signed_up=datetime(2013, 4, 3), username='honzakral', - email='honza@elastic.co', localtion='Prague') - -nick = User(id=47, signed_up=datetime(2017, 4, 3), username='fxdgear', - email='nick.lang@elastic.co', localtion='Colorado') - - -@fixture -def question(write_client): - setup() - assert write_client.indices.exists_template(name='base') - - # create a question object - q = Question( - _id=1, - author=nick, - tags=['elasticsearch', 'python'], - title='How do I use elasticsearch from Python?', - body=''' - I want to use elasticsearch, how do I do it from Python? 
- ''', - ) - q.save() - return q - -def test_comment(write_client, question): - question.add_comment(nick, "Just use elasticsearch-py") - - q = Question.get(1) - assert isinstance(q, Question) - assert 1 == len(q.comments) - - c = q.comments[0] - assert isinstance(c, Comment) - assert c.author.username == 'fxdgear' - - -def test_question_answer(write_client, question): - a = question.add_answer(honza, "Just use `elasticsearch-py`!") - - assert isinstance(a, Answer) - - # refresh the index so we can search right away - Question._index.refresh() - - # we can now fetch answers from elasticsearch - answers = question.get_answers() - assert 1 == len(answers) - assert isinstance(answers[0], Answer) - - search = Question.search().query('has_child', - type='answer', - inner_hits={}, - query=Q('term', author__username__keyword='honzakral'), - ) - response = search.execute() - - assert 1 == len(response.hits) - - q = response.hits[0] - assert isinstance(q, Question) - assert 1 == len(q.meta.inner_hits.answer.hits) - assert q.meta.inner_hits.answer.hits is q.get_answers() - - a = q.meta.inner_hits.answer.hits[0] - assert isinstance(a, Answer) - assert isinstance(a.question, Question) - assert a.question.meta.id == '1' diff --git a/test_elasticsearch_dsl/test_integration/test_examples/test_percolate.py b/test_elasticsearch_dsl/test_integration/test_examples/test_percolate.py deleted file mode 100644 index f3ea11a5c..000000000 --- a/test_elasticsearch_dsl/test_integration/test_examples/test_percolate.py +++ /dev/null @@ -1,13 +0,0 @@ -from .percolate import setup, BlogPost - -def test_post_gets_tagged_automatically(write_client): - setup() - - bp = BlogPost(_id=47, content='nothing about snakes here!') - bp_py = BlogPost(_id=42, content='something about Python here!') - - bp.save() - bp_py.save() - - assert [] == bp.tags - assert {'programming', 'development', 'python'} == set(bp_py.tags) diff --git a/test_elasticsearch_dsl/test_integration/test_faceted_search.py b/test_elasticsearch_dsl/test_integration/test_faceted_search.py deleted file mode 100644 index 0bc5751f1..000000000 --- a/test_elasticsearch_dsl/test_integration/test_faceted_search.py +++ /dev/null @@ -1,212 +0,0 @@ -from datetime import datetime - -from elasticsearch_dsl import Document, Boolean, Date, A, Keyword -from elasticsearch_dsl.faceted_search import FacetedSearch, TermsFacet, \ - DateHistogramFacet, RangeFacet, NestedFacet - -from .test_document import PullRequest - -class CommitSearch(FacetedSearch): - index = 'flat-git' - fields = ('description', 'files', ) - - facets = { - 'files': TermsFacet(field='files'), - 'frequency': DateHistogramFacet(field='authored_date', interval="day", min_doc_count=1), - 'deletions': RangeFacet(field='stats.deletions', ranges=[('ok', (None, 1)), ('good', (1, 5)), ('better', (5, None))]) - } - - -class Repos(Document): - is_public = Boolean() - created_at = Date() - - class Index: - name = 'git' - - -class Commit(Document): - files = Keyword() - committed_date = Date() - - class Index: - name = 'git' - -class RepoSearch(FacetedSearch): - index = 'git' - doc_types = [Repos] - facets = { - 'public': TermsFacet(field='is_public'), - 'created': DateHistogramFacet(field='created_at', interval='month') - } - - def search(self): - s = super(RepoSearch, self).search() - return s.filter('term', commit_repo='repo') - -class MetricSearch(FacetedSearch): - index = 'git' - doc_types = [Commit] - - facets = { - 'files': TermsFacet(field='files', metric=A('max', field='committed_date')), - } - -class 
PRSearch(FacetedSearch): - index = 'test-prs' - doc_types = [PullRequest] - facets = { - 'comments': NestedFacet( - 'comments', - DateHistogramFacet( - field='comments.created_at', - interval='month' - ) - ) - } - -def test_facet_with_custom_metric(data_client): - ms = MetricSearch() - r = ms.execute() - - dates = [f[1] for f in r.facets.files] - assert dates == list(sorted(dates, reverse=True)) - assert dates[0] == 1399038439000 - - -def test_nested_facet(pull_request): - prs = PRSearch() - r = prs.execute() - - assert r.hits.total.value == 1 - assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == r.facets.comments - -def test_nested_facet_with_filter(pull_request): - prs = PRSearch(filters={'comments': datetime(2018, 1, 1, 0, 0)}) - r = prs.execute() - - assert r.hits.total.value == 1 - assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == r.facets.comments - - prs = PRSearch(filters={'comments': datetime(2018, 2, 1, 0, 0)}) - r = prs.execute() - assert not r.hits - -def test_datehistogram_facet(data_client): - rs = RepoSearch() - r = rs.execute() - - assert r.hits.total.value == 1 - assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == r.facets.created - -def test_boolean_facet(data_client): - rs = RepoSearch() - r = rs.execute() - - assert r.hits.total.value == 1 - assert [(True, 1, False)] == r.facets.public - value, count, selected = r.facets.public[0] - assert value is True - - -def test_empty_search_finds_everything(data_client): - cs = CommitSearch() - - r = cs.execute() - - assert r.hits.total.value == 52 - assert [ - ('elasticsearch_dsl', 40, False), - ('test_elasticsearch_dsl', 35, False), - ('elasticsearch_dsl/query.py', 19, False), - ('test_elasticsearch_dsl/test_search.py', 15, False), - ('elasticsearch_dsl/utils.py', 14, False), - ('test_elasticsearch_dsl/test_query.py', 13, False), - ('elasticsearch_dsl/search.py', 12, False), - ('elasticsearch_dsl/aggs.py', 11, False), - ('test_elasticsearch_dsl/test_result.py', 5, False), - ('elasticsearch_dsl/result.py', 3, False) - ] == r.facets.files - - assert [ - (datetime(2014, 3, 3, 0, 0), 2, False), - (datetime(2014, 3, 4, 0, 0), 1, False), - (datetime(2014, 3, 5, 0, 0), 3, False), - (datetime(2014, 3, 6, 0, 0), 3, False), - (datetime(2014, 3, 7, 0, 0), 9, False), - (datetime(2014, 3, 10, 0, 0), 2, False), - (datetime(2014, 3, 15, 0, 0), 4, False), - (datetime(2014, 3, 21, 0, 0), 2, False), - (datetime(2014, 3, 23, 0, 0), 2, False), - (datetime(2014, 3, 24, 0, 0), 10, False), - (datetime(2014, 4, 20, 0, 0), 2, False), - (datetime(2014, 4, 22, 0, 0), 2, False), - (datetime(2014, 4, 25, 0, 0), 3, False), - (datetime(2014, 4, 26, 0, 0), 2, False), - (datetime(2014, 4, 27, 0, 0), 2, False), - (datetime(2014, 5, 1, 0, 0), 2, False), - (datetime(2014, 5, 2, 0, 0), 1, False) - ] == r.facets.frequency - - assert [ - ('ok', 19, False), - ('good', 14, False), - ('better', 19, False) - ] == r.facets.deletions - -def test_term_filters_are_shown_as_selected_and_data_is_filtered(data_client): - cs = CommitSearch(filters={'files': 'test_elasticsearch_dsl'}) - - r = cs.execute() - - assert 35 == r.hits.total.value - assert [ - ('elasticsearch_dsl', 40, False), - ('test_elasticsearch_dsl', 35, True), # selected - ('elasticsearch_dsl/query.py', 19, False), - ('test_elasticsearch_dsl/test_search.py', 15, False), - ('elasticsearch_dsl/utils.py', 14, False), - ('test_elasticsearch_dsl/test_query.py', 13, False), - ('elasticsearch_dsl/search.py', 12, False), - ('elasticsearch_dsl/aggs.py', 11, False), - ('test_elasticsearch_dsl/test_result.py', 5, False), - 
('elasticsearch_dsl/result.py', 3, False) - ] == r.facets.files - - assert [ - (datetime(2014, 3, 3, 0, 0), 1, False), - (datetime(2014, 3, 5, 0, 0), 2, False), - (datetime(2014, 3, 6, 0, 0), 3, False), - (datetime(2014, 3, 7, 0, 0), 6, False), - (datetime(2014, 3, 10, 0, 0), 1, False), - (datetime(2014, 3, 15, 0, 0), 3, False), - (datetime(2014, 3, 21, 0, 0), 2, False), - (datetime(2014, 3, 23, 0, 0), 1, False), - (datetime(2014, 3, 24, 0, 0), 7, False), - (datetime(2014, 4, 20, 0, 0), 1, False), - (datetime(2014, 4, 25, 0, 0), 3, False), - (datetime(2014, 4, 26, 0, 0), 2, False), - (datetime(2014, 4, 27, 0, 0), 1, False), - (datetime(2014, 5, 1, 0, 0), 1, False), - (datetime(2014, 5, 2, 0, 0), 1, False) - ] == r.facets.frequency - - assert [ - ('ok', 12, False), - ('good', 10, False), - ('better', 13, False) - ] == r.facets.deletions - -def test_range_filters_are_shown_as_selected_and_data_is_filtered(data_client): - cs = CommitSearch(filters={'deletions': 'better'}) - - r = cs.execute() - - assert 19 == r.hits.total.value - -def test_pagination(data_client): - cs = CommitSearch() - cs = cs[0:20] - - assert 52 == cs.count() - assert 20 == len(cs.execute()) diff --git a/test_elasticsearch_dsl/test_integration/test_index.py b/test_elasticsearch_dsl/test_integration/test_index.py deleted file mode 100644 index 062622406..000000000 --- a/test_elasticsearch_dsl/test_integration/test_index.py +++ /dev/null @@ -1,96 +0,0 @@ -from elasticsearch_dsl import Document, Index, Text, Keyword, Date, analysis, IndexTemplate - -class Post(Document): - title = Text(analyzer=analysis.analyzer('my_analyzer', tokenizer='keyword')) - published_from = Date() - -def test_index_template_works(write_client): - it = IndexTemplate('test-template', 'test-*') - it.document(Post) - it.settings(number_of_replicas=0, number_of_shards=1) - it.save() - - i = Index('test-blog') - i.create() - - assert { - 'test-blog': { - 'mappings': { - 'properties': { - 'title': {'type': 'text', 'analyzer': 'my_analyzer'}, - 'published_from': {'type': 'date'}, - } - } - } - } == write_client.indices.get_mapping(index='test-blog') - -def test_index_can_be_saved_even_with_settings(write_client): - i = Index('test-blog', using=write_client) - i.settings(number_of_shards=3, number_of_replicas=0) - i.save() - i.settings(number_of_replicas=1) - i.save() - - assert '1' == i.get_settings()['test-blog']['settings']['index']['number_of_replicas'] - -def test_index_exists(data_client): - assert Index('git').exists() - assert not Index('not-there').exists() - -def test_index_can_be_created_with_settings_and_mappings(write_client): - i = Index('test-blog', using=write_client) - i.document(Post) - i.settings(number_of_replicas=0, number_of_shards=1) - i.create() - - assert { - 'test-blog': { - 'mappings': { - 'properties': { - 'title': {'type': 'text', 'analyzer': 'my_analyzer'}, - 'published_from': {'type': 'date'} - } - } - } - } == write_client.indices.get_mapping(index='test-blog') - - settings = write_client.indices.get_settings(index='test-blog') - assert settings['test-blog']['settings']['index']['number_of_replicas'] == '0' - assert settings['test-blog']['settings']['index']['number_of_shards'] == '1' - assert settings['test-blog']['settings']['index']['analysis'] == { - 'analyzer': { - 'my_analyzer': { - 'type': 'custom', - 'tokenizer': 'keyword' - } - } - } - -def test_delete(write_client): - write_client.indices.create( - index='test-index', - body={'settings': {'number_of_replicas': 0, 'number_of_shards': 1}} - ) - - i = 
Index('test-index', using=write_client) - i.delete() - assert not write_client.indices.exists(index='test-index') - -def test_multiple_indices_with_same_doc_type_work(write_client): - i1 = Index('test-index-1', using=write_client) - i2 = Index('test-index-2', using=write_client) - - for i in (i1, i2): - i.document(Post) - i.create() - - for i in ('test-index-1', 'test-index-2'): - settings = write_client.indices.get_settings(index=i) - assert settings[i]['settings']['index']['analysis'] == { - 'analyzer': { - 'my_analyzer': { - 'type': 'custom', - 'tokenizer': 'keyword' - } - } - } diff --git a/test_elasticsearch_dsl/test_integration/test_mapping.py b/test_elasticsearch_dsl/test_integration/test_mapping.py deleted file mode 100644 index 98c1ba175..000000000 --- a/test_elasticsearch_dsl/test_integration/test_mapping.py +++ /dev/null @@ -1,127 +0,0 @@ -from elasticsearch_dsl import mapping, analysis, exceptions - -from pytest import raises - -def test_mapping_saved_into_es(write_client): - m = mapping.Mapping() - m.field('name', 'text', analyzer=analysis.analyzer('my_analyzer', tokenizer='keyword')) - m.field('tags', 'keyword') - m.save('test-mapping', using=write_client) - - assert { - 'test-mapping': { - 'mappings': { - 'properties': { - 'name': {'type': 'text', 'analyzer': 'my_analyzer'}, - 'tags': {'type': 'keyword'} - } - } - } - } == write_client.indices.get_mapping(index='test-mapping') - -def test_mapping_saved_into_es_when_index_already_exists_closed(write_client): - m = mapping.Mapping() - m.field('name', 'text', analyzer=analysis.analyzer('my_analyzer', tokenizer='keyword')) - write_client.indices.create(index='test-mapping') - - with raises(exceptions.IllegalOperation): - m.save('test-mapping', using=write_client) - - write_client.cluster.health(index='test-mapping', wait_for_status='yellow') - write_client.indices.close(index='test-mapping') - m.save('test-mapping', using=write_client) - - - assert { - 'test-mapping': { - 'mappings': { - 'properties': { - 'name': {'type': 'text', 'analyzer': 'my_analyzer'}, - } - } - } - } == write_client.indices.get_mapping(index='test-mapping') - -def test_mapping_saved_into_es_when_index_already_exists_with_analysis(write_client): - m = mapping.Mapping() - analyzer = analysis.analyzer('my_analyzer', tokenizer='keyword') - m.field('name', 'text', analyzer=analyzer) - - new_analysis = analyzer.get_analysis_definition() - new_analysis['analyzer']['other_analyzer'] = { - 'type': 'custom', - 'tokenizer': 'whitespace' - } - write_client.indices.create(index='test-mapping', body={'settings': {'analysis': new_analysis}}) - - m.field('title', 'text', analyzer=analyzer) - m.save('test-mapping', using=write_client) - - assert { - 'test-mapping': { - 'mappings': { - 'properties': { - 'name': {'type': 'text', 'analyzer': 'my_analyzer'}, - 'title': {'type': 'text', 'analyzer': 'my_analyzer'}, - } - } - } - } == write_client.indices.get_mapping(index='test-mapping') - -def test_mapping_gets_updated_from_es(write_client): - write_client.indices.create( - index='test-mapping', - body={ - 'settings': {'number_of_shards': 1, 'number_of_replicas': 0}, - 'mappings': { - 'date_detection': False, - 'properties': { - 'title': { - 'type': 'text', - 'analyzer': 'snowball', - 'fields': { - 'raw': {'type': 'keyword'} - } - }, - 'created_at': {'type': 'date'}, - 'comments': { - 'type': 'nested', - 'properties': { - 'created': {'type': 'date'}, - 'author': { - 'type': 'text', - 'analyzer': 'snowball', - 'fields': { - 'raw': {'type': 'keyword'} - } - } - } - } - } - } - } 
- ) - - m = mapping.Mapping.from_es('test-mapping', using=write_client) - - assert ['comments', 'created_at', 'title'] == list(sorted(m.properties.properties._d_.keys())) - assert { - 'date_detection': False, - 'properties': { - 'comments': { - 'type': 'nested', - 'properties': { - 'created': {'type': 'date'}, - 'author': {'analyzer': 'snowball', 'fields': {'raw': {'type': 'keyword'}}, 'type': 'text'} - }, - }, - 'created_at': {'type': 'date'}, - 'title': {'analyzer': 'snowball', 'fields': {'raw': {'type': 'keyword'}}, 'type': 'text'} - } - } == m.to_dict() - - # test same with alias - write_client.indices.put_alias(index='test-mapping', name='test-alias') - - m2 = mapping.Mapping.from_es('test-alias', using=write_client) - assert m2.to_dict() == m.to_dict() diff --git a/test_elasticsearch_dsl/test_integration/test_search.py b/test_elasticsearch_dsl/test_integration/test_search.py deleted file mode 100644 index 23eaaf6c5..000000000 --- a/test_elasticsearch_dsl/test_integration/test_search.py +++ /dev/null @@ -1,132 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals -from elasticsearch import TransportError - -from elasticsearch_dsl import Search, Document, Date, Text, Keyword, MultiSearch, \ - Index, Q -from elasticsearch_dsl.response import aggs - -from .test_data import FLAT_DATA - -from pytest import raises - -class Repository(Document): - created_at = Date() - description = Text(analyzer='snowball') - tags = Keyword() - - @classmethod - def search(cls): - return super(Repository, cls).search().filter('term', commit_repo='repo') - - class Index: - name = 'git' - -class Commit(Document): - class Index: - name = 'flat-git' - -def test_filters_aggregation_buckets_are_accessible(data_client): - has_tests_query = Q('term', files='test_elasticsearch_dsl') - s = Commit.search()[0:0] - s.aggs\ - .bucket('top_authors', 'terms', field='author.name.raw')\ - .bucket('has_tests', 'filters', filters={'yes': has_tests_query, 'no': ~has_tests_query})\ - .metric('lines', 'stats', field='stats.lines') - response = s.execute() - - assert isinstance(response.aggregations.top_authors.buckets[0].has_tests.buckets.yes, aggs.Bucket) - assert 35 == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.doc_count - assert 228 == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.lines.max - -def test_top_hits_are_wrapped_in_response(data_client): - s = Commit.search()[0:0] - s.aggs.bucket('top_authors', 'terms', field='author.name.raw').metric('top_commits', 'top_hits', size=5) - response = s.execute() - - top_commits = response.aggregations.top_authors.buckets[0].top_commits - assert isinstance(top_commits, aggs.TopHitsData) - assert 5 == len(top_commits) - - hits = [h for h in top_commits] - assert 5 == len(hits) - assert isinstance(hits[0], Commit) - - -def test_inner_hits_are_wrapped_in_response(data_client): - s = Search(index='git')[0:1].query('has_parent', parent_type='repo', inner_hits={}, query=Q('match_all')) - response = s.execute() - - commit = response.hits[0] - assert isinstance(commit.meta.inner_hits.repo, response.__class__) - assert repr(commit.meta.inner_hits.repo[0]).startswith(" 0 - assert not response.timed_out - assert response.updated == 52 - assert response.deleted == 0 - assert response.took > 0 - -def test_update_by_query_with_script(write_client, setup_ubq_tests): - index = setup_ubq_tests - - ubq = UpdateByQuery(using=write_client).index(index)\ - .filter(~Q('exists', field='parent_shas'))\ - .script(source='ctx._source.is_public = 
false') - ubq = ubq.params(conflicts='proceed') - - response = ubq.execute() - assert response.total == 2 - assert response.updated == 2 - assert response.version_conflicts == 0 - -def test_delete_by_query_with_script(write_client, setup_ubq_tests): - index = setup_ubq_tests - - ubq = UpdateByQuery(using=write_client).index(index)\ - .filter(Q('match', parent_shas='1dd19210b5be92b960f7db6f66ae526288edccc3'))\ - .script(source='ctx.op = "delete"') - ubq = ubq.params(conflicts='proceed') - - response = ubq.execute() - - assert response.total == 1 - assert response.deleted == 1 diff --git a/test_elasticsearch_dsl/test_mapping.py b/test_elasticsearch_dsl/test_mapping.py deleted file mode 100644 index 5e35d8bad..000000000 --- a/test_elasticsearch_dsl/test_mapping.py +++ /dev/null @@ -1,184 +0,0 @@ -import json - -from elasticsearch_dsl import mapping, Text, Keyword, Nested, analysis - - -def test_mapping_can_has_fields(): - m = mapping.Mapping() - m.field('name', 'text').field('tags', 'keyword') - - assert { - 'properties': { - 'name': {'type': 'text'}, - 'tags': {'type': 'keyword'} - } - } == m.to_dict() - -def test_mapping_update_is_recursive(): - m1 = mapping.Mapping() - m1.field('title', 'text') - m1.field('author', 'object') - m1.field('author', 'object', properties={'name': {'type': 'text'}}) - m1.meta('_all', enabled=False) - m1.meta('dynamic', False) - - m2 = mapping.Mapping() - m2.field('published_from', 'date') - m2.field('author', 'object', properties={'email': {'type': 'text'}}) - m2.field('title', 'text') - m2.field('lang', 'keyword') - m2.meta('_analyzer', path='lang') - - m1.update(m2, update_only=True) - - assert { - '_all': {'enabled': False}, - '_analyzer': {'path': 'lang'}, - 'dynamic': False, - 'properties': { - 'published_from': {'type': 'date'}, - 'title': {'type': 'text'}, - 'lang': {'type': 'keyword'}, - 'author': { - 'type': 'object', - 'properties': { - 'name': {'type': 'text'}, - 'email': {'type': 'text'}, - } - } - } - } == m1.to_dict() - -def test_properties_can_iterate_over_all_the_fields(): - m = mapping.Mapping() - m.field('f1', 'text', test_attr='f1', fields={'f2': Keyword(test_attr='f2')}) - m.field('f3', Nested(test_attr='f3', properties={ - 'f4': Text(test_attr='f4')})) - - assert {'f1', 'f2', 'f3', 'f4'} == {f.test_attr for f in m.properties._collect_fields()} - -def test_mapping_can_collect_all_analyzers_and_normalizers(): - a1 = analysis.analyzer('my_analyzer1', - tokenizer='keyword', - filter=['lowercase', analysis.token_filter('my_filter1', 'stop', stopwords=['a', 'b'])], - ) - a2 = analysis.analyzer('english') - a3 = analysis.analyzer('unknown_custom') - a4 = analysis.analyzer('my_analyzer2', - tokenizer=analysis.tokenizer('trigram', 'nGram', min_gram=3, max_gram=3), - filter=[analysis.token_filter('my_filter2', 'stop', stopwords=['c', 'd'])], - ) - a5 = analysis.analyzer('my_analyzer3', tokenizer='keyword') - n1 = analysis.normalizer('my_normalizer1', - filter=['lowercase'] - ) - n2 = analysis.normalizer('my_normalizer2', - filter=['my_filter1', 'my_filter2', analysis.token_filter('my_filter3', 'stop', stopwords=['e', 'f'])] - ) - n3 = analysis.normalizer('unknown_custom') - - m = mapping.Mapping() - m.field('title', 'text', analyzer=a1, - fields={ - 'english': Text(analyzer=a2), - 'unknown': Keyword(search_analyzer=a3), - } - ) - m.field('comments', Nested(properties={ - 'author': Text(analyzer=a4) - })) - m.field('normalized_title', 'keyword', normalizer=n1) - m.field('normalized_comment', 'keyword', normalizer=n2) - m.field('unknown', 'keyword', 
normalizer=n3) - m.meta('_all', analyzer=a5) - - assert { - 'analyzer': { - 'my_analyzer1': {'filter': ['lowercase', 'my_filter1'], 'tokenizer': 'keyword', 'type': 'custom'}, - 'my_analyzer2': {'filter': ['my_filter2'], 'tokenizer': 'trigram', 'type': 'custom'}, - 'my_analyzer3': {'tokenizer': 'keyword', 'type': 'custom'}, - }, - 'normalizer': { - 'my_normalizer1': {'filter': ['lowercase'], 'type': 'custom'}, - 'my_normalizer2': {'filter': ['my_filter1', 'my_filter2', 'my_filter3'], 'type': 'custom'}, - }, - 'filter': { - 'my_filter1': {'stopwords': ['a', 'b'], 'type': 'stop'}, - 'my_filter2': {'stopwords': ['c', 'd'], 'type': 'stop'}, - 'my_filter3': {'stopwords': ['e', 'f'], 'type': 'stop'}, - }, - 'tokenizer': { - 'trigram': {'max_gram': 3, 'min_gram': 3, 'type': 'nGram'}, - } - } == m._collect_analysis() - - assert json.loads(json.dumps(m.to_dict())) == m.to_dict() - - -def test_mapping_can_collect_multiple_analyzers(): - a1 = analysis.analyzer( - 'my_analyzer1', - tokenizer='keyword', - filter=['lowercase', analysis.token_filter('my_filter1', 'stop', stopwords=['a', 'b'])], - ) - a2 = analysis.analyzer( - 'my_analyzer2', - tokenizer=analysis.tokenizer('trigram', 'nGram', min_gram=3, max_gram=3), - filter=[analysis.token_filter('my_filter2', 'stop', stopwords=['c', 'd'])], - ) - m = mapping.Mapping() - m.field('title', 'text', analyzer=a1, search_analyzer=a2) - m.field( - 'text', 'text', analyzer=a1, - fields={ - 'english': Text(analyzer=a1), - 'unknown': Keyword(analyzer=a1, search_analyzer=a2), - } - ) - assert { - 'analyzer': { - 'my_analyzer1': {'filter': ['lowercase', 'my_filter1'], - 'tokenizer': 'keyword', - 'type': 'custom'}, - 'my_analyzer2': {'filter': ['my_filter2'], - 'tokenizer': 'trigram', - 'type': 'custom'}}, - 'filter': { - 'my_filter1': {'stopwords': ['a', 'b'], 'type': 'stop'}, - 'my_filter2': {'stopwords': ['c', 'd'], 'type': 'stop'}}, - 'tokenizer': {'trigram': {'max_gram': 3, 'min_gram': 3, 'type': 'nGram'}} - } == m._collect_analysis() - -def test_even_non_custom_analyzers_can_have_params(): - a1 = analysis.analyzer('whitespace', type='pattern', pattern=r'\\s+') - m = mapping.Mapping() - m.field('title', 'text', analyzer=a1) - - assert { - "analyzer": { - "whitespace": { - "type": "pattern", - "pattern": r"\\s+" - } - } - } == m._collect_analysis() - -def test_resolve_field_can_resolve_multifields(): - m = mapping.Mapping() - m.field('title', 'text', fields={'keyword': Keyword()}) - - assert isinstance(m.resolve_field('title.keyword'), Keyword) - -def test_resolve_nested(): - m = mapping.Mapping() - m.field('n1', 'nested', properties={'n2': Nested(properties={'k1': Keyword()})}) - m.field('k2', 'keyword') - - nested, field = m.resolve_nested('n1.n2.k1') - assert nested == ['n1', 'n1.n2'] - assert isinstance(field, Keyword) - - nested, field = m.resolve_nested('k2') - assert nested == [] - assert isinstance(field, Keyword) - diff --git a/test_elasticsearch_dsl/test_query.py b/test_elasticsearch_dsl/test_query.py deleted file mode 100644 index 2430ff9bc..000000000 --- a/test_elasticsearch_dsl/test_query.py +++ /dev/null @@ -1,418 +0,0 @@ -from elasticsearch_dsl import query, function - -from pytest import raises - -def test_empty_Q_is_match_all(): - q = query.Q() - - assert isinstance(q, query.MatchAll) - assert query.MatchAll() == q - -def test_match_to_dict(): - assert {"match": {"f": "value"}} == query.Match(f='value').to_dict() - -def test_match_to_dict_extra(): - assert {"match": {"f": "value", "boost": 2}} == query.Match(f='value', boost=2).to_dict() - -def 
test_fuzzy_to_dict(): - assert {"fuzzy": {"f": "value"}} == query.Fuzzy(f='value').to_dict() - -def test_prefix_to_dict(): - assert {"prefix": {"f": "value"}} == query.Prefix(f='value').to_dict() - -def test_term_to_dict(): - assert {"term": {"_type": "article"}} == query.Term(_type='article').to_dict() - -def test_bool_to_dict(): - bool = query.Bool(must=[query.Match(f='value')], should=[]) - - assert {"bool": {"must": [{"match": {"f": "value"}}]}} == bool.to_dict() - -def test_dismax_to_dict(): - assert {"dis_max":{"queries": [{"term": {"_type": "article"}}]}} == query.DisMax(queries=[query.Term(_type='article')]).to_dict() - -def test_bool_from_dict_issue_318(): - d = { - "bool": { - "must_not": {"match": {"field": "value"}} - } - } - q = query.Q(d) - - assert q == ~query.Match(field='value') - -def test_repr(): - bool = query.Bool(must=[query.Match(f='value')], should=[]) - - assert "Bool(must=[Match(f='value')])" == repr(bool) - -def test_query_clone(): - bool = query.Bool(must=[query.Match(x=42)], should=[query.Match(g="v2")], must_not=[query.Match(title='value')]) - bool_clone = bool._clone() - - assert bool == bool_clone - assert bool is not bool_clone - -def test_bool_converts_its_init_args_to_queries(): - q = query.Bool(must=[{"match": {"f": "value"}}]) - - assert len(q.must) == 1 - assert q.must[0] == query.Match(f='value') - -def test_two_queries_make_a_bool(): - q1 = query.Match(f='value1') - q2 = query.Match(message={"query": "this is a test", "opeartor": "and"}) - q = q1 & q2 - - assert isinstance(q, query.Bool) - assert [q1, q2] == q.must - -def test_other_and_bool_appends_other_to_must(): - q1 = query.Match(f='value1') - qb = query.Bool() - - q = q1 & qb - assert q is not qb - assert q.must[0] == q1 - -def test_bool_and_other_appends_other_to_must(): - q1 = query.Match(f='value1') - qb = query.Bool() - - q = qb & q1 - assert q is not qb - assert q.must[0] == q1 - -def test_bool_and_other_sets_min_should_match_if_needed(): - q1 = query.Q('term', category=1) - q2 = query.Q('bool', should=[ - query.Q('term', name='aaa'), - query.Q('term', name='bbb')] - ) - - q = q1 & q2 - assert q == query.Bool( - must=[q1], - should=[query.Q('term', name='aaa'), query.Q('term', name='bbb')], - minimum_should_match=1 - ) - -def test_bool_with_different_minimum_should_match_should_not_be_combined(): - q1 = query.Q('bool', minimum_should_match=2, should=[query.Q('term', field='aa1'), query.Q('term', field='aa2'), query.Q('term', field='aa3'), query.Q('term', field='aa4')]) - q2 = query.Q('bool', minimum_should_match=3, should=[query.Q('term', field='bb1'), query.Q('term', field='bb2'), query.Q('term', field='bb3'), query.Q('term', field='bb4')]) - q3 = query.Q('bool', minimum_should_match=4, should=[query.Q('term', field='cc1'), query.Q('term', field='cc2'), query.Q('term', field='cc3'), query.Q('term', field='cc4')]) - - q4 = q1 | q2 - assert q4 == query.Bool( - should=[q1, q2] - ) - - q5 = q1 | q2 | q3 - assert q5 == query.Bool( - should=[q1, q2, q3] - ) - -def test_empty_bool_has_min_should_match_0(): - assert 0 == query.Bool()._min_should_match - -def test_query_and_query_creates_bool(): - q1 = query.Match(f=42) - q2 = query.Match(g=47) - - q = q1 & q2 - assert isinstance(q, query.Bool) - assert q.must == [q1, q2] - -def test_match_all_and_query_equals_other(): - q1 = query.Match(f=42) - q2 = query.MatchAll() - - q = q1 & q2 - assert q1 == q - -def test_not_match_all_is_match_none(): - q = query.MatchAll() - - assert ~q == query.MatchNone() - -def test_not_match_none_is_match_all(): - q = 
query.MatchNone() - - assert ~q == query.MatchAll() - -def test_match_none_or_query_equals_query(): - q1 = query.Match(f=42) - q2 = query.MatchNone() - - assert q1 | q2 == query.Match(f=42) - -def test_match_none_and_query_equals_match_none(): - q1 = query.Match(f=42) - q2 = query.MatchNone() - - assert q1 & q2 == query.MatchNone() - -def test_bool_and_bool(): - qt1, qt2, qt3 = query.Match(f=1), query.Match(f=2), query.Match(f=3) - - q1 = query.Bool(must=[qt1], should=[qt2]) - q2 = query.Bool(must_not=[qt3]) - assert q1 & q2 == query.Bool(must=[qt1], must_not=[qt3], should=[qt2], minimum_should_match=0) - - q1 = query.Bool(must=[qt1], should=[qt1, qt2]) - q2 = query.Bool(should=[qt3]) - assert q1 & q2 == query.Bool(must=[qt1, qt3], should=[qt1, qt2], minimum_should_match=0) - -def test_bool_and_bool_with_min_should_match(): - qt1, qt2 = query.Match(f=1), query.Match(f=2) - q1 = query.Q('bool', minimum_should_match=1, should=[qt1]) - q2 = query.Q('bool', minimum_should_match=1, should=[qt2]) - - assert query.Q('bool', must=[qt1, qt2]) == q1 & q2 - -def test_inverted_query_becomes_bool_with_must_not(): - q = query.Match(f=42) - - assert ~q == query.Bool(must_not=[query.Match(f=42)]) - -def test_inverted_query_with_must_not_become_should(): - q = query.Q('bool', must_not=[query.Q('match', f=1), query.Q('match', f=2)]) - - assert ~q == query.Q('bool', should=[query.Q('match', f=1), query.Q('match', f=2)]) - -def test_inverted_query_with_must_and_must_not(): - q = query.Q('bool', - must=[query.Q('match', f=3), query.Q('match', f=4)], - must_not=[query.Q('match', f=1), query.Q('match', f=2)] - ) - print((~q).to_dict()) - assert ~q == query.Q('bool', - should=[ - # negation of must - query.Q('bool', must_not=[query.Q('match', f=3)]), - query.Q('bool', must_not=[query.Q('match', f=4)]), - - # negation of must_not - query.Q('match', f=1), - query.Q('match', f=2), - ] - ) - -def test_double_invert_returns_original_query(): - q = query.Match(f=42) - - assert q == ~~q - -def test_bool_query_gets_inverted_internally(): - q = query.Bool(must_not=[query.Match(f=42)], must=[query.Match(g='v')]) - - assert ~q == query.Bool( - should=[ - # negating must - query.Bool(must_not=[query.Match(g='v')]), - # negating must_not - query.Match(f=42), - ] - ) - -def test_match_all_or_something_is_match_all(): - q1 = query.MatchAll() - q2 = query.Match(f=42) - - assert (q1 | q2) == query.MatchAll() - assert (q2 | q1) == query.MatchAll() - -def test_or_produces_bool_with_should(): - q1 = query.Match(f=42) - q2 = query.Match(g='v') - - q = q1|q2 - assert q == query.Bool(should=[q1, q2]) - -def test_or_bool_doesnt_loop_infinitely_issue_37(): - q = query.Match(f=42) | ~ query.Match(f=47) - - assert q == query.Bool(should=[query.Bool(must_not=[query.Match(f=47)]), query.Match(f=42)]) - -def test_or_bool_doesnt_loop_infinitely_issue_96(): - q = ~query.Match(f=42) | ~ query.Match(f=47) - - assert q == query.Bool(should=[query.Bool(must_not=[query.Match(f=42)]), query.Bool(must_not=[query.Match(f=47)])]) - -def test_bool_will_append_another_query_with_or(): - qb = query.Bool(should=[query.Match(f='v'), query.Match(f='v2'),]) - q = query.Match(g=42) - - assert (q | qb) == query.Bool(should=[query.Match(f='v'), query.Match(f='v2'), q]) - -def test_bool_queries_with_only_should_get_concatenated(): - q1 = query.Bool(should=[query.Match(f=1), query.Match(f=2),]) - q2 = query.Bool(should=[query.Match(f=3), query.Match(f=4),]) - - assert (q1 | q2) == query.Bool(should=[query.Match(f=1), query.Match(f=2),query.Match(f=3), 
query.Match(f=4),]) - -def test_two_bool_queries_append_one_to_should_if_possible(): - q1 = query.Bool(should=[query.Match(f='v')]) - q2 = query.Bool(must=[query.Match(f='v')]) - - assert (q1 | q2) == query.Bool(should=[query.Match(f='v'), query.Bool(must=[query.Match(f='v')])]) - assert (q2 | q1) == query.Bool(should=[query.Match(f='v'), query.Bool(must=[query.Match(f='v')])]) - -def test_queries_are_registered(): - assert 'match' in query.Query._classes - assert query.Query._classes['match'] is query.Match - -def test_defining_query_registers_it(): - class MyQuery(query.Query): - name = 'my_query' - - assert 'my_query' in query.Query._classes - assert query.Query._classes['my_query'] is MyQuery - -def test_Q_passes_query_through(): - q = query.Match(f='value1') - - assert query.Q(q) is q - -def test_Q_constructs_query_by_name(): - q = query.Q('match', f='value') - - assert isinstance(q, query.Match) - assert {'f': 'value'} == q._params - -def test_Q_translates_double_underscore_to_dots_in_param_names(): - q = query.Q('match', comment__author='honza') - - assert {'comment.author': 'honza'} == q._params - -def test_Q_doesn_translate_double_underscore_to_dots_in_param_names(): - q = query.Q('match', comment__author='honza', _expand__to_dot=False) - - assert {'comment__author': 'honza'} == q._params - -def test_Q_constructs_simple_query_from_dict(): - q = query.Q({'match': {'f': 'value'}}) - - assert isinstance(q, query.Match) - assert {'f': 'value'} == q._params - -def test_Q_constructs_compound_query_from_dict(): - q = query.Q( - { - "bool": { - "must": [ - {'match': {'f': 'value'}}, - ] - } - } - ) - - assert q == query.Bool(must=[query.Match(f='value')]) - -def test_Q_raises_error_when_passed_in_dict_and_params(): - with raises(Exception): - query.Q({"match": {'f': 'value'}}, f='value') - -def test_Q_raises_error_when_passed_in_query_and_params(): - q = query.Match(f='value1') - - with raises(Exception): - query.Q(q, f='value') - -def test_Q_raises_error_on_unknown_query(): - with raises(Exception): - query.Q('not a query', f='value') - -def test_match_all_and_anything_is_anything(): - q = query.MatchAll() - - s = query.Match(f=42) - assert q&s == s - assert s&q == s - -def test_function_score_with_functions(): - q = query.Q('function_score', functions=[query.SF('script_score', script="doc['comment_count'] * _score")]) - - assert {'function_score': {'functions': [{'script_score': {'script': "doc['comment_count'] * _score"}}]}} == q.to_dict() - -def test_function_score_with_no_function_is_boost_factor(): - q = query.Q('function_score', functions=[query.SF({'weight': 20, 'filter': query.Q('term', f=42)})]) - - assert {'function_score': {'functions': [{'filter': {'term': {'f': 42}}, 'weight': 20}]}} == q.to_dict() - -def test_function_score_to_dict(): - q = query.Q( - 'function_score', - query=query.Q('match', title='python'), - functions=[ - query.SF('random_score'), - query.SF('field_value_factor', field='comment_count', filter=query.Q('term', tags='python')) - ] - ) - - d = { - 'function_score': { - 'query': {'match': {'title': 'python'}}, - 'functions': [ - {'random_score': {}}, - { - 'filter': {'term': {'tags': 'python'}}, - 'field_value_factor': { - 'field': 'comment_count', - } - } - ], - } - } - assert d == q.to_dict() - -def test_function_score_with_single_function(): - d = { - 'function_score': { - 'filter': {"term": {"tags": "python"}}, - 'script_score': { - 'script': "doc['comment_count'] * _score" - } - } - } - - q = query.Q(d) - assert isinstance(q, query.FunctionScore) - 
assert isinstance(q.filter, query.Term) - assert len(q.functions) == 1 - - sf = q.functions[0] - assert isinstance(sf, function.ScriptScore) - assert "doc['comment_count'] * _score" == sf.script - -def test_function_score_from_dict(): - d = { - 'function_score': { - 'filter': {"term": {"tags": "python"}}, - 'functions': [ - { - 'filter': {"terms": {"tags": "python"}}, - 'script_score': { - 'script': "doc['comment_count'] * _score" - } - }, - { - 'boost_factor': 6 - } - ] - } - } - - q = query.Q(d) - assert isinstance(q, query.FunctionScore) - assert isinstance(q.filter, query.Term) - assert len(q.functions) == 2 - - sf = q.functions[0] - assert isinstance(sf, function.ScriptScore) - assert isinstance(sf.filter, query.Terms) - - sf = q.functions[1] - assert isinstance(sf, function.BoostFactor) - assert 6 == sf.value - assert {'boost_factor': 6} == sf.to_dict() diff --git a/test_elasticsearch_dsl/test_search.py b/test_elasticsearch_dsl/test_search.py deleted file mode 100644 index 592a932d4..000000000 --- a/test_elasticsearch_dsl/test_search.py +++ /dev/null @@ -1,541 +0,0 @@ -from copy import deepcopy - -from elasticsearch_dsl import search, query, Q, Document, utils -from elasticsearch_dsl.exceptions import IllegalOperation - -from pytest import raises - - -def test_expand__to_dot_is_respected(): - s = search.Search().query('match', a__b=42, _expand__to_dot=False) - - assert {"query": {"match": {"a__b": 42}}} == s.to_dict() - -def test_execute_uses_cache(): - s = search.Search() - r = object() - s._response = r - - assert r is s.execute() - -def test_cache_can_be_ignored(mock_client): - s = search.Search(using='mock') - r = object() - s._response = r - s.execute(ignore_cache=True) - - mock_client.search.assert_called_once_with( - index=None, - body={} - ) - -def test_iter_iterates_over_hits(): - s = search.Search() - s._response = [1, 2, 3] - - assert [1, 2, 3] == list(s) - -def test_cache_isnt_cloned(): - s = search.Search() - s._response = object() - - assert not hasattr(s._clone(), '_response') - - -def test_search_starts_with_no_query(): - s = search.Search() - - assert s.query._proxied is None - -def test_search_query_combines_query(): - s = search.Search() - - s2 = s.query('match', f=42) - assert s2.query._proxied == query.Match(f=42) - assert s.query._proxied is None - - s3 = s2.query('match', f=43) - assert s2.query._proxied == query.Match(f=42) - assert s3.query._proxied == query.Bool(must=[query.Match(f=42), query.Match(f=43)]) - -def test_query_can_be_assigned_to(): - s = search.Search() - - q = Q('match', title='python') - s.query = q - - assert s.query._proxied is q - -def test_query_can_be_wrapped(): - s = search.Search().query('match', title='python') - - s.query = Q('function_score', query=s.query, field_value_factor={'field': 'rating'}) - - assert { - 'query': { - 'function_score': { - 'functions': [{'field_value_factor': {'field': 'rating'}}], - 'query': {'match': {'title': 'python'}} - } - } - }== s.to_dict() - -def test_using(): - o = object() - o2 = object() - s = search.Search(using=o) - assert s._using is o - s2 = s.using(o2) - assert s._using is o - assert s2._using is o2 - -def test_methods_are_proxied_to_the_query(): - s = search.Search().query('match_all') - - assert s.query.to_dict() == {'match_all': {}} - -def test_query_always_returns_search(): - s = search.Search() - - assert isinstance(s.query('match', f=42), search.Search) - -def test_source_copied_on_clone(): - s = search.Search().source(False) - assert s._clone()._source == s._source - assert 
s._clone()._source is False - - s2 = search.Search().source([]) - assert s2._clone()._source == s2._source - assert s2._source == [] - - s3 = search.Search().source(["some", "fields"]) - assert s3._clone()._source == s3._source - assert s3._clone()._source == ["some", "fields"] - -def test_copy_clones(): - from copy import copy - s1 = search.Search().source(["some", "fields"]) - s2 = copy(s1) - - assert s1 == s2 - assert s1 is not s2 - -def test_aggs_allow_two_metric(): - s = search.Search() - - s.aggs.metric('a', 'max', field='a').metric('b', 'max', field='b') - - assert s.to_dict() == { - 'aggs': { - 'a': { - 'max': { - 'field': 'a' - } - }, - 'b': { - 'max': { - 'field': 'b' - } - } - } - } - -def test_aggs_get_copied_on_change(): - s = search.Search().query('match_all') - s.aggs.bucket('per_tag', 'terms', field='f').metric('max_score', 'max', field='score') - - s2 = s.query('match_all') - s2.aggs.bucket('per_month', 'date_histogram', field='date', interval='month') - s3 = s2.query('match_all') - s3.aggs['per_month'].metric('max_score', 'max', field='score') - s4 = s3._clone() - s4.aggs.metric('max_score', 'max', field='score') - - d = { - 'query': {'match_all': {}}, - 'aggs': { - 'per_tag': { - 'terms': {'field': 'f'}, - 'aggs': {'max_score': {'max': {'field': 'score'}}} - } - } - } - - assert d == s.to_dict() - d['aggs']['per_month'] = {"date_histogram": {'field': 'date', 'interval': 'month'}} - assert d == s2.to_dict() - d['aggs']['per_month']['aggs'] = {"max_score": {"max": {"field": 'score'}}} - assert d == s3.to_dict() - d['aggs']['max_score'] = {"max": {"field": 'score'}} - assert d == s4.to_dict() - -def test_search_index(): - s = search.Search(index='i') - assert s._index == ['i'] - s = s.index('i2') - assert s._index == ['i', 'i2'] - s = s.index(u'i3') - assert s._index == ['i', 'i2', 'i3'] - s = s.index() - assert s._index is None - s = search.Search(index=('i', 'i2')) - assert s._index == ['i', 'i2'] - s = search.Search(index=['i', 'i2']) - assert s._index == ['i', 'i2'] - s = search.Search() - s = s.index('i', 'i2') - assert s._index == ['i', 'i2'] - s2 = s.index('i3') - assert s._index == ['i', 'i2'] - assert s2._index == ['i', 'i2', 'i3'] - s = search.Search() - s = s.index(['i', 'i2'], 'i3') - assert s._index == ['i', 'i2', 'i3'] - s2 = s.index('i4') - assert s._index == ['i', 'i2', 'i3'] - assert s2._index == ['i', 'i2', 'i3', 'i4'] - s2 = s.index(['i4']) - assert s2._index == ['i', 'i2', 'i3', 'i4'] - s2 = s.index(('i4', 'i5')) - assert s2._index == ['i', 'i2', 'i3', 'i4', 'i5'] - -def test_doc_type_document_class(): - class MyDocument(Document): - pass - - s = search.Search(doc_type=MyDocument) - assert s._doc_type == [MyDocument] - assert s._doc_type_map == {} - - s = search.Search().doc_type(MyDocument) - assert s._doc_type == [MyDocument] - assert s._doc_type_map == {} - -def test_sort(): - s = search.Search() - s = s.sort('fielda', '-fieldb') - - assert ['fielda', {'fieldb': {'order': 'desc'}}] == s._sort - assert {'sort': ['fielda', {'fieldb': {'order': 'desc'}}]} == s.to_dict() - - s = s.sort() - assert [] == s._sort - assert search.Search().to_dict() == s.to_dict() - -def test_sort_by_score(): - s = search.Search() - s = s.sort('_score') - assert {'sort': ['_score']} == s.to_dict() - - s = search.Search() - with raises(IllegalOperation): - s.sort('-_score') - -def test_slice(): - s = search.Search() - assert {'from': 3, 'size': 7} == s[3:10].to_dict() - assert {'from': 0, 'size': 5} == s[:5].to_dict() - assert {'from': 3, 'size': 10} == s[3:].to_dict() - assert 
{'from': 0, 'size': 0} == s[0:0].to_dict() - -def test_index(): - s = search.Search() - assert {'from': 3, 'size': 1} == s[3].to_dict() - -def test_search_to_dict(): - s = search.Search() - assert {} == s.to_dict() - - s = s.query('match', f=42) - assert {"query": {"match": {'f': 42}}} == s.to_dict() - - assert {"query": {"match": {'f': 42}}, "size": 10} == s.to_dict(size=10) - - s.aggs.bucket('per_tag', 'terms', field='f').metric('max_score', 'max', field='score') - d = { - 'aggs': { - 'per_tag': { - 'terms': {'field': 'f'}, - 'aggs': {'max_score': {'max': {'field': 'score'}}} - } - }, - 'query': {'match': {'f': 42}} - } - assert d == s.to_dict() - - s = search.Search(extra={"size": 5}) - assert {"size": 5} == s.to_dict() - s = s.extra(from_=42) - assert {"size": 5, "from": 42} == s.to_dict() - - -def test_complex_example(): - s = search.Search() - s = s.query('match', title='python') \ - .query(~Q('match', title='ruby')) \ - .filter(Q('term', category='meetup') | Q('term', category='conference')) \ - .post_filter('terms', tags=['prague', 'czech']) \ - .script_fields(more_attendees="doc['attendees'].value + 42") - - s.aggs.bucket('per_country', 'terms', field='country')\ - .metric('avg_attendees', 'avg', field='attendees') - - s.query.minimum_should_match = 2 - - s = s.highlight_options(order='score').highlight('title', 'body', fragment_size=50) - - assert { - 'query': { - 'bool': { - 'filter': [ - { - 'bool': { - 'should': [ - {'term': {'category': 'meetup'}}, - {'term': {'category': 'conference'}} - ] - } - } - ], - 'must': [ {'match': {'title': 'python'}}], - 'must_not': [{'match': {'title': 'ruby'}}], - 'minimum_should_match': 2 - } - }, - 'post_filter': { - 'terms': {'tags': ['prague', 'czech']} - }, - 'aggs': { - 'per_country': { - 'terms': {'field': 'country'}, - 'aggs': { - 'avg_attendees': {'avg': {'field': 'attendees'}} - } - } - }, - "highlight": { - 'order': 'score', - 'fields': { - 'title': {'fragment_size': 50}, - 'body': {'fragment_size': 50} - } - }, - 'script_fields': { - 'more_attendees': {'script': "doc['attendees'].value + 42"} - } - } == s.to_dict() - -def test_reverse(): - d = { - 'query': { - 'filtered': { - 'filter': { - 'bool': { - 'should': [ - {'term': {'category': 'meetup'}}, - {'term': {'category': 'conference'}} - ] - } - }, - 'query': { - 'bool': { - 'must': [ {'match': {'title': 'python'}}], - 'must_not': [{'match': {'title': 'ruby'}}], - 'minimum_should_match': 2 - } - } - } - }, - 'post_filter': { - 'bool': {'must': [{'terms': {'tags': ['prague', 'czech']}}]} - }, - 'aggs': { - 'per_country': { - 'terms': {'field': 'country'}, - 'aggs': { - 'avg_attendees': {'avg': {'field': 'attendees'}} - } - } - }, - "sort": [ - "title", - {"category": {"order": "desc"}}, - "_score" - ], - "size": 5, - "highlight": { - 'order': 'score', - 'fields': { - 'title': {'fragment_size': 50} - } - }, - "suggest": { - "my-title-suggestions-1" : { - "text" : "devloping distibutd saerch engies", - "term" : { - "size" : 3, - "field" : "title" - } - } - }, - 'script_fields': { - 'more_attendees': {'script': "doc['attendees'].value + 42"} - } - } - - d2 = deepcopy(d) - - s = search.Search.from_dict(d) - - # make sure we haven't modified anything in place - assert d == d2 - assert {"size": 5} == s._extra - assert d == s.to_dict() - -def test_from_dict_doesnt_need_query(): - s = search.Search.from_dict({"size": 5}) - - assert { - "size": 5 - } == s.to_dict() - -def test_params_being_passed_to_search(mock_client): - s = search.Search(using='mock') - s = s.params(routing='42') - 
s.execute() - - mock_client.search.assert_called_once_with( - index=None, - body={}, - routing='42' - ) - -def test_source(): - assert {} == search.Search().source().to_dict() - - assert { - '_source': { - 'includes': ['foo.bar.*'], - 'excludes': ['foo.one'] - } - } == search.Search().source(includes=['foo.bar.*'], excludes=['foo.one']).to_dict() - - assert { - '_source': False - } == search.Search().source(False).to_dict() - - assert { - '_source': ['f1', 'f2'] - } == search.Search().source(includes=['foo.bar.*'], excludes=['foo.one']).source(['f1', 'f2']).to_dict() - -def test_source_on_clone(): - assert { - '_source': { - 'includes': ['foo.bar.*'], - 'excludes': ['foo.one'] - }, - 'query': { - 'bool': { - 'filter': [{'term': {'title': 'python'}}], - } - } - } == search.Search().source(includes=['foo.bar.*']).\ - source(excludes=['foo.one']).\ - filter('term', title='python').to_dict()\ - - assert {'_source': False, - 'query': { - 'bool': { - 'filter': [{'term': {'title': 'python'}}], - } - }} == search.Search().source( - False).filter('term', title='python').to_dict() - -def test_source_on_clear(): - assert { - } == search.Search().source(includes=['foo.bar.*']).\ - source(includes=None, excludes=None).to_dict() - -def test_suggest_accepts_global_text(): - s = search.Search.from_dict({ - "suggest" : { - "text" : "the amsterdma meetpu", - "my-suggest-1" : { - "term" : {"field" : "title"} - }, - "my-suggest-2" : { - "text": "other", - "term" : {"field" : "body"} - } - } - }) - - assert { - 'suggest': { - 'my-suggest-1': { - 'term': {'field': 'title'}, - 'text': 'the amsterdma meetpu' - }, - 'my-suggest-2': { - 'term': {'field': 'body'}, - 'text': 'other'} - } - } == s.to_dict() - -def test_suggest(): - s = search.Search() - s = s.suggest('my_suggestion', 'pyhton', term={'field': 'title'}) - - assert { - 'suggest': { - 'my_suggestion': { - 'term': {'field': 'title'}, - 'text': 'pyhton' - } - } - } == s.to_dict() - -def test_exclude(): - s = search.Search() - s = s.exclude('match', title='python') - - assert { - 'query': { - 'bool': { - 'filter': [{ - 'bool': { - 'must_not': [{ - 'match': { - 'title': 'python' - } - }] - } - }] - } - } - } == s.to_dict() - -def test_delete_by_query(mock_client): - s = search.Search(using='mock') \ - .query("match", lang="java") - s.delete() - - mock_client.delete_by_query.assert_called_once_with( - index=None, - body={"query": {"match": {"lang": "java"}}} - ) - -def test_update_from_dict(): - s = search.Search() - s.update_from_dict({"indices_boost": [{"important-documents": 2}]}) - s.update_from_dict({"_source": ["id", "name"]}) - - assert { - 'indices_boost': [{ - 'important-documents': 2 - }], - '_source': [ - 'id', - 'name' - ] - } == s.to_dict() diff --git a/test_elasticsearch_dsl/test_update_by_query.py b/test_elasticsearch_dsl/test_update_by_query.py deleted file mode 100644 index a3c687e7a..000000000 --- a/test_elasticsearch_dsl/test_update_by_query.py +++ /dev/null @@ -1,149 +0,0 @@ -from copy import deepcopy - -from elasticsearch_dsl import UpdateByQuery, query, Q, Document - -def test_ubq_starts_with_no_query(): - ubq = UpdateByQuery() - - assert ubq.query._proxied is None - -def test_ubq_to_dict(): - ubq = UpdateByQuery() - assert {} == ubq.to_dict() - - ubq = ubq.query('match', f=42) - assert {"query": {"match": {'f': 42}}} == ubq.to_dict() - - assert {"query": {"match": {'f': 42}}, "size": 10} == ubq.to_dict(size=10) - - ubq = UpdateByQuery(extra={"size": 5}) - assert {"size": 5} == ubq.to_dict() - -def test_complex_example(): - ubq = 
UpdateByQuery() - ubq = ubq.query('match', title='python') \ - .query(~Q('match', title='ruby')) \ - .filter(Q('term', category='meetup') | Q('term', category='conference')) \ - .script(source='ctx._source.likes += params.f', lang='painless', params={'f': 3}) - - ubq.query.minimum_should_match = 2 - assert { - 'query': { - 'bool': { - 'filter': [ - { - 'bool': { - 'should': [ - {'term': {'category': 'meetup'}}, - {'term': {'category': 'conference'}} - ] - } - } - ], - 'must': [ {'match': {'title': 'python'}}], - 'must_not': [{'match': {'title': 'ruby'}}], - 'minimum_should_match': 2 - } - }, - 'script': { - 'source': 'ctx._source.likes += params.f', - 'lang': 'painless', - 'params': { - 'f': 3 - } - } - } == ubq.to_dict() - -def test_exclude(): - ubq = UpdateByQuery() - ubq = ubq.exclude('match', title='python') - - assert { - 'query': { - 'bool': { - 'filter': [{ - 'bool': { - 'must_not': [{ - 'match': { - 'title': 'python' - } - }] - } - }] - } - } - } == ubq.to_dict() - -def test_reverse(): - d = { - 'query': { - 'filtered': { - 'filter': { - 'bool': { - 'should': [ - {'term': {'category': 'meetup'}}, - {'term': {'category': 'conference'}} - ] - } - }, - 'query': { - 'bool': { - 'must': [ {'match': {'title': 'python'}}], - 'must_not': [{'match': {'title': 'ruby'}}], - 'minimum_should_match': 2 - } - } - } - }, - 'script': { - 'source': 'ctx._source.likes += params.f', - 'lang': 'painless', - 'params': { - 'f': 3 - } - } - } - - d2 = deepcopy(d) - - ubq = UpdateByQuery.from_dict(d) - - assert d == d2 - assert d == ubq.to_dict() - -def test_from_dict_doesnt_need_query(): - ubq = UpdateByQuery.from_dict({'script': {'source': 'test'}}) - - assert { - 'script': {'source': 'test'} - } == ubq.to_dict() - -def test_params_being_passed_to_search(mock_client): - ubq = UpdateByQuery(using='mock') - ubq = ubq.params(routing='42') - ubq.execute() - - mock_client.update_by_query.assert_called_once_with( - index=None, - body={}, - routing='42' - ) - -def test_overwrite_script(): - ubq = UpdateByQuery() - ubq = ubq.script(source='ctx._source.likes += params.f', lang='painless', params={'f': 3}) - assert { - 'script': { - 'source': 'ctx._source.likes += params.f', - 'lang': 'painless', - 'params': { - 'f': 3 - } - } - } == ubq.to_dict() - ubq = ubq.script(source='ctx._source.likes++') - assert { - 'script': { - 'source': 'ctx._source.likes++' - } - } == ubq.to_dict() diff --git a/test_elasticsearch_dsl/test_utils.py b/test_elasticsearch_dsl/test_utils.py deleted file mode 100644 index 155b29f65..000000000 --- a/test_elasticsearch_dsl/test_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -import pickle - -from elasticsearch_dsl import utils, serializer - -from pytest import raises - -def test_attrdict_pickle(): - ad = utils.AttrDict({}) - - pickled_ad = pickle.dumps(ad) - assert ad == pickle.loads(pickled_ad) - - -def test_attrlist_pickle(): - al = utils.AttrList([]) - - pickled_al = pickle.dumps(al) - assert al == pickle.loads(pickled_al) - -def test_attrlist_slice(): - class MyAttrDict(utils.AttrDict): - pass - - l = utils.AttrList([{}, {}], obj_wrapper=MyAttrDict) - assert isinstance(l[:][0], MyAttrDict) - -def test_merge(): - a = utils.AttrDict({'a': {'b': 42, 'c': 47}}) - b = {'a': {'b': 123, 'd': -12}, 'e': [1, 2, 3]} - - utils.merge(a, b) - - assert a == {'a': {'b': 123, 'c': 47, 'd': -12}, 'e': [1, 2, 3]} - -def test_merge_conflict(): - for d in ({'a': 42}, {'a': {'b': 47}},): - utils.merge({'a': {'b': 42}}, d) - with raises(ValueError): - utils.merge({'a': {'b': 42}}, d, True) - -def 
test_attrdict_bool(): - d = utils.AttrDict({}) - - assert not d - d.title = 'Title' - assert d - -def test_attrlist_items_get_wrapped_during_iteration(): - al = utils.AttrList([1, object(), [1], {}]) - - l = list(iter(al)) - - assert isinstance(l[2], utils.AttrList) - assert isinstance(l[3], utils.AttrDict) - -def test_serializer_deals_with_Attr_versions(): - d = utils.AttrDict({'key': utils.AttrList([1, 2, 3])}) - - assert serializer.serializer.dumps(d) == serializer.serializer.dumps({'key': [1, 2, 3]}) - -def test_serializer_deals_with_objects_with_to_dict(): - class MyClass(object): - def to_dict(self): - return 42 - - assert serializer.serializer.dumps(MyClass()) == '42' diff --git a/test_elasticsearch_dsl/test_wrappers.py b/test_elasticsearch_dsl/test_wrappers.py deleted file mode 100644 index 4ebbd9076..000000000 --- a/test_elasticsearch_dsl/test_wrappers.py +++ /dev/null @@ -1,57 +0,0 @@ -from datetime import datetime, timedelta - -from elasticsearch_dsl import Range - -import pytest - -@pytest.mark.parametrize('kwargs, item', [ - ({}, 1), - ({}, -1), - ({'gte': -1}, -1), - ({'lte': 4}, 4), - ({'lte': 4, 'gte': 2}, 4), - ({'lte': 4, 'gte': 2}, 2), - ({'gt': datetime.now() - timedelta(seconds=10)}, datetime.now()) - -]) -def test_range_contains(kwargs, item): - assert item in Range(**kwargs) - -@pytest.mark.parametrize('kwargs, item', [ - ({'gt': -1}, -1), - ({'lt': 4}, 4), - ({'lt': 4}, 42), - ({'lte': 4, 'gte': 2}, 1), - ({'lte': datetime.now() - timedelta(seconds=10)}, datetime.now()) -]) -def test_range_not_contains(kwargs, item): - assert item not in Range(**kwargs) - -@pytest.mark.parametrize('args,kwargs', [ - (({}, ), {'lt': 42}), - ((), {'not_lt': 42}), - ((object(),), {}), - ((), {'lt': 1, 'lte': 1}), - ((), {'gt': 1, 'gte': 1}), -]) -def test_range_raises_value_error_on_wrong_params(args, kwargs): - with pytest.raises(ValueError): - Range(*args, **kwargs) - -@pytest.mark.parametrize('range,lower,inclusive', [ - (Range(gt=1), 1, False), - (Range(gte=1), 1, True), - (Range(), None, False), - (Range(lt=42), None, False), -]) -def test_range_lower(range, lower, inclusive): - assert (lower, inclusive) == range.lower - -@pytest.mark.parametrize('range,upper,inclusive', [ - (Range(lt=1), 1, False), - (Range(lte=1), 1, True), - (Range(), None, False), - (Range(gt=42), None, False), -]) -def test_range_upper(range, upper, inclusive): - assert (upper, inclusive) == range.upper diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 000000000..b7326c3fb --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,324 @@ +# -*- coding: utf-8 -*- +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +import os +import re +from datetime import datetime + +from elasticsearch.helpers import bulk +from elasticsearch.helpers.test import SkipTest, get_test_client +from mock import Mock +from pytest import fixture, skip + +from elasticsearch_dsl.connections import add_connection, connections + +from .test_integration.test_data import ( + DATA, + FLAT_DATA, + TEST_GIT_DATA, + create_flat_git_index, + create_git_index, +) +from .test_integration.test_document import Comment, History, PullRequest, User + + +@fixture(scope="session") +def client(): + try: + connection = get_test_client(nowait="WAIT_FOR_ES" not in os.environ) + add_connection("default", connection) + return connection + except SkipTest: + skip() + + +@fixture(scope="session") +def es_version(client): + info = client.info() + print(info) + yield tuple( + int(x) + for x in re.match(r"^([0-9.]+)", info["version"]["number"]).group(1).split(".") + ) + + +@fixture +def write_client(client): + yield client + client.indices.delete("test-*", ignore=404) + client.indices.delete_template("test-template", ignore=404) + + +@fixture +def mock_client(dummy_response): + client = Mock() + client.search.return_value = dummy_response + add_connection("mock", client) + yield client + connections._conn = {} + connections._kwargs = {} + + +@fixture(scope="session") +def data_client(client): + # create mappings + create_git_index(client, "git") + create_flat_git_index(client, "flat-git") + # load data + bulk(client, DATA, raise_on_error=True, refresh=True) + bulk(client, FLAT_DATA, raise_on_error=True, refresh=True) + yield client + client.indices.delete("git") + client.indices.delete("flat-git") + + +@fixture +def dummy_response(): + return { + "_shards": {"failed": 0, "successful": 10, "total": 10}, + "hits": { + "hits": [ + { + "_index": "test-index", + "_type": "company", + "_id": "elasticsearch", + "_score": 12.0, + "_source": {"city": "Amsterdam", "name": "Elasticsearch"}, + }, + { + "_index": "test-index", + "_type": "employee", + "_id": "42", + "_score": 11.123, + "_routing": "elasticsearch", + "_source": { + "name": {"first": "Shay", "last": "Bannon"}, + "lang": "java", + "twitter": "kimchy", + }, + }, + { + "_index": "test-index", + "_type": "employee", + "_id": "47", + "_score": 1, + "_routing": "elasticsearch", + "_source": { + "name": {"first": "Honza", "last": "Král"}, + "lang": "python", + "twitter": "honzakral", + }, + }, + { + "_index": "test-index", + "_type": "employee", + "_id": "53", + "_score": 16.0, + "_routing": "elasticsearch", + }, + ], + "max_score": 12.0, + "total": 123, 
+ }, + "timed_out": False, + "took": 123, + } + + +@fixture +def aggs_search(): + from elasticsearch_dsl import Search + + s = Search(index="flat-git") + s.aggs.bucket("popular_files", "terms", field="files", size=2).metric( + "line_stats", "stats", field="stats.lines" + ).metric("top_commits", "top_hits", size=2, _source=["stats.*", "committed_date"]) + s.aggs.bucket( + "per_month", "date_histogram", interval="month", field="info.committed_date" + ) + s.aggs.metric("sum_lines", "sum", field="stats.lines") + return s + + +@fixture +def aggs_data(): + return { + "took": 4, + "timed_out": False, + "_shards": {"total": 1, "successful": 1, "failed": 0}, + "hits": {"total": 52, "hits": [], "max_score": 0.0}, + "aggregations": { + "sum_lines": {"value": 25052.0}, + "per_month": { + "buckets": [ + { + "doc_count": 38, + "key": 1393632000000, + "key_as_string": "2014-03-01T00:00:00.000Z", + }, + { + "doc_count": 11, + "key": 1396310400000, + "key_as_string": "2014-04-01T00:00:00.000Z", + }, + { + "doc_count": 3, + "key": 1398902400000, + "key_as_string": "2014-05-01T00:00:00.000Z", + }, + ] + }, + "popular_files": { + "buckets": [ + { + "key": "elasticsearch_dsl", + "line_stats": { + "count": 40, + "max": 228.0, + "min": 2.0, + "sum": 2151.0, + "avg": 53.775, + }, + "doc_count": 40, + "top_commits": { + "hits": { + "total": 40, + "hits": [ + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "_type": "doc", + "_source": { + "stats": { + "files": 4, + "deletions": 7, + "lines": 30, + "insertions": 23, + }, + "committed_date": "2014-05-02T13:47:19", + }, + "_score": 1.0, + "_index": "flat-git", + }, + { + "_id": "eb3e543323f189fd7b698e66295427204fff5755", + "_type": "doc", + "_source": { + "stats": { + "files": 1, + "deletions": 0, + "lines": 18, + "insertions": 18, + }, + "committed_date": "2014-05-01T13:32:14", + }, + "_score": 1.0, + "_index": "flat-git", + }, + ], + "max_score": 1.0, + } + }, + }, + { + "key": "test_elasticsearch_dsl", + "line_stats": { + "count": 35, + "max": 228.0, + "min": 2.0, + "sum": 1939.0, + "avg": 55.4, + }, + "doc_count": 35, + "top_commits": { + "hits": { + "total": 35, + "hits": [ + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "_type": "doc", + "_source": { + "stats": { + "files": 4, + "deletions": 7, + "lines": 30, + "insertions": 23, + }, + "committed_date": "2014-05-02T13:47:19", + }, + "_score": 1.0, + "_index": "flat-git", + }, + { + "_id": "dd15b6ba17dd9ba16363a51f85b31f66f1fb1157", + "_type": "doc", + "_source": { + "stats": { + "files": 3, + "deletions": 18, + "lines": 62, + "insertions": 44, + }, + "committed_date": "2014-05-01T13:30:44", + }, + "_score": 1.0, + "_index": "flat-git", + }, + ], + "max_score": 1.0, + } + }, + }, + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 120, + }, + }, + } + + +@fixture +def pull_request(write_client): + PullRequest.init() + pr = PullRequest( + _id=42, + comments=[ + Comment( + content="Hello World!", + author=User(name="honzakral"), + created_at=datetime(2018, 1, 9, 10, 17, 3, 21184), + history=[ + History( + timestamp=datetime(2012, 1, 1), + diff="-Ahoj Svete!\n+Hello World!", + ) + ], + ), + ], + created_at=datetime(2018, 1, 9, 9, 17, 3, 21184), + ) + pr.save(refresh=True) + return pr + + +@fixture +def setup_ubq_tests(client): + index = "test-git" + create_git_index(client, index) + bulk(client, TEST_GIT_DATA, raise_on_error=True, refresh=True) + return index diff --git a/tests/test_aggs.py b/tests/test_aggs.py new file mode 100644 index 000000000..6c39383a3 --- /dev/null +++ 
b/tests/test_aggs.py @@ -0,0 +1,356 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pytest import raises + +from elasticsearch_dsl import aggs, query + + +def test_repr(): + max_score = aggs.Max(field="score") + a = aggs.A("terms", field="tags", aggs={"max_score": max_score}) + + assert "Terms(aggs={'max_score': Max(field='score')}, field='tags')" == repr(a) + + +def test_meta(): + max_score = aggs.Max(field="score") + a = aggs.A( + "terms", field="tags", aggs={"max_score": max_score}, meta={"some": "metadata"} + ) + + assert { + "terms": {"field": "tags"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + "meta": {"some": "metadata"}, + } == a.to_dict() + + +def test_meta_from_dict(): + max_score = aggs.Max(field="score") + a = aggs.A( + "terms", field="tags", aggs={"max_score": max_score}, meta={"some": "metadata"} + ) + + assert aggs.A(a.to_dict()) == a + + +def test_A_creates_proper_agg(): + a = aggs.A("terms", field="tags") + + assert isinstance(a, aggs.Terms) + assert a._params == {"field": "tags"} + + +def test_A_handles_nested_aggs_properly(): + max_score = aggs.Max(field="score") + a = aggs.A("terms", field="tags", aggs={"max_score": max_score}) + + assert isinstance(a, aggs.Terms) + assert a._params == {"field": "tags", "aggs": {"max_score": max_score}} + + +def test_A_passes_aggs_through(): + a = aggs.A("terms", field="tags") + assert aggs.A(a) is a + + +def test_A_from_dict(): + d = { + "terms": {"field": "tags"}, + "aggs": {"per_author": {"terms": {"field": "author.raw"}}}, + } + a = aggs.A(d) + + assert isinstance(a, aggs.Terms) + assert a._params == { + "field": "tags", + "aggs": {"per_author": aggs.A("terms", field="author.raw")}, + } + assert a["per_author"] == aggs.A("terms", field="author.raw") + assert a.aggs.per_author == aggs.A("terms", field="author.raw") + + +def test_A_fails_with_incorrect_dict(): + correct_d = { + "terms": {"field": "tags"}, + "aggs": {"per_author": {"terms": {"field": "author.raw"}}}, + } + + with raises(Exception): + aggs.A(correct_d, field="f") + + d = correct_d.copy() + del d["terms"] + with raises(Exception): + aggs.A(d) + + d = correct_d.copy() + d["xx"] = {} + with raises(Exception): + aggs.A(d) + + +def test_A_fails_with_agg_and_params(): + a = aggs.A("terms", field="tags") + + with raises(Exception): + aggs.A(a, field="score") + + +def test_buckets_are_nestable(): + a = aggs.Terms(field="tags") + b = a.bucket("per_author", "terms", field="author.raw") + + assert isinstance(b, aggs.Terms) + assert b._params == {"field": "author.raw"} + assert a.aggs == {"per_author": b} + + +def test_metric_inside_buckets(): + a = aggs.Terms(field="tags") + b = a.metric("max_score", "max", field="score") + + # returns bucket so it's chainable + assert a is b + assert a.aggs["max_score"] == aggs.Max(field="score") 
+ + +def test_buckets_equals_counts_subaggs(): + a = aggs.Terms(field="tags") + a.bucket("per_author", "terms", field="author.raw") + b = aggs.Terms(field="tags") + + assert a != b + + +def test_buckets_to_dict(): + a = aggs.Terms(field="tags") + a.bucket("per_author", "terms", field="author.raw") + + assert { + "terms": {"field": "tags"}, + "aggs": {"per_author": {"terms": {"field": "author.raw"}}}, + } == a.to_dict() + + a = aggs.Terms(field="tags") + a.metric("max_score", "max", field="score") + + assert { + "terms": {"field": "tags"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } == a.to_dict() + + +def test_nested_buckets_are_reachable_as_getitem(): + a = aggs.Terms(field="tags") + b = a.bucket("per_author", "terms", field="author.raw") + + assert a["per_author"] is not b + assert a["per_author"] == b + + +def test_nested_buckets_are_settable_as_getitem(): + a = aggs.Terms(field="tags") + b = a["per_author"] = aggs.A("terms", field="author.raw") + + assert a.aggs["per_author"] is b + + +def test_filter_can_be_instantiated_using_positional_args(): + a = aggs.Filter(query.Q("term", f=42)) + + assert {"filter": {"term": {"f": 42}}} == a.to_dict() + + assert a == aggs.A("filter", query.Q("term", f=42)) + + +def test_filter_aggregation_as_nested_agg(): + a = aggs.Terms(field="tags") + a.bucket("filtered", "filter", query.Q("term", f=42)) + + assert { + "terms": {"field": "tags"}, + "aggs": {"filtered": {"filter": {"term": {"f": 42}}}}, + } == a.to_dict() + + +def test_filter_aggregation_with_nested_aggs(): + a = aggs.Filter(query.Q("term", f=42)) + a.bucket("testing", "terms", field="tags") + + assert { + "filter": {"term": {"f": 42}}, + "aggs": {"testing": {"terms": {"field": "tags"}}}, + } == a.to_dict() + + +def test_filters_correctly_identifies_the_hash(): + a = aggs.A( + "filters", + filters={ + "group_a": {"term": {"group": "a"}}, + "group_b": {"term": {"group": "b"}}, + }, + ) + + assert { + "filters": { + "filters": { + "group_a": {"term": {"group": "a"}}, + "group_b": {"term": {"group": "b"}}, + } + } + } == a.to_dict() + assert a.filters.group_a == query.Q("term", group="a") + + +def test_bucket_sort_agg(): + bucket_sort_agg = aggs.BucketSort(sort=[{"total_sales": {"order": "desc"}}], size=3) + assert bucket_sort_agg.to_dict() == { + "bucket_sort": {"sort": [{"total_sales": {"order": "desc"}}], "size": 3} + } + + a = aggs.DateHistogram(field="date", interval="month") + a.bucket("total_sales", "sum", field="price") + a.bucket( + "sales_bucket_sort", + "bucket_sort", + sort=[{"total_sales": {"order": "desc"}}], + size=3, + ) + assert { + "date_histogram": {"field": "date", "interval": "month"}, + "aggs": { + "total_sales": {"sum": {"field": "price"}}, + "sales_bucket_sort": { + "bucket_sort": {"sort": [{"total_sales": {"order": "desc"}}], "size": 3} + }, + }, + } == a.to_dict() + + +def test_bucket_sort_agg_only_trnunc(): + bucket_sort_agg = aggs.BucketSort(**{"from": 1, "size": 1}) + assert bucket_sort_agg.to_dict() == {"bucket_sort": {"from": 1, "size": 1}} + + a = aggs.DateHistogram(field="date", interval="month") + a.bucket("bucket_truncate", "bucket_sort", **{"from": 1, "size": 1}) + assert { + "date_histogram": {"field": "date", "interval": "month"}, + "aggs": {"bucket_truncate": {"bucket_sort": {"from": 1, "size": 1}}}, + } == a.to_dict() + + +def test_geohash_grid_aggregation(): + a = aggs.GeohashGrid(**{"field": "centroid", "precision": 3}) + + assert {"geohash_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + + +def 
test_geotile_grid_aggregation(): + a = aggs.GeotileGrid(**{"field": "centroid", "precision": 3}) + + assert {"geotile_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + + +def test_boxplot_aggregation(): + a = aggs.Boxplot(field="load_time") + + assert {"boxplot": {"field": "load_time"}} == a.to_dict() + + +def test_rare_terms_aggregation(): + a = aggs.RareTerms(field="the-field") + a.bucket("total_sales", "sum", field="price") + a.bucket( + "sales_bucket_sort", + "bucket_sort", + sort=[{"total_sales": {"order": "desc"}}], + size=3, + ) + + assert { + "aggs": { + "sales_bucket_sort": { + "bucket_sort": {"size": 3, "sort": [{"total_sales": {"order": "desc"}}]} + }, + "total_sales": {"sum": {"field": "price"}}, + }, + "rare_terms": {"field": "the-field"}, + } == a.to_dict() + + +def test_variable_width_histogram_aggregation(): + a = aggs.VariableWidthHistogram(field="price", buckets=2) + assert {"variable_width_histogram": {"buckets": 2, "field": "price"}} == a.to_dict() + + +def test_median_absolute_deviation_aggregation(): + a = aggs.MedianAbsoluteDeviation(field="rating") + + assert {"median_absolute_deviation": {"field": "rating"}} == a.to_dict() + + +def test_t_test_aggregation(): + a = aggs.TTest( + a={"field": "startup_time_before"}, + b={"field": "startup_time_after"}, + type="paired", + ) + + assert { + "t_test": { + "a": {"field": "startup_time_before"}, + "b": {"field": "startup_time_after"}, + "type": "paired", + } + } == a.to_dict() + + +def test_inference_aggregation(): + a = aggs.Inference(model_id="model-id", buckets_path={"agg_name": "agg_name"}) + assert { + "inference": {"buckets_path": {"agg_name": "agg_name"}, "model_id": "model-id"} + } == a.to_dict() + + +def test_moving_percentiles_aggregation(): + a = aggs.DateHistogram() + a.bucket("the_percentile", "percentiles", field="price", percents=[1.0, 99.0]) + a.pipeline( + "the_movperc", "moving_percentiles", buckets_path="the_percentile", window=10 + ) + + assert { + "aggs": { + "the_movperc": { + "moving_percentiles": {"buckets_path": "the_percentile", "window": 10} + }, + "the_percentile": { + "percentiles": {"field": "price", "percents": [1.0, 99.0]} + }, + }, + "date_histogram": {}, + } == a.to_dict() + + +def test_normalize_aggregation(): + a = aggs.Normalize(buckets_path="normalized", method="percent_of_sum") + assert { + "normalize": {"buckets_path": "normalized", "method": "percent_of_sum"} + } == a.to_dict() diff --git a/tests/test_analysis.py b/tests/test_analysis.py new file mode 100644 index 000000000..90445502b --- /dev/null +++ b/tests/test_analysis.py @@ -0,0 +1,217 @@ +# -*- coding: utf-8 -*- +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pytest import raises + +from elasticsearch_dsl import analysis + + +def test_analyzer_serializes_as_name(): + a = analysis.analyzer("my_analyzer") + + assert "my_analyzer" == a.to_dict() + + +def test_analyzer_has_definition(): + a = analysis.CustomAnalyzer( + "my_analyzer", tokenizer="keyword", filter=["lowercase"] + ) + + assert { + "type": "custom", + "tokenizer": "keyword", + "filter": ["lowercase"], + } == a.get_definition() + + +def test_simple_multiplexer_filter(): + a = analysis.analyzer( + "my_analyzer", + tokenizer="keyword", + filter=[ + analysis.token_filter( + "my_multi", "multiplexer", filters=["lowercase", "lowercase, stop"] + ) + ], + ) + + assert { + "analyzer": { + "my_analyzer": { + "filter": ["my_multi"], + "tokenizer": "keyword", + "type": "custom", + } + }, + "filter": { + "my_multi": { + "filters": ["lowercase", "lowercase, stop"], + "type": "multiplexer", + } + }, + } == a.get_analysis_definition() + + +def test_multiplexer_with_custom_filter(): + a = analysis.analyzer( + "my_analyzer", + tokenizer="keyword", + filter=[ + analysis.token_filter( + "my_multi", + "multiplexer", + filters=[ + [analysis.token_filter("en", "snowball", language="English")], + "lowercase, stop", + ], + ) + ], + ) + + assert { + "analyzer": { + "my_analyzer": { + "filter": ["my_multi"], + "tokenizer": "keyword", + "type": "custom", + } + }, + "filter": { + "en": {"type": "snowball", "language": "English"}, + "my_multi": {"filters": ["en", "lowercase, stop"], "type": "multiplexer"}, + }, + } == a.get_analysis_definition() + + +def test_conditional_token_filter(): + a = analysis.analyzer( + "my_cond", + tokenizer=analysis.tokenizer("keyword"), + filter=[ + analysis.token_filter( + "testing", + "condition", + script={"source": "return true"}, + filter=[ + "lowercase", + analysis.token_filter("en", "snowball", language="English"), + ], + ), + "stop", + ], + ) + + assert { + "analyzer": { + "my_cond": { + "filter": ["testing", "stop"], + "tokenizer": "keyword", + "type": "custom", + } + }, + "filter": { + "en": {"language": "English", "type": "snowball"}, + "testing": { + "script": {"source": "return true"}, + "filter": ["lowercase", "en"], + "type": "condition", + }, + }, + } == a.get_analysis_definition() + + +def test_conflicting_nested_filters_cause_error(): + a = analysis.analyzer( + "my_cond", + tokenizer=analysis.tokenizer("keyword"), + filter=[ + analysis.token_filter("en", "stemmer", language="english"), + analysis.token_filter( + "testing", + "condition", + script={"source": "return true"}, + filter=[ + "lowercase", + analysis.token_filter("en", "snowball", language="English"), + ], + ), + ], + ) + + with raises(ValueError): + a.get_analysis_definition() + + +def test_normalizer_serializes_as_name(): + n = analysis.normalizer("my_normalizer") + + assert "my_normalizer" == n.to_dict() + + +def test_normalizer_has_definition(): + n = analysis.CustomNormalizer( + "my_normalizer", filter=["lowercase", "asciifolding"], char_filter=["quote"] + ) + + assert { + "type": "custom", + "filter": ["lowercase", "asciifolding"], + "char_filter": ["quote"], + } == n.get_definition() + + +def test_tokenizer(): + t = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3) + + assert t.to_dict() == "trigram" + assert {"type": "nGram", "min_gram": 3, "max_gram": 3} == t.get_definition() + + +def test_custom_analyzer_can_collect_custom_items(): + trigram = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3) + my_stop = analysis.token_filter("my_stop", "stop", stopwords=["a", "b"]) 
+ umlauts = analysis.char_filter("umlauts", "pattern_replace", mappings=["ü=>ue"]) + a = analysis.analyzer( + "my_analyzer", + tokenizer=trigram, + filter=["lowercase", my_stop], + char_filter=["html_strip", umlauts], + ) + + assert a.to_dict() == "my_analyzer" + assert { + "analyzer": { + "my_analyzer": { + "type": "custom", + "tokenizer": "trigram", + "filter": ["lowercase", "my_stop"], + "char_filter": ["html_strip", "umlauts"], + } + }, + "tokenizer": {"trigram": trigram.get_definition()}, + "filter": {"my_stop": my_stop.get_definition()}, + "char_filter": {"umlauts": umlauts.get_definition()}, + } == a.get_analysis_definition() + + +def test_stemmer_analyzer_can_pass_name(): + t = analysis.token_filter( + "my_english_filter", name="minimal_english", type="stemmer" + ) + assert t.to_dict() == "my_english_filter" + assert {"type": "stemmer", "name": "minimal_english"} == t.get_definition() diff --git a/tests/test_connections.py b/tests/test_connections.py new file mode 100644 index 000000000..278760cc3 --- /dev/null +++ b/tests/test_connections.py @@ -0,0 +1,91 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from elasticsearch import Elasticsearch +from pytest import raises + +from elasticsearch_dsl import connections, serializer + + +def test_default_connection_is_returned_by_default(): + c = connections.Connections() + + con, con2 = object(), object() + c.add_connection("default", con) + + c.add_connection("not-default", con2) + + assert c.get_connection() is con + + +def test_get_connection_created_connection_if_needed(): + c = connections.Connections() + c.configure(default={"hosts": ["es.com"]}, local={"hosts": ["localhost"]}) + + default = c.get_connection() + local = c.get_connection("local") + + assert isinstance(default, Elasticsearch) + assert isinstance(local, Elasticsearch) + + assert [{"host": "es.com"}] == default.transport.hosts + assert [{"host": "localhost"}] == local.transport.hosts + + +def test_configure_preserves_unchanged_connections(): + c = connections.Connections() + + c.configure(default={"hosts": ["es.com"]}, local={"hosts": ["localhost"]}) + default = c.get_connection() + local = c.get_connection("local") + + c.configure(default={"hosts": ["not-es.com"]}, local={"hosts": ["localhost"]}) + new_default = c.get_connection() + new_local = c.get_connection("local") + + assert new_local is local + assert new_default is not default + + +def test_remove_connection_removes_both_conn_and_conf(): + c = connections.Connections() + + c.configure(default={"hosts": ["es.com"]}, local={"hosts": ["localhost"]}) + c.add_connection("local2", object()) + + c.remove_connection("default") + c.get_connection("local2") + c.remove_connection("local2") + + with raises(Exception): + c.get_connection("local2") + c.get_connection("default") + + +def test_create_connection_constructs_client(): + c = connections.Connections() + c.create_connection("testing", hosts=["es.com"]) + + con = c.get_connection("testing") + assert [{"host": "es.com"}] == con.transport.hosts + + +def test_create_connection_adds_our_serializer(): + c = connections.Connections() + c.create_connection("testing", hosts=["es.com"]) + + assert c.get_connection("testing").transport.serializer is serializer.serializer diff --git a/tests/test_document.py b/tests/test_document.py new file mode 100644 index 000000000..5e34f0dbb --- /dev/null +++ b/tests/test_document.py @@ -0,0 +1,637 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import codecs +import ipaddress +import pickle +from datetime import datetime +from hashlib import md5 + +from pytest import raises + +from elasticsearch_dsl import ( + Index, + InnerDoc, + Mapping, + Range, + analyzer, + document, + field, + utils, +) +from elasticsearch_dsl.exceptions import IllegalOperation, ValidationException + + +class MyInner(InnerDoc): + old_field = field.Text() + + +class MyDoc(document.Document): + title = field.Keyword() + name = field.Text() + created_at = field.Date() + inner = field.Object(MyInner) + + +class MySubDoc(MyDoc): + name = field.Keyword() + + class Index: + name = "default-index" + + +class MyDoc2(document.Document): + extra = field.Long() + + +class MyMultiSubDoc(MyDoc2, MySubDoc): + pass + + +class Comment(document.InnerDoc): + title = field.Text() + tags = field.Keyword(multi=True) + + +class DocWithNested(document.Document): + comments = field.Nested(Comment) + + class Index: + name = "test-doc-with-nested" + + +class SimpleCommit(document.Document): + files = field.Text(multi=True) + + class Index: + name = "test-git" + + +class Secret(str): + pass + + +class SecretField(field.CustomField): + builtin_type = "text" + + def _serialize(self, data): + return codecs.encode(data, "rot_13") + + def _deserialize(self, data): + if isinstance(data, Secret): + return data + return Secret(codecs.decode(data, "rot_13")) + + +class SecretDoc(document.Document): + title = SecretField(index="no") + + class Index: + name = "test-secret-doc" + + +class NestedSecret(document.Document): + secrets = field.Nested(SecretDoc) + + class Index: + name = "test-nested-secret" + + +class OptionalObjectWithRequiredField(document.Document): + comments = field.Nested(properties={"title": field.Keyword(required=True)}) + + class Index: + name = "test-required" + + +class Host(document.Document): + ip = field.Ip() + + class Index: + name = "test-host" + + +def test_range_serializes_properly(): + class D(document.Document): + lr = field.LongRange() + + d = D(lr=Range(lt=42)) + assert 40 in d.lr + assert 47 not in d.lr + assert {"lr": {"lt": 42}} == d.to_dict() + + d = D(lr={"lt": 42}) + assert {"lr": {"lt": 42}} == d.to_dict() + + +def test_range_deserializes_properly(): + class D(document.InnerDoc): + lr = field.LongRange() + + d = D.from_es({"lr": {"lt": 42}}, True) + assert isinstance(d.lr, Range) + assert 40 in d.lr + assert 47 not in d.lr + + +def test_resolve_nested(): + nested, field = NestedSecret._index.resolve_nested("secrets.title") + assert nested == ["secrets"] + assert field is NestedSecret._doc_type.mapping["secrets"]["title"] + + +def test_conflicting_mapping_raises_error_in_index_to_dict(): + class A(document.Document): + name = field.Text() + + class B(document.Document): + name = field.Keyword() + + i = Index("i") + i.document(A) + i.document(B) + + with raises(ValueError): + i.to_dict() + + +def test_ip_address_serializes_properly(): + host = Host(ip=ipaddress.IPv4Address(u"10.0.0.1")) + + assert {"ip": "10.0.0.1"} == host.to_dict() + + +def test_matches_uses_index(): + assert SimpleCommit._matches({"_index": "test-git"}) + assert not SimpleCommit._matches({"_index": "not-test-git"}) + + +def test_matches_with_no_name_always_matches(): + class D(document.Document): + pass + + assert D._matches({}) + assert D._matches({"_index": "whatever"}) + + +def test_matches_accepts_wildcards(): + class MyDoc(document.Document): + class Index: + name = "my-*" + + assert MyDoc._matches({"_index": "my-index"}) + assert not MyDoc._matches({"_index": "not-my-index"}) + + 
+def test_assigning_attrlist_to_field(): + sc = SimpleCommit() + l = ["README", "README.rst"] + sc.files = utils.AttrList(l) + + assert sc.to_dict()["files"] is l + + +def test_optional_inner_objects_are_not_validated_if_missing(): + d = OptionalObjectWithRequiredField() + + assert d.full_clean() is None + + +def test_custom_field(): + s = SecretDoc(title=Secret("Hello")) + + assert {"title": "Uryyb"} == s.to_dict() + assert s.title == "Hello" + + s = SecretDoc.from_es({"_source": {"title": "Uryyb"}}) + assert s.title == "Hello" + assert isinstance(s.title, Secret) + + +def test_custom_field_mapping(): + assert { + "properties": {"title": {"index": "no", "type": "text"}} + } == SecretDoc._doc_type.mapping.to_dict() + + +def test_custom_field_in_nested(): + s = NestedSecret() + s.secrets.append(SecretDoc(title=Secret("Hello"))) + + assert {"secrets": [{"title": "Uryyb"}]} == s.to_dict() + assert s.secrets[0].title == "Hello" + + +def test_multi_works_after_doc_has_been_saved(): + c = SimpleCommit() + c.full_clean() + c.files.append("setup.py") + + assert c.to_dict() == {"files": ["setup.py"]} + + +def test_multi_works_in_nested_after_doc_has_been_serialized(): + # Issue #359 + c = DocWithNested(comments=[Comment(title="First!")]) + + assert [] == c.comments[0].tags + assert {"comments": [{"title": "First!"}]} == c.to_dict() + assert [] == c.comments[0].tags + + +def test_null_value_for_object(): + d = MyDoc(inner=None) + + assert d.inner is None + + +def test_inherited_doc_types_can_override_index(): + class MyDocDifferentIndex(MySubDoc): + class Index: + name = "not-default-index" + settings = {"number_of_replicas": 0} + aliases = {"a": {}} + analyzers = [analyzer("my_analizer", tokenizer="keyword")] + + assert MyDocDifferentIndex._index._name == "not-default-index" + assert MyDocDifferentIndex()._get_index() == "not-default-index" + assert MyDocDifferentIndex._index.to_dict() == { + "aliases": {"a": {}}, + "mappings": { + "properties": { + "created_at": {"type": "date"}, + "inner": { + "type": "object", + "properties": {"old_field": {"type": "text"}}, + }, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + } + }, + "settings": { + "analysis": { + "analyzer": {"my_analizer": {"tokenizer": "keyword", "type": "custom"}} + }, + "number_of_replicas": 0, + }, + } + + +def test_to_dict_with_meta(): + d = MySubDoc(title="hello") + d.meta.routing = "some-parent" + + assert { + "_index": "default-index", + "_routing": "some-parent", + "_source": {"title": "hello"}, + } == d.to_dict(True) + + +def test_to_dict_with_meta_includes_custom_index(): + d = MySubDoc(title="hello") + d.meta.index = "other-index" + + assert {"_index": "other-index", "_source": {"title": "hello"}} == d.to_dict(True) + + +def test_to_dict_without_skip_empty_will_include_empty_fields(): + d = MySubDoc(tags=[], title=None, inner={}) + + assert {} == d.to_dict() + assert {"tags": [], "title": None, "inner": {}} == d.to_dict(skip_empty=False) + + +def test_attribute_can_be_removed(): + d = MyDoc(title="hello") + + del d.title + assert "title" not in d._d_ + + +def test_doc_type_can_be_correctly_pickled(): + d = DocWithNested( + title="Hello World!", comments=[Comment(title="hellp")], meta={"id": 42} + ) + s = pickle.dumps(d) + + d2 = pickle.loads(s) + + assert d2 == d + assert 42 == d2.meta.id + assert "Hello World!" 
== d2.title + assert [{"title": "hellp"}] == d2.comments + assert isinstance(d2.comments[0], Comment) + + +def test_meta_is_accessible_even_on_empty_doc(): + d = MyDoc() + d.meta + + d = MyDoc(title="aaa") + d.meta + + +def test_meta_field_mapping(): + class User(document.Document): + username = field.Text() + + class Meta: + all = document.MetaField(enabled=False) + _index = document.MetaField(enabled=True) + dynamic = document.MetaField("strict") + dynamic_templates = document.MetaField([42]) + + assert { + "properties": {"username": {"type": "text"}}, + "_all": {"enabled": False}, + "_index": {"enabled": True}, + "dynamic": "strict", + "dynamic_templates": [42], + } == User._doc_type.mapping.to_dict() + + +def test_multi_value_fields(): + class Blog(document.Document): + tags = field.Keyword(multi=True) + + b = Blog() + assert [] == b.tags + b.tags.append("search") + b.tags.append("python") + assert ["search", "python"] == b.tags + + +def test_docs_with_properties(): + class User(document.Document): + pwd_hash = field.Text() + + def check_password(self, pwd): + return md5(pwd).hexdigest() == self.pwd_hash + + @property + def password(self): + raise AttributeError("readonly") + + @password.setter + def password(self, pwd): + self.pwd_hash = md5(pwd).hexdigest() + + u = User(pwd_hash=md5(b"secret").hexdigest()) + assert u.check_password(b"secret") + assert not u.check_password(b"not-secret") + + u.password = b"not-secret" + assert "password" not in u._d_ + assert not u.check_password(b"secret") + assert u.check_password(b"not-secret") + + with raises(AttributeError): + u.password + + +def test_nested_can_be_assigned_to(): + d1 = DocWithNested(comments=[Comment(title="First!")]) + d2 = DocWithNested() + + d2.comments = d1.comments + assert isinstance(d1.comments[0], Comment) + assert d2.comments == [{"title": "First!"}] + assert {"comments": [{"title": "First!"}]} == d2.to_dict() + assert isinstance(d2.comments[0], Comment) + + +def test_nested_can_be_none(): + d = DocWithNested(comments=None, title="Hello World!") + + assert {"title": "Hello World!"} == d.to_dict() + + +def test_nested_defaults_to_list_and_can_be_updated(): + md = DocWithNested() + + assert [] == md.comments + + md.comments.append({"title": "hello World!"}) + assert {"comments": [{"title": "hello World!"}]} == md.to_dict() + + +def test_to_dict_is_recursive_and_can_cope_with_multi_values(): + md = MyDoc(name=["a", "b", "c"]) + md.inner = [MyInner(old_field="of1"), MyInner(old_field="of2")] + + assert isinstance(md.inner[0], MyInner) + + assert { + "name": ["a", "b", "c"], + "inner": [{"old_field": "of1"}, {"old_field": "of2"}], + } == md.to_dict() + + +def test_to_dict_ignores_empty_collections(): + md = MySubDoc(name="", address={}, count=0, valid=False, tags=[]) + + assert {"name": "", "count": 0, "valid": False} == md.to_dict() + + +def test_declarative_mapping_definition(): + assert issubclass(MyDoc, document.Document) + assert hasattr(MyDoc, "_doc_type") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "text"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + } + } == MyDoc._doc_type.mapping.to_dict() + + +def test_you_can_supply_own_mapping_instance(): + class MyD(document.Document): + title = field.Text() + + class Meta: + mapping = Mapping() + mapping.meta("_all", enabled=False) + + assert { + "_all": {"enabled": False}, + "properties": {"title": {"type": "text"}}, + } == MyD._doc_type.mapping.to_dict() + + +def 
test_document_can_be_created_dynamically(): + n = datetime.now() + md = MyDoc(title="hello") + md.name = "My Fancy Document!" + md.created_at = n + + inner = md.inner + # consistent returns + assert inner is md.inner + inner.old_field = "Already defined." + + md.inner.new_field = ["undefined", "field"] + + assert { + "title": "hello", + "name": "My Fancy Document!", + "created_at": n, + "inner": {"old_field": "Already defined.", "new_field": ["undefined", "field"]}, + } == md.to_dict() + + +def test_invalid_date_will_raise_exception(): + md = MyDoc() + md.created_at = "not-a-date" + with raises(ValidationException): + md.full_clean() + + +def test_document_inheritance(): + assert issubclass(MySubDoc, MyDoc) + assert issubclass(MySubDoc, document.Document) + assert hasattr(MySubDoc, "_doc_type") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + } + } == MySubDoc._doc_type.mapping.to_dict() + + +def test_child_class_can_override_parent(): + class A(document.Document): + o = field.Object(dynamic=False, properties={"a": field.Text()}) + + class B(A): + o = field.Object(dynamic="strict", properties={"b": field.Text()}) + + assert { + "properties": { + "o": { + "dynamic": "strict", + "properties": {"a": {"type": "text"}, "b": {"type": "text"}}, + "type": "object", + } + } + } == B._doc_type.mapping.to_dict() + + +def test_meta_fields_are_stored_in_meta_and_ignored_by_to_dict(): + md = MySubDoc(meta={"id": 42}, name="My First doc!") + + md.meta.index = "my-index" + assert md.meta.index == "my-index" + assert md.meta.id == 42 + assert {"name": "My First doc!"} == md.to_dict() + assert {"id": 42, "index": "my-index"} == md.meta.to_dict() + + +def test_index_inheritance(): + assert issubclass(MyMultiSubDoc, MySubDoc) + assert issubclass(MyMultiSubDoc, MyDoc2) + assert issubclass(MyMultiSubDoc, document.Document) + assert hasattr(MyMultiSubDoc, "_doc_type") + assert hasattr(MyMultiSubDoc, "_index") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + "extra": {"type": "long"}, + } + } == MyMultiSubDoc._doc_type.mapping.to_dict() + + +def test_meta_fields_can_be_set_directly_in_init(): + p = object() + md = MyDoc(_id=p, title="Hello World!") + + assert md.meta.id is p + + +def test_save_no_index(mock_client): + md = MyDoc() + with raises(ValidationException): + md.save(using="mock") + + +def test_delete_no_index(mock_client): + md = MyDoc() + with raises(ValidationException): + md.delete(using="mock") + + +def test_update_no_fields(): + md = MyDoc() + with raises(IllegalOperation): + md.update() + + +def test_search_with_custom_alias_and_index(mock_client): + search_object = MyDoc.search( + using="staging", index=["custom_index1", "custom_index2"] + ) + + assert search_object._using == "staging" + assert search_object._index == ["custom_index1", "custom_index2"] + + +def test_from_es_respects_underscored_non_meta_fields(): + doc = { + "_index": "test-index", + "_id": "elasticsearch", + "_score": 12.0, + "fields": {"hello": "world", "_routing": "es", "_tags": ["search"]}, + "_source": { + "city": "Amsterdam", + "name": "Elasticsearch", + "_tagline": "You know, for search", + }, + } + + class Company(document.Document): + class Index: + name = "test-company" + + c = Company.from_es(doc) + + assert 
c.meta.fields._tags == ["search"] + assert c.meta.fields._routing == "es" + assert c._tagline == "You know, for search" + + +def test_nested_and_object_inner_doc(): + class MySubDocWithNested(MyDoc): + nested_inner = field.Nested(MyInner) + + props = MySubDocWithNested._doc_type.mapping.to_dict()["properties"] + assert props == { + "created_at": {"type": "date"}, + "inner": {"properties": {"old_field": {"type": "text"}}, "type": "object"}, + "name": {"type": "text"}, + "nested_inner": { + "properties": {"old_field": {"type": "text"}}, + "type": "nested", + }, + "title": {"type": "keyword"}, + } diff --git a/tests/test_faceted_search.py b/tests/test_faceted_search.py new file mode 100644 index 000000000..6e4981319 --- /dev/null +++ b/tests/test_faceted_search.py @@ -0,0 +1,194 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime + +import pytest + +from elasticsearch_dsl.faceted_search import ( + DateHistogramFacet, + FacetedSearch, + TermsFacet, +) + + +class BlogSearch(FacetedSearch): + doc_types = ["user", "post"] + fields = ( + "title^5", + "body", + ) + + facets = { + "category": TermsFacet(field="category.raw"), + "tags": TermsFacet(field="tags"), + } + + +def test_query_is_created_properly(): + bs = BlogSearch("python search") + s = bs.build_search() + + assert s._doc_type == ["user", "post"] + assert { + "aggs": { + "_filter_tags": { + "filter": {"match_all": {}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ("title^5", "body"), "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == s.to_dict() + + +def test_query_is_created_properly_with_sort_tuple(): + bs = BlogSearch("python search", sort=("category", "-title")) + s = bs.build_search() + + assert s._doc_type == ["user", "post"] + assert { + "aggs": { + "_filter_tags": { + "filter": {"match_all": {}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ("title^5", "body"), "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + "sort": ["category", {"title": {"order": "desc"}}], + } == s.to_dict() + + +def test_filter_is_applied_to_search_but_not_relevant_facet(): + bs = BlogSearch("python search", filters={"category": "elastic"}) + s = bs.build_search() + + assert { + "aggs": { + "_filter_tags": { + "filter": {"terms": {"category.raw": ["elastic"]}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + 
"aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "post_filter": {"terms": {"category.raw": ["elastic"]}}, + "query": { + "multi_match": {"fields": ("title^5", "body"), "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == s.to_dict() + + +def test_filters_are_applied_to_search_ant_relevant_facets(): + bs = BlogSearch( + "python search", filters={"category": "elastic", "tags": ["python", "django"]} + ) + s = bs.build_search() + + d = s.to_dict() + + # we need to test post_filter without relying on order + f = d["post_filter"]["bool"].pop("must") + assert len(f) == 2 + assert {"terms": {"category.raw": ["elastic"]}} in f + assert {"terms": {"tags": ["python", "django"]}} in f + + assert { + "aggs": { + "_filter_tags": { + "filter": {"terms": {"category.raw": ["elastic"]}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"terms": {"tags": ["python", "django"]}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ("title^5", "body"), "query": "python search"} + }, + "post_filter": {"bool": {}}, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == d + + +def test_date_histogram_facet_with_1970_01_01_date(): + dhf = DateHistogramFacet() + assert dhf.get_value({"key": None}) == datetime(1970, 1, 1, 0, 0) + assert dhf.get_value({"key": 0}) == datetime(1970, 1, 1, 0, 0) + + +@pytest.mark.parametrize( + ["interval_type", "interval"], + [ + ("interval", "year"), + ("calendar_interval", "year"), + ("interval", "month"), + ("calendar_interval", "month"), + ("interval", "week"), + ("calendar_interval", "week"), + ("interval", "day"), + ("calendar_interval", "day"), + ("fixed_interval", "day"), + ("interval", "hour"), + ("fixed_interval", "hour"), + ("interval", "1Y"), + ("calendar_interval", "1Y"), + ("interval", "1M"), + ("calendar_interval", "1M"), + ("interval", "1w"), + ("calendar_interval", "1w"), + ("interval", "1d"), + ("calendar_interval", "1d"), + ("fixed_interval", "1d"), + ("interval", "1h"), + ("fixed_interval", "1h"), + ], +) +def test_date_histogram_interval_types(interval_type, interval): + dhf = DateHistogramFacet(field="@timestamp", **{interval_type: interval}) + assert dhf.get_aggregation().to_dict() == { + "date_histogram": { + "field": "@timestamp", + interval_type: interval, + "min_doc_count": 0, + } + } + dhf.get_value_filter(datetime.now()) + + +def test_date_histogram_no_interval_keyerror(): + dhf = DateHistogramFacet(field="@timestamp") + with pytest.raises(KeyError) as e: + dhf.get_value_filter(datetime.now()) + assert str(e.value) == "'interval'" diff --git a/tests/test_field.py b/tests/test_field.py new file mode 100644 index 000000000..8bc8d6f03 --- /dev/null +++ b/tests/test_field.py @@ -0,0 +1,212 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +import base64 +import ipaddress +from datetime import datetime + +import pytest +from dateutil import tz + +from elasticsearch_dsl import InnerDoc, Range, ValidationException, field + + +def test_date_range_deserialization(): + data = {"lt": "2018-01-01T00:30:10"} + + r = field.DateRange().deserialize(data) + + assert isinstance(r, Range) + assert r.lt == datetime(2018, 1, 1, 0, 30, 10) + + +def test_boolean_deserialization(): + bf = field.Boolean() + + assert not bf.deserialize("false") + assert not bf.deserialize(False) + assert not bf.deserialize("") + assert not bf.deserialize(0) + + assert bf.deserialize(True) + assert bf.deserialize("true") + assert bf.deserialize(1) + + +def test_date_field_can_have_default_tz(): + f = field.Date(default_timezone="UTC") + now = datetime.now() + + now_with_tz = f._deserialize(now) + + assert now_with_tz.tzinfo == tz.gettz("UTC") + assert now.isoformat() + "+00:00" == now_with_tz.isoformat() + + now_with_tz = f._deserialize(now.isoformat()) + + assert now_with_tz.tzinfo == tz.gettz("UTC") + assert now.isoformat() + "+00:00" == now_with_tz.isoformat() + + +def test_custom_field_can_wrap_other_field(): + class MyField(field.CustomField): + @property + def builtin_type(self): + return field.Text(**self._params) + + assert {"type": "text", "index": "not_analyzed"} == MyField( + index="not_analyzed" + ).to_dict() + + +def test_field_from_dict(): + f = field.construct_field({"type": "text", "index": "not_analyzed"}) + + assert isinstance(f, field.Text) + assert {"type": "text", "index": "not_analyzed"} == f.to_dict() + + +def test_multi_fields_are_accepted_and_parsed(): + f = field.construct_field( + "text", + fields={"raw": {"type": "keyword"}, "eng": field.Text(analyzer="english")}, + ) + + assert isinstance(f, field.Text) + assert { + "type": "text", + "fields": { + "raw": {"type": "keyword"}, + "eng": {"type": "text", "analyzer": "english"}, + }, + } == f.to_dict() + + +def test_nested_provides_direct_access_to_its_fields(): + f = field.Nested(properties={"name": {"type": "text", "index": "not_analyzed"}}) + + assert "name" in f + assert f["name"] == field.Text(index="not_analyzed") + + +def test_field_supports_multiple_analyzers(): + f = field.Text(analyzer="snowball", search_analyzer="keyword") + assert { + "analyzer": "snowball", + "search_analyzer": "keyword", + "type": "text", + } == f.to_dict() + + +def test_multifield_supports_multiple_analyzers(): + f = field.Text( + fields={ + "f1": field.Text(search_analyzer="keyword", analyzer="snowball"), + "f2": field.Text(analyzer="keyword"), + } + ) + assert { + "fields": { + "f1": { + "analyzer": "snowball", + "search_analyzer": "keyword", + "type": "text", + }, + "f2": {"analyzer": "keyword", "type": "text"}, + }, + "type": "text", + } == f.to_dict() + + +def test_scaled_float(): + with pytest.raises(TypeError): + field.ScaledFloat() + f = field.ScaledFloat(123) + assert f.to_dict() == {"scaling_factor": 123, "type": "scaled_float"} + + +def test_ipaddress(): + f = field.Ip() + assert f.deserialize("127.0.0.1") == ipaddress.ip_address(u"127.0.0.1") + assert f.deserialize(u"::1") == ipaddress.ip_address(u"::1") + assert f.serialize(f.deserialize("::1")) == "::1" + assert f.deserialize(None) is None + with pytest.raises(ValueError): + assert f.deserialize("not_an_ipaddress") + + +def test_float(): + f = field.Float() + assert f.deserialize("42") == 42.0 + assert f.deserialize(None) is None + with 
pytest.raises(ValueError): + assert f.deserialize("not_a_float") + + +def test_integer(): + f = field.Integer() + assert f.deserialize("42") == 42 + assert f.deserialize(None) is None + with pytest.raises(ValueError): + assert f.deserialize("not_an_integer") + + +def test_binary(): + f = field.Binary() + assert f.deserialize(base64.b64encode(b"42")) == b"42" + assert f.deserialize(f.serialize(b"42")) == b"42" + assert f.deserialize(None) is None + + +def test_constant_keyword(): + f = field.ConstantKeyword() + assert f.to_dict() == {"type": "constant_keyword"} + + +def test_rank_features(): + f = field.RankFeatures() + assert f.to_dict() == {"type": "rank_features"} + + +def test_object_dynamic_values(): + for dynamic in True, False, "strict": + f = field.Object(dynamic=dynamic) + assert f.to_dict()["dynamic"] == dynamic + + +def test_object_disabled(): + f = field.Object(enabled=False) + assert f.to_dict() == {"type": "object", "enabled": False} + + +def test_object_constructor(): + expected = {"type": "object", "properties": {"inner_int": {"type": "integer"}}} + + class Inner(InnerDoc): + inner_int = field.Integer() + + obj_from_doc = field.Object(doc_class=Inner) + assert obj_from_doc.to_dict() == expected + + obj_from_props = field.Object(properties={"inner_int": field.Integer()}) + assert obj_from_props.to_dict() == expected + + with pytest.raises(ValidationException): + field.Object(doc_class=Inner, properties={"inner_int": field.Integer()}) + + with pytest.raises(ValidationException): + field.Object(doc_class=Inner, dynamic=False) diff --git a/tests/test_index.py b/tests/test_index.py new file mode 100644 index 000000000..3bdddb57b --- /dev/null +++ b/tests/test_index.py @@ -0,0 +1,187 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import string +from random import choice + +from pytest import raises + +from elasticsearch_dsl import Date, Document, Index, IndexTemplate, Text, analyzer + + +class Post(Document): + title = Text() + published_from = Date() + + +def test_multiple_doc_types_will_combine_mappings(): + class User(Document): + username = Text() + + i = Index("i") + i.document(Post) + i.document(User) + assert { + "mappings": { + "properties": { + "title": {"type": "text"}, + "username": {"type": "text"}, + "published_from": {"type": "date"}, + } + } + } == i.to_dict() + + +def test_search_is_limited_to_index_name(): + i = Index("my-index") + s = i.search() + + assert s._index == ["my-index"] + + +def test_cloned_index_has_copied_settings_and_using(): + client = object() + i = Index("my-index", using=client) + i.settings(number_of_shards=1) + + i2 = i.clone("my-other-index") + + assert "my-other-index" == i2._name + assert client is i2._using + assert i._settings == i2._settings + assert i._settings is not i2._settings + + +def test_cloned_index_has_analysis_attribute(): + """ + Regression test for Issue #582 in which `Index.clone()` was not copying + over the `_analysis` attribute. + """ + client = object() + i = Index("my-index", using=client) + + random_analyzer_name = "".join((choice(string.ascii_letters) for _ in range(100))) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + + i.analyzer(random_analyzer) + + i2 = i.clone("my-clone-index") + + assert i.to_dict()["settings"]["analysis"] == i2.to_dict()["settings"]["analysis"] + + +def test_settings_are_saved(): + i = Index("i") + i.settings(number_of_replicas=0) + i.settings(number_of_shards=1) + + assert {"settings": {"number_of_shards": 1, "number_of_replicas": 0}} == i.to_dict() + + +def test_registered_doc_type_included_in_to_dict(): + i = Index("i", using="alias") + i.document(Post) + + assert { + "mappings": { + "properties": { + "title": {"type": "text"}, + "published_from": {"type": "date"}, + } + } + } == i.to_dict() + + +def test_registered_doc_type_included_in_search(): + i = Index("i", using="alias") + i.document(Post) + + s = i.search() + + assert s._doc_type == [Post] + + +def test_aliases_add_to_object(): + random_alias = "".join((choice(string.ascii_letters) for _ in range(100))) + alias_dict = {random_alias: {}} + + index = Index("i", using="alias") + index.aliases(**alias_dict) + + assert index._aliases == alias_dict + + +def test_aliases_returned_from_to_dict(): + random_alias = "".join((choice(string.ascii_letters) for _ in range(100))) + alias_dict = {random_alias: {}} + + index = Index("i", using="alias") + index.aliases(**alias_dict) + + assert index._aliases == index.to_dict()["aliases"] == alias_dict + + +def test_analyzers_added_to_object(): + random_analyzer_name = "".join((choice(string.ascii_letters) for _ in range(100))) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + + index = Index("i", using="alias") + index.analyzer(random_analyzer) + + assert index._analysis["analyzer"][random_analyzer_name] == { + "filter": ["standard"], + "type": "custom", + "tokenizer": "standard", + } + + +def test_analyzers_returned_from_to_dict(): + random_analyzer_name = "".join((choice(string.ascii_letters) for _ in range(100))) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + index = Index("i", using="alias") + index.analyzer(random_analyzer) + + assert 
index.to_dict()["settings"]["analysis"]["analyzer"][ + random_analyzer_name + ] == {"filter": ["standard"], "type": "custom", "tokenizer": "standard"} + + +def test_conflicting_analyzer_raises_error(): + i = Index("i") + i.analyzer("my_analyzer", tokenizer="whitespace", filter=["lowercase", "stop"]) + + with raises(ValueError): + i.analyzer("my_analyzer", tokenizer="keyword", filter=["lowercase", "stop"]) + + +def test_index_template_can_have_order(): + i = Index("i-*") + it = i.as_template("i", order=2) + + assert {"index_patterns": ["i-*"], "order": 2} == it.to_dict() + + +def test_index_template_save_result(mock_client): + it = IndexTemplate("test-template", "test-*") + + assert it.save(using="mock") == mock_client.indices.put_template() diff --git a/tests/test_integration/__init__.py b/tests/test_integration/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/tests/test_integration/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/test_integration/test_analysis.py b/tests/test_integration/test_analysis.py new file mode 100644 index 000000000..140099d4a --- /dev/null +++ b/tests/test_integration/test_analysis.py @@ -0,0 +1,46 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from elasticsearch_dsl import analyzer, token_filter, tokenizer + + +def test_simulate_with_just__builtin_tokenizer(client): + a = analyzer("my-analyzer", tokenizer="keyword") + tokens = a.simulate("Hello World!", using=client).tokens + + assert len(tokens) == 1 + assert tokens[0].token == "Hello World!" 
+ + +def test_simulate_complex(client): + a = analyzer( + "my-analyzer", + tokenizer=tokenizer("split_words", "simple_pattern_split", pattern=":"), + filter=["lowercase", token_filter("no-ifs", "stop", stopwords=["if"])], + ) + + tokens = a.simulate("if:this:works", using=client).tokens + + assert len(tokens) == 2 + assert ["this", "works"] == [t.token for t in tokens] + + +def test_simulate_builtin(client): + a = analyzer("my-analyzer", "english") + tokens = a.simulate("fixes running").tokens + + assert ["fix", "run"] == [t.token for t in tokens] diff --git a/tests/test_integration/test_count.py b/tests/test_integration/test_count.py new file mode 100644 index 000000000..4b2ed9584 --- /dev/null +++ b/tests/test_integration/test_count.py @@ -0,0 +1,42 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from elasticsearch_dsl.search import Q, Search + + +def test_count_all(data_client): + s = Search(using=data_client).index("git") + assert 53 == s.count() + + +def test_count_prefetch(data_client, mocker): + mocker.spy(data_client, "count") + + search = Search(using=data_client).index("git") + search.execute() + assert search.count() == 53 + assert data_client.count.call_count == 0 + + search._response.hits.total.relation = "gte" + assert search.count() == 53 + assert data_client.count.call_count == 1 + + +def test_count_filter(data_client): + s = Search(using=data_client).index("git").filter(~Q("exists", field="parent_shas")) + # initial commit + repo document + assert 2 == s.count() diff --git a/tests/test_integration/test_data.py b/tests/test_integration/test_data.py new file mode 100644 index 000000000..958f528b3 --- /dev/null +++ b/tests/test_integration/test_data.py @@ -0,0 +1,1105 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import unicode_literals + + +def create_flat_git_index(client, index): + # we will use user on several places + user_mapping = { + "properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}} + } + + client.indices.create( + index=index, + body={ + "settings": { + # just one shard, no replicas for testing + "number_of_shards": 1, + "number_of_replicas": 0, + # custom analyzer for analyzing file paths + "analysis": { + "analyzer": { + "file_path": { + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": ["lowercase"], + } + } + }, + }, + "mappings": { + "properties": { + "description": {"type": "text", "analyzer": "snowball"}, + "author": user_mapping, + "authored_date": {"type": "date"}, + "committer": user_mapping, + "committed_date": {"type": "date"}, + "parent_shas": {"type": "keyword"}, + "files": { + "type": "text", + "analyzer": "file_path", + "fielddata": True, + }, + } + }, + }, + ) + + +def create_git_index(client, index): + # we will use user on several places + user_mapping = { + "properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}} + } + + client.indices.create( + index=index, + body={ + "settings": { + # just one shard, no replicas for testing + "number_of_shards": 1, + "number_of_replicas": 0, + # custom analyzer for analyzing file paths + "analysis": { + "analyzer": { + "file_path": { + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": ["lowercase"], + } + } + }, + }, + "mappings": { + "properties": { + # common fields + "description": {"type": "text", "analyzer": "snowball"}, + "commit_repo": {"type": "join", "relations": {"repo": "commit"}}, + # COMMIT mappings + "author": user_mapping, + "authored_date": {"type": "date"}, + "committer": user_mapping, + "committed_date": {"type": "date"}, + "parent_shas": {"type": "keyword"}, + "files": { + "type": "text", + "analyzer": "file_path", + "fielddata": True, + }, + # REPO mappings + "is_public": {"type": "boolean"}, + "owner": user_mapping, + "created_at": {"type": "date"}, + "tags": {"type": "keyword"}, + } + }, + }, + ) + + +DATA = [ + # repository + { + "_id": "elasticsearch-dsl-py", + "_source": { + "commit_repo": "repo", + "organization": "elasticsearch", + "created_at": "2014-03-03", + "owner": {"name": "elasticsearch"}, + "is_public": True, + }, + "_index": "git", + }, + # documents + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_aggs.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 7, "insertions": 23, "lines": 30, "files": 4}, + "description": "Make sure buckets aren't modified in-place", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["eb3e543323f189fd7b698e66295427204fff5755"], + "committed_date": "2014-05-02T13:47:19", + "authored_date": "2014-05-02T13:47:19.123+02:00", + }, + "_index": "git", + }, + { + "_id": "eb3e543323f189fd7b698e66295427204fff5755", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 18, "lines": 18, 
"files": 1}, + "description": "Add communication with ES server", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["dd15b6ba17dd9ba16363a51f85b31f66f1fb1157"], + "committed_date": "2014-05-01T13:32:14", + "authored_date": "2014-05-01T13:32:14", + }, + "_index": "git", + }, + { + "_id": "dd15b6ba17dd9ba16363a51f85b31f66f1fb1157", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "test_elasticsearch_dsl/test_result.py", + "elasticsearch_dsl/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 18, "insertions": 44, "lines": 62, "files": 3}, + "description": "Minor cleanup and adding helpers for interactive python", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["ed19caf25abd25300e707fadf3f81b05c5673446"], + "committed_date": "2014-05-01T13:30:44", + "authored_date": "2014-05-01T13:30:44", + }, + "_index": "git", + }, + { + "_id": "ed19caf25abd25300e707fadf3f81b05c5673446", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 28, "lines": 28, "files": 3}, + "description": "Make sure aggs do copy-on-write", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["583e52c71e9a72c1b291ec5843683d8fa8f1ce2d"], + "committed_date": "2014-04-27T16:28:09", + "authored_date": "2014-04-27T16:28:09", + }, + "_index": "git", + }, + { + "_id": "583e52c71e9a72c1b291ec5843683d8fa8f1ce2d", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/aggs.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 1, "lines": 2, "files": 1}, + "description": "Use __setitem__ from DslBase in AggsBase", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1dd19210b5be92b960f7db6f66ae526288edccc3"], + "committed_date": "2014-04-27T15:51:53", + "authored_date": "2014-04-27T15:51:53", + }, + "_index": "git", + }, + { + "_id": "1dd19210b5be92b960f7db6f66ae526288edccc3", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_search.py", + "elasticsearch_dsl/search.py", + "elasticsearch_dsl/filter.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 21, "insertions": 98, "lines": 119, "files": 5}, + "description": "Have Search clone itself on any change besides aggs", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b4c9e29376af2e42a4e6dc153f0f293b1a18bac3"], + "committed_date": "2014-04-26T14:49:43", + "authored_date": "2014-04-26T14:49:43", + }, + "_index": "git", + }, + { + "_id": "b4c9e29376af2e42a4e6dc153f0f293b1a18bac3", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": 
["test_elasticsearch_dsl/test_result.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 5, "lines": 5, "files": 1}, + "description": "Add tests for [] on response", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a64a54181b232bb5943bd16960be9416e402f5f5"], + "committed_date": "2014-04-26T13:56:52", + "authored_date": "2014-04-26T13:56:52", + }, + "_index": "git", + }, + { + "_id": "a64a54181b232bb5943bd16960be9416e402f5f5", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/test_result.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 7, "lines": 8, "files": 1}, + "description": "Test access to missing fields raises appropriate exceptions", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["df3f778a3d37b170bde6979a4ef2d9e3e6400778"], + "committed_date": "2014-04-25T16:01:07", + "authored_date": "2014-04-25T16:01:07", + }, + "_index": "git", + }, + { + "_id": "df3f778a3d37b170bde6979a4ef2d9e3e6400778", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "test_elasticsearch_dsl/test_result.py", + "elasticsearch_dsl/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 8, "insertions": 31, "lines": 39, "files": 3}, + "description": "Support attribute access even for inner/nested objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["7e599e116b5ff5d271ce3fe1ebc80e82ab3d5925"], + "committed_date": "2014-04-25T15:59:02", + "authored_date": "2014-04-25T15:59:02", + }, + "_index": "git", + }, + { + "_id": "7e599e116b5ff5d271ce3fe1ebc80e82ab3d5925", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "test_elasticsearch_dsl/test_result.py", + "elasticsearch_dsl/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 149, "lines": 149, "files": 2}, + "description": "Added a prototype of a Respose and Result classes", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e2882d28cb8077eaa3e5d8ae76543482d4d90f7e"], + "committed_date": "2014-04-25T15:12:15", + "authored_date": "2014-04-25T15:12:15", + }, + "_index": "git", + }, + { + "_id": "e2882d28cb8077eaa3e5d8ae76543482d4d90f7e", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["docs/index.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 6, "lines": 6, "files": 1}, + "description": "add warning to the docs", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["51f94d83d1c47d3b81207736ca97a1ec6302678f"], + "committed_date": "2014-04-22T19:16:21", + "authored_date": "2014-04-22T19:16:21", + }, + "_index": "git", + }, + { + "_id": "51f94d83d1c47d3b81207736ca97a1ec6302678f", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": 
["elasticsearch_dsl/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 3, "insertions": 29, "lines": 32, "files": 1}, + "description": "Add some comments to the code", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["0950f6c600b49e2bf012d03b02250fb71c848555"], + "committed_date": "2014-04-22T19:12:06", + "authored_date": "2014-04-22T19:12:06", + }, + "_index": "git", + }, + { + "_id": "0950f6c600b49e2bf012d03b02250fb71c848555", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["README.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 6, "lines": 6, "files": 1}, + "description": "Added a WIP warning", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["54d058f5ac6be8225ef61d5529772aada42ec6c8"], + "committed_date": "2014-04-20T00:19:25", + "authored_date": "2014-04-20T00:19:25", + }, + "_index": "git", + }, + { + "_id": "54d058f5ac6be8225ef61d5529772aada42ec6c8", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/__init__.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 36, "insertions": 7, "lines": 43, "files": 3}, + "description": "Remove the operator kwarg from .query", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["4cb07845e45787abc1f850c0b561e487e0034424"], + "committed_date": "2014-04-20T00:17:25", + "authored_date": "2014-04-20T00:17:25", + }, + "_index": "git", + }, + { + "_id": "4cb07845e45787abc1f850c0b561e487e0034424", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 35, "insertions": 49, "lines": 84, "files": 2}, + "description": "Complex example", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["578abe80f76aafd7e81fe46a44403e601733a938"], + "committed_date": "2014-03-24T20:48:45", + "authored_date": "2014-03-24T20:48:45", + }, + "_index": "git", + }, + { + "_id": "578abe80f76aafd7e81fe46a44403e601733a938", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 0, "lines": 2, "files": 1}, + "description": "removing extra whitespace", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["ecb84f03565940c7d294dbc80723420dcfbab340"], + "committed_date": "2014-03-24T20:42:23", + "authored_date": "2014-03-24T20:42:23", + }, + "_index": "git", + }, + { + "_id": "ecb84f03565940c7d294dbc80723420dcfbab340", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": 
"honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 3, "lines": 4, "files": 1}, + "description": "Make sure attribute access works for .query on Search", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["9a247c876ab66e2bca56b25f392d054e613b1b2a"], + "committed_date": "2014-03-24T20:35:02", + "authored_date": "2014-03-24T20:34:46", + }, + "_index": "git", + }, + { + "_id": "9a247c876ab66e2bca56b25f392d054e613b1b2a", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 2, "lines": 2, "files": 1}, + "description": "Make sure .index and .doc_type methods are chainable", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["cee5e46947d510a49edd3609ff91aab7b1f3ac89"], + "committed_date": "2014-03-24T20:27:46", + "authored_date": "2014-03-24T20:27:46", + }, + "_index": "git", + }, + { + "_id": "cee5e46947d510a49edd3609ff91aab7b1f3ac89", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + "elasticsearch_dsl/filter.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 13, "insertions": 128, "lines": 141, "files": 3}, + "description": "Added .filter and .post_filter to Search", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1d6857182b09a556d58c6bc5bdcb243092812ba3"], + "committed_date": "2014-03-24T20:26:57", + "authored_date": "2014-03-24T20:26:57", + }, + "_index": "git", + }, + { + "_id": "1d6857182b09a556d58c6bc5bdcb243092812ba3", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/utils.py", "elasticsearch_dsl/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 24, "insertions": 29, "lines": 53, "files": 2}, + "description": "Extracted combination logic into DslBase", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["4ad92f15a1955846c01642318303a821e8435b75"], + "committed_date": "2014-03-24T20:03:51", + "authored_date": "2014-03-24T20:03:51", + }, + "_index": "git", + }, + { + "_id": "4ad92f15a1955846c01642318303a821e8435b75", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/utils.py", "elasticsearch_dsl/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 43, "insertions": 45, "lines": 88, "files": 2}, + "description": "Extracted bool-related logic to a mixin to be reused by filters", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["6eb39dc2825605543ac1ed0b45b9b6baeecc44c2"], + "committed_date": "2014-03-24T19:16:16", + "authored_date": "2014-03-24T19:16:16", + }, + "_index": "git", + }, + { + "_id": "6eb39dc2825605543ac1ed0b45b9b6baeecc44c2", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/search.py", + 
"test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 32, "lines": 33, "files": 2}, + "description": "Enable otheroperators when querying on Search object", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["be094c7b307332cb6039bf9a7c984d2c7593ddff"], + "committed_date": "2014-03-24T18:25:10", + "authored_date": "2014-03-24T18:25:10", + }, + "_index": "git", + }, + { + "_id": "be094c7b307332cb6039bf9a7c984d2c7593ddff", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 23, "insertions": 35, "lines": 58, "files": 3}, + "description": "make sure query operations always return copies", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b2576e3b6437e2cb9d8971fee4ead60df91fd75b"], + "committed_date": "2014-03-24T18:10:37", + "authored_date": "2014-03-24T18:03:13", + }, + "_index": "git", + }, + { + "_id": "b2576e3b6437e2cb9d8971fee4ead60df91fd75b", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 53, "lines": 54, "files": 2}, + "description": "Adding or operator for queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1be002170ac3cd59d2e97824b83b88bb3c9c60ed"], + "committed_date": "2014-03-24T17:53:38", + "authored_date": "2014-03-24T17:53:38", + }, + "_index": "git", + }, + { + "_id": "1be002170ac3cd59d2e97824b83b88bb3c9c60ed", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 35, "lines": 35, "files": 2}, + "description": "Added inverting of queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["24e1e38b2f704f65440d96c290b7c6cd54c2e00e"], + "committed_date": "2014-03-23T17:44:36", + "authored_date": "2014-03-23T17:44:36", + }, + "_index": "git", + }, + { + "_id": "24e1e38b2f704f65440d96c290b7c6cd54c2e00e", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/aggs.py", "elasticsearch_dsl/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 5, "insertions": 1, "lines": 6, "files": 2}, + "description": "Change equality checks to use .to_dict()", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["277cfaedbaf3705ed74ad6296227e1172c97a63f"], + "committed_date": "2014-03-23T17:43:01", + "authored_date": "2014-03-23T17:43:01", + }, + "_index": "git", + }, + { + "_id": "277cfaedbaf3705ed74ad6296227e1172c97a63f", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": 
{"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 11, "lines": 12, "files": 2}, + "description": "Test combining of bool queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["6aa3868a6a9f35f71553ce96f9d3d63c74d054fd"], + "committed_date": "2014-03-21T15:15:06", + "authored_date": "2014-03-21T15:15:06", + }, + "_index": "git", + }, + { + "_id": "6aa3868a6a9f35f71553ce96f9d3d63c74d054fd", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 23, "lines": 24, "files": 2}, + "description": "Adding & operator for queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["bb311eb35e7eb53fb5ae01e3f80336866c7e3e37"], + "committed_date": "2014-03-21T15:10:08", + "authored_date": "2014-03-21T15:10:08", + }, + "_index": "git", + }, + { + "_id": "bb311eb35e7eb53fb5ae01e3f80336866c7e3e37", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 4, "lines": 5, "files": 2}, + "description": "Don't serialize empty typed fields into dict", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["aea8ea9e421bd53a5b058495e68c3fd57bb1dacc"], + "committed_date": "2014-03-15T16:29:37", + "authored_date": "2014-03-15T16:29:37", + }, + "_index": "git", + }, + { + "_id": "aea8ea9e421bd53a5b058495e68c3fd57bb1dacc", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 3, "insertions": 37, "lines": 40, "files": 3}, + "description": "Bool queries, when combining just adds their params together", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a8819a510b919be43ff3011b904f257798fb8916"], + "committed_date": "2014-03-15T16:16:40", + "authored_date": "2014-03-15T16:16:40", + }, + "_index": "git", + }, + { + "_id": "a8819a510b919be43ff3011b904f257798fb8916", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/run_tests.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 6, "insertions": 2, "lines": 8, "files": 1}, + "description": "Simpler run_tests.py", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e35792a725be2325fc54d3fcb95a7d38d8075a99"], + "committed_date": "2014-03-15T16:02:21", + "authored_date": "2014-03-15T16:02:21", + }, + "_index": "git", + }, + { + "_id": "e35792a725be2325fc54d3fcb95a7d38d8075a99", + 
"routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/aggs.py", "elasticsearch_dsl/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 2, "lines": 4, "files": 2}, + "description": "Maku we don't treat shortcuts as methods.", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["3179d778dc9e3f2883d5f7ffa63b9ae0399c16bc"], + "committed_date": "2014-03-15T15:59:21", + "authored_date": "2014-03-15T15:59:21", + }, + "_index": "git", + }, + { + "_id": "3179d778dc9e3f2883d5f7ffa63b9ae0399c16bc", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/query.py", + "elasticsearch_dsl/utils.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 9, "insertions": 5, "lines": 14, "files": 3}, + "description": "Centralize == of Dsl objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b5e7d0c4b284211df8f7b464fcece93a27a802fb"], + "committed_date": "2014-03-10T21:37:24", + "authored_date": "2014-03-10T21:37:24", + }, + "_index": "git", + }, + { + "_id": "b5e7d0c4b284211df8f7b464fcece93a27a802fb", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + "elasticsearch_dsl/utils.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_aggs.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 75, "insertions": 115, "lines": 190, "files": 6}, + "description": "Experimental draft with more declarative DSL", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["0fe741b43adee5ca1424584ddd3f35fa33f8733c"], + "committed_date": "2014-03-10T21:34:39", + "authored_date": "2014-03-10T21:34:39", + }, + "_index": "git", + }, + { + "_id": "0fe741b43adee5ca1424584ddd3f35fa33f8733c", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 2, "lines": 4, "files": 1}, + "description": "Make sure .query is chainable", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a22be5933d4b022cbacee867b1aece120208edf3"], + "committed_date": "2014-03-07T17:41:59", + "authored_date": "2014-03-07T17:41:59", + }, + "_index": "git", + }, + { + "_id": "a22be5933d4b022cbacee867b1aece120208edf3", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 14, "insertions": 44, "lines": 58, "files": 3}, + "description": "Search now does aggregations", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": 
["e823686aacfc4bdcb34ffdab337a26fa09659a9a"], + "committed_date": "2014-03-07T17:29:55", + "authored_date": "2014-03-07T17:29:55", + }, + "_index": "git", + }, + { + "_id": "e823686aacfc4bdcb34ffdab337a26fa09659a9a", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [".gitignore"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 1, "lines": 1, "files": 1}, + "description": "Ignore html coverage report", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e0aedb3011c71d704deec03a8f32b2b360d6e364"], + "committed_date": "2014-03-07T17:03:23", + "authored_date": "2014-03-07T17:03:23", + }, + "_index": "git", + }, + { + "_id": "e0aedb3011c71d704deec03a8f32b2b360d6e364", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "test_elasticsearch_dsl/test_aggs.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 228, "lines": 228, "files": 2}, + "description": "Added aggregation DSL objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["61cbc0aa62a0b776ae5e333406659dbb2f5cfbbd"], + "committed_date": "2014-03-07T16:25:55", + "authored_date": "2014-03-07T16:25:55", + }, + "_index": "git", + }, + { + "_id": "61cbc0aa62a0b776ae5e333406659dbb2f5cfbbd", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/utils.py", "elasticsearch_dsl/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 12, "insertions": 7, "lines": 19, "files": 2}, + "description": "Only retrieve DslClass, leave the instantiation to the caller", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["647f1017a7b17a913e07af70a3b03202f6adbdfd"], + "committed_date": "2014-03-07T15:27:43", + "authored_date": "2014-03-07T15:27:43", + }, + "_index": "git", + }, + { + "_id": "647f1017a7b17a913e07af70a3b03202f6adbdfd", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "test_elasticsearch_dsl/test_search.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 19, "insertions": 19, "lines": 38, "files": 3}, + "description": "No need to replicate Query suffix when in query namespace", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["7c4f94ecdb38f0e91c7ee52f579c0ea148afcc7d"], + "committed_date": "2014-03-07T15:19:01", + "authored_date": "2014-03-07T15:19:01", + }, + "_index": "git", + }, + { + "_id": "7c4f94ecdb38f0e91c7ee52f579c0ea148afcc7d", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 3, "lines": 5, "files": 1}, + "description": "Ask forgiveness, not permission", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + 
"parent_shas": ["c10793c2ca43688195e415b25b674ff34d58eaff"], + "committed_date": "2014-03-07T15:13:22", + "authored_date": "2014-03-07T15:13:22", + }, + "_index": "git", + }, + { + "_id": "c10793c2ca43688195e415b25b674ff34d58eaff", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 24, "insertions": 27, "lines": 51, "files": 3}, + "description": "Extract DSL object registration to DslMeta", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["d8867fdb17fcf4c696657740fa08d29c36adc6ec"], + "committed_date": "2014-03-07T15:12:13", + "authored_date": "2014-03-07T15:10:31", + }, + "_index": "git", + }, + { + "_id": "d8867fdb17fcf4c696657740fa08d29c36adc6ec", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 13, "lines": 13, "files": 2}, + "description": "Search.to_dict", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["2eb7cd980d917ed6f4a4dd8e246804f710ec5082"], + "committed_date": "2014-03-07T02:58:33", + "authored_date": "2014-03-07T02:58:33", + }, + "_index": "git", + }, + { + "_id": "2eb7cd980d917ed6f4a4dd8e246804f710ec5082", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 113, "lines": 113, "files": 2}, + "description": "Basic Search object", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["11708576f9118e0dbf27ae1f8a7b799cf281b511"], + "committed_date": "2014-03-06T21:02:03", + "authored_date": "2014-03-06T21:01:05", + }, + "_index": "git", + }, + { + "_id": "11708576f9118e0dbf27ae1f8a7b799cf281b511", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 13, "lines": 13, "files": 2}, + "description": "MatchAll query + anything is anything", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1dc496e5c7c1b2caf290df477fca2db61ebe37e0"], + "committed_date": "2014-03-06T20:40:39", + "authored_date": "2014-03-06T20:39:52", + }, + "_index": "git", + }, + { + "_id": "1dc496e5c7c1b2caf290df477fca2db61ebe37e0", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 53, "lines": 53, "files": 2}, + "description": "From_dict, Q(dict) and bool query 
parses it's subqueries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["d407f99d1959b7b862a541c066d9fd737ce913f3"], + "committed_date": "2014-03-06T20:24:30", + "authored_date": "2014-03-06T20:24:30", + }, + "_index": "git", + }, + { + "_id": "d407f99d1959b7b862a541c066d9fd737ce913f3", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["CONTRIBUTING.md", "README.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 6, "insertions": 21, "lines": 27, "files": 2}, + "description": "Housekeeping - licence and updated generic CONTRIBUTING.md", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["277e8ecc7395754d1ba1f2411ec32337a3e9d73f"], + "committed_date": "2014-03-05T16:21:44", + "authored_date": "2014-03-05T16:21:44", + }, + "_index": "git", + }, + { + "_id": "277e8ecc7395754d1ba1f2411ec32337a3e9d73f", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "setup.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 59, "lines": 59, "files": 3}, + "description": "Automatic query registration and Q function", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["8f1e34bd8f462fec50bcc10971df2d57e2986604"], + "committed_date": "2014-03-05T16:18:52", + "authored_date": "2014-03-05T16:18:52", + }, + "_index": "git", + }, + { + "_id": "8f1e34bd8f462fec50bcc10971df2d57e2986604", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 54, "lines": 54, "files": 2}, + "description": "Initial implementation of match and bool queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["fcff47ddcc6d08be5739d03dd30f504fb9db2608"], + "committed_date": "2014-03-05T15:55:06", + "authored_date": "2014-03-05T15:55:06", + }, + "_index": "git", + }, + { + "_id": "fcff47ddcc6d08be5739d03dd30f504fb9db2608", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "docs/Makefile", + "CONTRIBUTING.md", + "docs/conf.py", + "LICENSE", + "Changelog.rst", + "docs/index.rst", + "docs/Changelog.rst", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 692, "lines": 692, "files": 7}, + "description": "Docs template", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["febe8127ae48fcc81778c0fb2d628f1bcc0a0350"], + "committed_date": "2014-03-04T01:42:31", + "authored_date": "2014-03-04T01:42:31", + }, + "_index": "git", + }, + { + "_id": "febe8127ae48fcc81778c0fb2d628f1bcc0a0350", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/__init__.py", + "test_elasticsearch_dsl/run_tests.py", + "setup.py", + "README.rst", + 
"test_elasticsearch_dsl/__init__.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 82, "lines": 82, "files": 5}, + "description": "Empty project structure", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["2a8f1ce89760bfc72808f3945b539eae650acac9"], + "committed_date": "2014-03-04T01:37:49", + "authored_date": "2014-03-03T18:23:55", + }, + "_index": "git", + }, + { + "_id": "2a8f1ce89760bfc72808f3945b539eae650acac9", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [".gitignore"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 9, "lines": 9, "files": 1}, + "description": "Initial commit, .gitignore", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": [], + "committed_date": "2014-03-03T18:15:05", + "authored_date": "2014-03-03T18:15:05", + }, + "_index": "git", + }, +] + + +def flatten_doc(d): + src = d["_source"].copy() + del src["commit_repo"] + return {"_index": "flat-git", "_id": d["_id"], "_source": src} + + +FLAT_DATA = [flatten_doc(d) for d in DATA if "routing" in d] + + +def create_test_git_data(d): + src = d["_source"].copy() + return { + "_index": "test-git", + "routing": "elasticsearch-dsl-py", + "_id": d["_id"], + "_source": src, + } + + +TEST_GIT_DATA = [create_test_git_data(d) for d in DATA] diff --git a/tests/test_integration/test_document.py b/tests/test_integration/test_document.py new file mode 100644 index 000000000..14b97a7f7 --- /dev/null +++ b/tests/test_integration/test_document.py @@ -0,0 +1,574 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import warnings +from datetime import datetime +from ipaddress import ip_address + +import pytest +from elasticsearch import ConflictError, NotFoundError +from pytest import raises +from pytz import timezone + +from elasticsearch_dsl import ( + Binary, + Boolean, + Date, + Document, + Double, + InnerDoc, + Ip, + Keyword, + Long, + Mapping, + MetaField, + Nested, + Object, + Q, + RankFeatures, + Text, + analyzer, +) +from elasticsearch_dsl.utils import AttrList + +snowball = analyzer("my_snow", tokenizer="standard", filter=["lowercase", "snowball"]) + + +class User(InnerDoc): + name = Text(fields={"raw": Keyword()}) + + +class Wiki(Document): + owner = Object(User) + views = Long() + ranked = RankFeatures() + + class Index: + name = "test-wiki" + + +class Repository(Document): + owner = Object(User) + created_at = Date() + description = Text(analyzer=snowball) + tags = Keyword() + + @classmethod + def search(cls): + return super(Repository, cls).search().filter("term", commit_repo="repo") + + class Index: + name = "git" + + +class Commit(Document): + committed_date = Date() + authored_date = Date() + description = Text(analyzer=snowball) + + class Index: + name = "flat-git" + + class Meta: + mapping = Mapping() + + +class History(InnerDoc): + timestamp = Date() + diff = Text() + + +class Comment(InnerDoc): + content = Text() + created_at = Date() + author = Object(User) + history = Nested(History) + + class Meta: + dynamic = MetaField(False) + + +class PullRequest(Document): + comments = Nested(Comment) + created_at = Date() + + class Index: + name = "test-prs" + + +class SerializationDoc(Document): + i = Long() + b = Boolean() + d = Double() + bin = Binary() + ip = Ip() + + class Index: + name = "test-serialization" + + +def test_serialization(write_client): + SerializationDoc.init() + write_client.index( + index="test-serialization", + id=42, + body={ + "i": [1, 2, "3", None], + "b": [True, False, "true", "false", None], + "d": [0.1, "-0.1", None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "ip": ["::1", "127.0.0.1", None], + }, + ) + sd = SerializationDoc.get(id=42) + + assert sd.i == [1, 2, 3, None] + assert sd.b == [True, False, True, False, None] + assert sd.d == [0.1, -0.1, None] + assert sd.bin == [b"Hello World", None] + assert sd.ip == [ip_address(u"::1"), ip_address(u"127.0.0.1"), None] + + assert sd.to_dict() == { + "b": [True, False, True, False, None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "d": [0.1, -0.1, None], + "i": [1, 2, 3, None], + "ip": ["::1", "127.0.0.1", None], + } + + +def test_nested_inner_hits_are_wrapped_properly(pull_request): + history_query = Q( + "nested", + path="comments.history", + inner_hits={}, + query=Q("match", comments__history__diff="ahoj"), + ) + s = PullRequest.search().query( + "nested", inner_hits={}, path="comments", query=history_query + ) + + response = s.execute() + pr = response.hits[0] + assert isinstance(pr, PullRequest) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].history[0], History) + + comment = pr.meta.inner_hits.comments.hits[0] + assert isinstance(comment, Comment) + assert comment.author.name == "honzakral" + assert isinstance(comment.history[0], History) + + history = comment.meta.inner_hits["comments.history"].hits[0] + assert isinstance(history, History) + assert history.timestamp == datetime(2012, 1, 1) + assert "score" in history.meta + + +def test_nested_inner_hits_are_deserialized_properly(pull_request): + s = PullRequest.search().query( + "nested", + inner_hits={}, + path="comments", + 
query=Q("match", comments__content="hello"), + ) + + response = s.execute() + pr = response.hits[0] + assert isinstance(pr.created_at, datetime) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].created_at, datetime) + + +def test_nested_top_hits_are_wrapped_properly(pull_request): + s = PullRequest.search() + s.aggs.bucket("comments", "nested", path="comments").metric( + "hits", "top_hits", size=1 + ) + + r = s.execute() + + print(r._d_) + assert isinstance(r.aggregations.comments.hits.hits[0], Comment) + + +def test_update_object_field(write_client): + Wiki.init() + w = Wiki( + owner=User(name="Honza Kral"), + _id="elasticsearch-py", + ranked={"test1": 0.1, "topic2": 0.2}, + ) + w.save() + + assert "updated" == w.update(owner=[{"name": "Honza"}, {"name": "Nick"}]) + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + w = Wiki.get(id="elasticsearch-py") + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + assert w.ranked == {"test1": 0.1, "topic2": 0.2} + + +def test_update_script(write_client): + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + w.save() + + w.update(script="ctx._source.views += params.inc", inc=5) + w = Wiki.get(id="elasticsearch-py") + assert w.views == 47 + + +def test_update_retry_on_conflict(write_client): + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + w.save() + + w1 = Wiki.get(id="elasticsearch-py") + w2 = Wiki.get(id="elasticsearch-py") + w1.update(script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1) + w2.update(script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1) + + w = Wiki.get(id="elasticsearch-py") + assert w.views == 52 + + +@pytest.mark.parametrize("retry_on_conflict", [None, 0]) +def test_update_conflicting_version(write_client, retry_on_conflict): + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + w.save() + + w1 = Wiki.get(id="elasticsearch-py") + w2 = Wiki.get(id="elasticsearch-py") + w1.update(script="ctx._source.views += params.inc", inc=5) + + with raises(ConflictError): + w2.update( + script="ctx._source.views += params.inc", + inc=5, + retry_on_conflict=retry_on_conflict, + ) + + +def test_save_and_update_return_doc_meta(write_client): + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + resp = w.save(return_doc_meta=True) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "created" + assert set(resp.keys()) == { + "_id", + "_index", + "_primary_term", + "_seq_no", + "_shards", + "_type", + "_version", + "result", + } + + resp = w.update( + script="ctx._source.views += params.inc", inc=5, return_doc_meta=True + ) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "updated" + assert set(resp.keys()) == { + "_id", + "_index", + "_primary_term", + "_seq_no", + "_shards", + "_type", + "_version", + "result", + } + + +def test_init(write_client): + Repository.init(index="test-git") + + assert write_client.indices.exists(index="test-git") + + +def test_get_raises_404_on_index_missing(data_client): + with raises(NotFoundError): + Repository.get("elasticsearch-dsl-php", index="not-there") + + +def test_get_raises_404_on_non_existent_id(data_client): + with raises(NotFoundError): + Repository.get("elasticsearch-dsl-php") + + +def test_get_returns_none_if_404_ignored(data_client): + assert None is Repository.get("elasticsearch-dsl-php", ignore=404) + + +def 
test_get_returns_none_if_404_ignored_and_index_doesnt_exist(data_client): + assert None is Repository.get("42", index="not-there", ignore=404) + + +def test_get(data_client): + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + + assert isinstance(elasticsearch_repo, Repository) + assert elasticsearch_repo.owner.name == "elasticsearch" + assert datetime(2014, 3, 3) == elasticsearch_repo.created_at + + +def test_exists_return_true(data_client): + assert Repository.exists("elasticsearch-dsl-py") + + +def test_exists_false(data_client): + assert not Repository.exists("elasticsearch-dsl-php") + + +def test_get_with_tz_date(data_client): + first_commit = Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + + tzinfo = timezone("Europe/Prague") + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123000)) + == first_commit.authored_date + ) + + +def test_save_with_tz_date(data_client): + tzinfo = timezone("Europe/Prague") + first_commit = Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + first_commit.committed_date = tzinfo.localize( + datetime(2014, 5, 2, 13, 47, 19, 123456) + ) + first_commit.save() + + first_commit = Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123456)) + == first_commit.committed_date + ) + + +COMMIT_DOCS_WITH_MISSING = [ + {"_id": "0"}, # Missing + {"_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"}, # Existing + {"_id": "f"}, # Missing + {"_id": "eb3e543323f189fd7b698e66295427204fff5755"}, # Existing +] + + +def test_mget(data_client): + commits = Commit.mget(COMMIT_DOCS_WITH_MISSING) + assert commits[0] is None + assert commits[1].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[2] is None + assert commits[3].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +def test_mget_raises_exception_when_missing_param_is_invalid(data_client): + with raises(ValueError): + Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raj") + + +def test_mget_raises_404_when_missing_param_is_raise(data_client): + with raises(NotFoundError): + Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raise") + + +def test_mget_ignores_missing_docs_when_missing_param_is_skip(data_client): + commits = Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="skip") + assert commits[0].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[1].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +def test_update_works_from_search_response(data_client): + elasticsearch_repo = Repository.search().execute()[0] + + with warnings.catch_warnings(record=True) as w: + elasticsearch_repo.update(owner={"other_name": "elastic"}) + assert [ + str(x.message) for x in w if issubclass(x.category, DeprecationWarning) + ] == [] + assert "elastic" == elasticsearch_repo.owner.other_name + + new_version = Repository.get("elasticsearch-dsl-py") + assert "elastic" == new_version.owner.other_name + assert "elasticsearch" == new_version.owner.name + + +def test_update(data_client): + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + v = elasticsearch_repo.meta.version + + old_seq_no = elasticsearch_repo.meta.seq_no + elasticsearch_repo.update(owner={"new_name": "elastic"}, new_field="testing-update") + + assert "elastic" == elasticsearch_repo.owner.new_name + assert "testing-update" == elasticsearch_repo.new_field + + # assert version has been updated + assert 
elasticsearch_repo.meta.version == v + 1 + + new_version = Repository.get("elasticsearch-dsl-py") + assert "testing-update" == new_version.new_field + assert "elastic" == new_version.owner.new_name + assert "elasticsearch" == new_version.owner.name + assert "seq_no" in new_version.meta + assert new_version.meta.seq_no != old_seq_no + assert "primary_term" in new_version.meta + + +def test_save_updates_existing_doc(data_client): + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + + elasticsearch_repo.new_field = "testing-save" + old_seq_no = elasticsearch_repo.meta.seq_no + + with warnings.catch_warnings(record=True) as w: + assert "updated" == elasticsearch_repo.save() + assert [ + str(x.message) for x in w if issubclass(x.category, DeprecationWarning) + ] == [] + + new_repo = data_client.get(index="git", id="elasticsearch-dsl-py") + assert "testing-save" == new_repo["_source"]["new_field"] + assert new_repo["_seq_no"] != old_seq_no + assert new_repo["_seq_no"] == elasticsearch_repo.meta.seq_no + + +def test_save_automatically_uses_seq_no_and_primary_term(data_client): + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + elasticsearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + elasticsearch_repo.save() + + +def test_delete_automatically_uses_seq_no_and_primary_term(data_client): + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + elasticsearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + elasticsearch_repo.delete() + + +def assert_doc_equals(expected, actual): + for f in expected: + assert f in actual + assert actual[f] == expected[f] + + +def test_can_save_to_different_index(write_client): + test_repo = Repository(description="testing", meta={"id": 42}) + assert test_repo.save(index="test-document") + + assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"description": "testing"}, + }, + write_client.get(index="test-document", id=42), + ) + + +def test_save_without_skip_empty_will_include_empty_fields(write_client): + test_repo = Repository(field_1=[], field_2=None, field_3={}, meta={"id": 42}) + assert test_repo.save(index="test-document", skip_empty=False) + + assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"field_1": [], "field_2": None, "field_3": {}}, + }, + write_client.get(index="test-document", id=42), + ) + + +def test_delete(write_client): + write_client.create( + index="test-document", + id="elasticsearch-dsl-py", + body={ + "organization": "elasticsearch", + "created_at": "2014-03-03", + "owner": {"name": "elasticsearch"}, + }, + ) + + test_repo = Repository(meta={"id": "elasticsearch-dsl-py"}) + test_repo.meta.index = "test-document" + test_repo.delete() + + assert not write_client.exists( + index="test-document", + id="elasticsearch-dsl-py", + ) + + +def test_search(data_client): + assert Repository.search().count() == 1 + + +def test_search_returns_proper_doc_classes(data_client): + result = Repository.search().execute() + + elasticsearch_repo = result.hits[0] + + assert isinstance(elasticsearch_repo, Repository) + assert elasticsearch_repo.owner.name == "elasticsearch" + + +def test_refresh_mapping(data_client): + class Commit(Document): + class Index: + name = "git" + + Commit._index.load_mappings() + + assert "stats" in Commit._index._mapping + assert "committer" in Commit._index._mapping + assert "description" in Commit._index._mapping + assert "committed_date" in Commit._index._mapping + assert 
isinstance(Commit._index._mapping["committed_date"], Date) + + +def test_highlight_in_meta(data_client): + commit = ( + Commit.search() + .query("match", description="inverting") + .highlight("description") + .execute()[0] + ) + + assert isinstance(commit, Commit) + assert "description" in commit.meta.highlight + assert isinstance(commit.meta.highlight["description"], AttrList) + assert len(commit.meta.highlight["description"]) > 0 diff --git a/tests/test_integration/test_examples/__init__.py b/tests/test_integration/test_examples/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/tests/test_integration/test_examples/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch_dsl/test_integration/test_examples/alias_migration.py b/tests/test_integration/test_examples/alias_migration.py similarity index 100% rename from test_elasticsearch_dsl/test_integration/test_examples/alias_migration.py rename to tests/test_integration/test_examples/alias_migration.py diff --git a/test_elasticsearch_dsl/test_integration/test_examples/completion.py b/tests/test_integration/test_examples/completion.py similarity index 100% rename from test_elasticsearch_dsl/test_integration/test_examples/completion.py rename to tests/test_integration/test_examples/completion.py diff --git a/test_elasticsearch_dsl/test_integration/test_examples/composite_agg.py b/tests/test_integration/test_examples/composite_agg.py similarity index 100% rename from test_elasticsearch_dsl/test_integration/test_examples/composite_agg.py rename to tests/test_integration/test_examples/composite_agg.py diff --git a/test_elasticsearch_dsl/test_integration/test_examples/parent_child.py b/tests/test_integration/test_examples/parent_child.py similarity index 100% rename from test_elasticsearch_dsl/test_integration/test_examples/parent_child.py rename to tests/test_integration/test_examples/parent_child.py diff --git a/test_elasticsearch_dsl/test_integration/test_examples/percolate.py b/tests/test_integration/test_examples/percolate.py similarity index 100% rename from test_elasticsearch_dsl/test_integration/test_examples/percolate.py rename to tests/test_integration/test_examples/percolate.py diff --git a/test_elasticsearch_dsl/test_integration/test_examples/test_alias_migration.py b/tests/test_integration/test_examples/test_alias_migration.py similarity index 55% rename from test_elasticsearch_dsl/test_integration/test_examples/test_alias_migration.py rename to tests/test_integration/test_examples/test_alias_migration.py index 9cc257292..611fc6911 100644 --- a/test_elasticsearch_dsl/test_integration/test_examples/test_alias_migration.py +++ b/tests/test_integration/test_examples/test_alias_migration.py @@ -1,5 +1,23 @@ +# Licensed to 
Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from . import alias_migration -from .alias_migration import BlogPost, PATTERN, ALIAS, migrate +from .alias_migration import ALIAS, PATTERN, BlogPost, migrate + def test_alias_migration(write_client): # create the index @@ -17,9 +35,9 @@ def test_alias_migration(write_client): # which means we can now save a document bp = BlogPost( _id=0, - title='Hello World!', - tags = ['testing', 'dummy'], - content=open(__file__).read() + title="Hello World!", + tags=["testing", "dummy"], + content=open(__file__).read(), ) bp.save(refresh=True) @@ -29,7 +47,7 @@ def test_alias_migration(write_client): bp = BlogPost.search().execute()[0] assert isinstance(bp, BlogPost) assert not bp.is_published() - assert '0' == bp.meta.id + assert "0" == bp.meta.id # create new index migrate() @@ -46,4 +64,4 @@ def test_alias_migration(write_client): # _matches work which means we get BlogPost instance bp = BlogPost.search().execute()[0] assert isinstance(bp, BlogPost) - assert '0' == bp.meta.id + assert "0" == bp.meta.id diff --git a/tests/test_integration/test_examples/test_completion.py b/tests/test_integration/test_examples/test_completion.py new file mode 100644 index 000000000..42aef0db2 --- /dev/null +++ b/tests/test_integration/test_examples/test_completion.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import unicode_literals + +from .completion import Person + + +def test_person_suggests_on_all_variants_of_name(write_client): + Person.init(using=write_client) + + Person(name="Honza Král", popularity=42).save(refresh=True) + + s = Person.search().suggest("t", "kra", completion={"field": "suggest"}) + response = s.execute() + + opts = response.suggest.t[0].options + + assert 1 == len(opts) + assert opts[0]._score == 42 + assert opts[0]._source.name == "Honza Král" diff --git a/tests/test_integration/test_examples/test_composite_aggs.py b/tests/test_integration/test_examples/test_composite_aggs.py new file mode 100644 index 000000000..16ad9b17f --- /dev/null +++ b/tests/test_integration/test_examples/test_composite_aggs.py @@ -0,0 +1,43 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from elasticsearch_dsl import A, Search + +from .composite_agg import scan_aggs + + +def test_scan_aggs_exhausts_all_files(data_client): + s = Search(index="flat-git") + key_aggs = {"files": A("terms", field="files")} + file_list = list(scan_aggs(s, key_aggs)) + + assert len(file_list) == 26 + + +def test_scan_aggs_with_multiple_aggs(data_client): + s = Search(index="flat-git") + key_aggs = [ + {"files": A("terms", field="files")}, + { + "months": { + "date_histogram": {"field": "committed_date", "interval": "month"} + } + }, + ] + file_list = list(scan_aggs(s, key_aggs)) + + assert len(file_list) == 47 diff --git a/tests/test_integration/test_examples/test_parent_child.py b/tests/test_integration/test_examples/test_parent_child.py new file mode 100644 index 000000000..fa2a3ab5e --- /dev/null +++ b/tests/test_integration/test_examples/test_parent_child.py @@ -0,0 +1,105 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from datetime import datetime + +from pytest import fixture + +from elasticsearch_dsl import Q + +from .parent_child import Answer, Comment, Question, User, setup + +honza = User( + id=42, + signed_up=datetime(2013, 4, 3), + username="honzakral", + email="honza@elastic.co", + location="Prague", +) + +nick = User( + id=47, + signed_up=datetime(2017, 4, 3), + username="fxdgear", + email="nick.lang@elastic.co", + location="Colorado", +) + + +@fixture +def question(write_client): + setup() + assert write_client.indices.exists_template(name="base") + + # create a question object + q = Question( + _id=1, + author=nick, + tags=["elasticsearch", "python"], + title="How do I use elasticsearch from Python?", + body=""" + I want to use elasticsearch, how do I do it from Python? + """, + ) + q.save() + return q + + +def test_comment(write_client, question): + question.add_comment(nick, "Just use elasticsearch-py") + + q = Question.get(1) + assert isinstance(q, Question) + assert 1 == len(q.comments) + + c = q.comments[0] + assert isinstance(c, Comment) + assert c.author.username == "fxdgear" + + +def test_question_answer(write_client, question): + a = question.add_answer(honza, "Just use `elasticsearch-py`!") + + assert isinstance(a, Answer) + + # refresh the index so we can search right away + Question._index.refresh() + + # we can now fetch answers from elasticsearch + answers = question.get_answers() + assert 1 == len(answers) + assert isinstance(answers[0], Answer) + + search = Question.search().query( + "has_child", + type="answer", + inner_hits={}, + query=Q("term", author__username__keyword="honzakral"), + ) + response = search.execute() + + assert 1 == len(response.hits) + + q = response.hits[0] + assert isinstance(q, Question) + assert 1 == len(q.meta.inner_hits.answer.hits) + assert q.meta.inner_hits.answer.hits is q.get_answers() + + a = q.meta.inner_hits.answer.hits[0] + assert isinstance(a, Answer) + assert isinstance(a.question, Question) + assert a.question.meta.id == "1" diff --git a/tests/test_integration/test_examples/test_percolate.py b/tests/test_integration/test_examples/test_percolate.py new file mode 100644 index 000000000..30fcf972b --- /dev/null +++ b/tests/test_integration/test_examples/test_percolate.py @@ -0,0 +1,31 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from .percolate import BlogPost, setup + + +def test_post_gets_tagged_automatically(write_client): + setup() + + bp = BlogPost(_id=47, content="nothing about snakes here!") + bp_py = BlogPost(_id=42, content="something about Python here!") + + bp.save() + bp_py.save() + + assert [] == bp.tags + assert {"programming", "development", "python"} == set(bp_py.tags) diff --git a/tests/test_integration/test_faceted_search.py b/tests/test_integration/test_faceted_search.py new file mode 100644 index 000000000..de0a2b311 --- /dev/null +++ b/tests/test_integration/test_faceted_search.py @@ -0,0 +1,280 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime + +import pytest + +from elasticsearch_dsl import A, Boolean, Date, Document, Keyword +from elasticsearch_dsl.faceted_search import ( + DateHistogramFacet, + FacetedSearch, + NestedFacet, + RangeFacet, + TermsFacet, +) + +from .test_document import PullRequest + + +class Repos(Document): + is_public = Boolean() + created_at = Date() + + class Index: + name = "git" + + +class Commit(Document): + files = Keyword() + committed_date = Date() + + class Index: + name = "git" + + +class MetricSearch(FacetedSearch): + index = "git" + doc_types = [Commit] + + facets = { + "files": TermsFacet(field="files", metric=A("max", field="committed_date")), + } + + +@pytest.fixture(scope="session") +def commit_search_cls(es_version): + if es_version >= (7, 2): + interval_kwargs = {"fixed_interval": "1d"} + else: + interval_kwargs = {"interval": "day"} + + class CommitSearch(FacetedSearch): + index = "flat-git" + fields = ( + "description", + "files", + ) + + facets = { + "files": TermsFacet(field="files"), + "frequency": DateHistogramFacet( + field="authored_date", min_doc_count=1, **interval_kwargs + ), + "deletions": RangeFacet( + field="stats.deletions", + ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))], + ), + } + + return CommitSearch + + +@pytest.fixture(scope="session") +def repo_search_cls(es_version): + interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" + + class RepoSearch(FacetedSearch): + index = "git" + doc_types = [Repos] + facets = { + "public": TermsFacet(field="is_public"), + "created": DateHistogramFacet( + field="created_at", **{interval_type: "month"} + ), + } + + def search(self): + s = super(RepoSearch, self).search() + return s.filter("term", commit_repo="repo") + + return RepoSearch + + +@pytest.fixture(scope="session") +def pr_search_cls(es_version): + interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" + + class PRSearch(FacetedSearch): + index = "test-prs" + doc_types = [PullRequest] + facets = { + "comments": NestedFacet( + "comments", + DateHistogramFacet( + field="comments.created_at", **{interval_type: "month"} 
+ ), + ) + } + + return PRSearch + + +def test_facet_with_custom_metric(data_client): + ms = MetricSearch() + r = ms.execute() + + dates = [f[1] for f in r.facets.files] + assert dates == list(sorted(dates, reverse=True)) + assert dates[0] == 1399038439000 + + +def test_nested_facet(pull_request, pr_search_cls): + prs = pr_search_cls() + r = prs.execute() + + assert r.hits.total.value == 1 + assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == r.facets.comments + + +def test_nested_facet_with_filter(pull_request, pr_search_cls): + prs = pr_search_cls(filters={"comments": datetime(2018, 1, 1, 0, 0)}) + r = prs.execute() + + assert r.hits.total.value == 1 + assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == r.facets.comments + + prs = pr_search_cls(filters={"comments": datetime(2018, 2, 1, 0, 0)}) + r = prs.execute() + assert not r.hits + + +def test_datehistogram_facet(data_client, repo_search_cls): + rs = repo_search_cls() + r = rs.execute() + + assert r.hits.total.value == 1 + assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == r.facets.created + + +def test_boolean_facet(data_client, repo_search_cls): + rs = repo_search_cls() + r = rs.execute() + + assert r.hits.total.value == 1 + assert [(True, 1, False)] == r.facets.public + value, count, selected = r.facets.public[0] + assert value is True + + +def test_empty_search_finds_everything(data_client, es_version, commit_search_cls): + cs = commit_search_cls() + r = cs.execute() + + assert r.hits.total.value == 52 + assert [ + ("elasticsearch_dsl", 40, False), + ("test_elasticsearch_dsl", 35, False), + ("elasticsearch_dsl/query.py", 19, False), + ("test_elasticsearch_dsl/test_search.py", 15, False), + ("elasticsearch_dsl/utils.py", 14, False), + ("test_elasticsearch_dsl/test_query.py", 13, False), + ("elasticsearch_dsl/search.py", 12, False), + ("elasticsearch_dsl/aggs.py", 11, False), + ("test_elasticsearch_dsl/test_result.py", 5, False), + ("elasticsearch_dsl/result.py", 3, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 2, False), + (datetime(2014, 3, 4, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 3, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 9, False), + (datetime(2014, 3, 10, 0, 0), 2, False), + (datetime(2014, 3, 15, 0, 0), 4, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 2, False), + (datetime(2014, 3, 24, 0, 0), 10, False), + (datetime(2014, 4, 20, 0, 0), 2, False), + (datetime(2014, 4, 22, 0, 0), 2, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 2, False), + (datetime(2014, 5, 1, 0, 0), 2, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 19, False), + ("good", 14, False), + ("better", 19, False), + ] == r.facets.deletions + + +def test_term_filters_are_shown_as_selected_and_data_is_filtered( + data_client, commit_search_cls +): + cs = commit_search_cls(filters={"files": "test_elasticsearch_dsl"}) + + r = cs.execute() + + assert 35 == r.hits.total.value + assert [ + ("elasticsearch_dsl", 40, False), + ("test_elasticsearch_dsl", 35, True), # selected + ("elasticsearch_dsl/query.py", 19, False), + ("test_elasticsearch_dsl/test_search.py", 15, False), + ("elasticsearch_dsl/utils.py", 14, False), + ("test_elasticsearch_dsl/test_query.py", 13, False), + ("elasticsearch_dsl/search.py", 12, False), + ("elasticsearch_dsl/aggs.py", 11, False), + ("test_elasticsearch_dsl/test_result.py", 5, False), + 
("elasticsearch_dsl/result.py", 3, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 2, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 6, False), + (datetime(2014, 3, 10, 0, 0), 1, False), + (datetime(2014, 3, 15, 0, 0), 3, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 1, False), + (datetime(2014, 3, 24, 0, 0), 7, False), + (datetime(2014, 4, 20, 0, 0), 1, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 1, False), + (datetime(2014, 5, 1, 0, 0), 1, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 12, False), + ("good", 10, False), + ("better", 13, False), + ] == r.facets.deletions + + +def test_range_filters_are_shown_as_selected_and_data_is_filtered( + data_client, commit_search_cls +): + cs = commit_search_cls(filters={"deletions": "better"}) + + r = cs.execute() + + assert 19 == r.hits.total.value + + +def test_pagination(data_client, commit_search_cls): + cs = commit_search_cls() + cs = cs[0:20] + + assert 52 == cs.count() + assert 20 == len(cs.execute()) diff --git a/tests/test_integration/test_index.py b/tests/test_integration/test_index.py new file mode 100644 index 000000000..ec7a23b0c --- /dev/null +++ b/tests/test_integration/test_index.py @@ -0,0 +1,119 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import warnings + +from elasticsearch_dsl import Date, Document, Index, IndexTemplate, Text, analysis + + +class Post(Document): + title = Text(analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword")) + published_from = Date() + + +def test_index_template_works(write_client): + it = IndexTemplate("test-template", "test-*") + it.document(Post) + it.settings(number_of_replicas=0, number_of_shards=1) + it.save() + + i = Index("test-blog") + i.create() + + assert { + "test-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == write_client.indices.get_mapping(index="test-blog") + + +def test_index_can_be_saved_even_with_settings(write_client): + i = Index("test-blog", using=write_client) + i.settings(number_of_shards=3, number_of_replicas=0) + i.save() + i.settings(number_of_replicas=1) + i.save() + + assert ( + "1" == i.get_settings()["test-blog"]["settings"]["index"]["number_of_replicas"] + ) + + +def test_index_exists(data_client): + assert Index("git").exists() + assert not Index("not-there").exists() + + +def test_index_can_be_created_with_settings_and_mappings(write_client): + i = Index("test-blog", using=write_client) + i.document(Post) + i.settings(number_of_replicas=0, number_of_shards=1) + + with warnings.catch_warnings(record=True) as w: + i.create() + assert [ + str(x.message) for x in w if issubclass(x.category, DeprecationWarning) + ] == [] + + assert { + "test-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == write_client.indices.get_mapping(index="test-blog") + + settings = write_client.indices.get_settings(index="test-blog") + assert settings["test-blog"]["settings"]["index"]["number_of_replicas"] == "0" + assert settings["test-blog"]["settings"]["index"]["number_of_shards"] == "1" + assert settings["test-blog"]["settings"]["index"]["analysis"] == { + "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} + } + + +def test_delete(write_client): + write_client.indices.create( + index="test-index", + body={"settings": {"number_of_replicas": 0, "number_of_shards": 1}}, + ) + + i = Index("test-index", using=write_client) + i.delete() + assert not write_client.indices.exists(index="test-index") + + +def test_multiple_indices_with_same_doc_type_work(write_client): + i1 = Index("test-index-1", using=write_client) + i2 = Index("test-index-2", using=write_client) + + for i in (i1, i2): + i.document(Post) + i.create() + + for i in ("test-index-1", "test-index-2"): + settings = write_client.indices.get_settings(index=i) + assert settings[i]["settings"]["index"]["analysis"] == { + "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} + } diff --git a/tests/test_integration/test_mapping.py b/tests/test_integration/test_mapping.py new file mode 100644 index 000000000..ff266a777 --- /dev/null +++ b/tests/test_integration/test_mapping.py @@ -0,0 +1,157 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pytest import raises + +from elasticsearch_dsl import analysis, exceptions, mapping + + +def test_mapping_saved_into_es(write_client): + m = mapping.Mapping() + m.field( + "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") + ) + m.field("tags", "keyword") + m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": { + "name": {"type": "text", "analyzer": "my_analyzer"}, + "tags": {"type": "keyword"}, + } + } + } + } == write_client.indices.get_mapping(index="test-mapping") + + +def test_mapping_saved_into_es_when_index_already_exists_closed(write_client): + m = mapping.Mapping() + m.field( + "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") + ) + write_client.indices.create(index="test-mapping") + + with raises(exceptions.IllegalOperation): + m.save("test-mapping", using=write_client) + + write_client.cluster.health(index="test-mapping", wait_for_status="yellow") + write_client.indices.close(index="test-mapping") + m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": {"name": {"type": "text", "analyzer": "my_analyzer"}} + } + } + } == write_client.indices.get_mapping(index="test-mapping") + + +def test_mapping_saved_into_es_when_index_already_exists_with_analysis(write_client): + m = mapping.Mapping() + analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword") + m.field("name", "text", analyzer=analyzer) + + new_analysis = analyzer.get_analysis_definition() + new_analysis["analyzer"]["other_analyzer"] = { + "type": "custom", + "tokenizer": "whitespace", + } + write_client.indices.create( + index="test-mapping", body={"settings": {"analysis": new_analysis}} + ) + + m.field("title", "text", analyzer=analyzer) + m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": { + "name": {"type": "text", "analyzer": "my_analyzer"}, + "title": {"type": "text", "analyzer": "my_analyzer"}, + } + } + } + } == write_client.indices.get_mapping(index="test-mapping") + + +def test_mapping_gets_updated_from_es(write_client): + write_client.indices.create( + index="test-mapping", + body={ + "settings": {"number_of_shards": 1, "number_of_replicas": 0}, + "mappings": { + "date_detection": False, + "properties": { + "title": { + "type": "text", + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + }, + "created_at": {"type": "date"}, + "comments": { + "type": "nested", + "properties": { + "created": {"type": "date"}, + "author": { + "type": "text", + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + }, + }, + }, + }, + }, + }, + ) + + m = mapping.Mapping.from_es("test-mapping", using=write_client) + + assert ["comments", "created_at", "title"] == list( + sorted(m.properties.properties._d_.keys()) + ) + assert { + "date_detection": False, + "properties": { + "comments": { + "type": "nested", + "properties": { + "created": {"type": "date"}, + "author": { + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + "type": "text", + }, + 
}, + }, + "created_at": {"type": "date"}, + "title": { + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + "type": "text", + }, + }, + } == m.to_dict() + + # test same with alias + write_client.indices.put_alias(index="test-mapping", name="test-alias") + + m2 = mapping.Mapping.from_es("test-alias", using=write_client) + assert m2.to_dict() == m.to_dict() diff --git a/tests/test_integration/test_search.py b/tests/test_integration/test_search.py new file mode 100644 index 000000000..0a338171c --- /dev/null +++ b/tests/test_integration/test_search.py @@ -0,0 +1,184 @@ +# -*- coding: utf-8 -*- +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import unicode_literals + +import warnings + +from elasticsearch import TransportError +from pytest import raises + +from elasticsearch_dsl import Date, Document, Keyword, MultiSearch, Q, Search, Text +from elasticsearch_dsl.response import aggs + +from .test_data import FLAT_DATA + + +class Repository(Document): + created_at = Date() + description = Text(analyzer="snowball") + tags = Keyword() + + @classmethod + def search(cls): + return super(Repository, cls).search().filter("term", commit_repo="repo") + + class Index: + name = "git" + + +class Commit(Document): + class Index: + name = "flat-git" + + +def test_filters_aggregation_buckets_are_accessible(data_client): + has_tests_query = Q("term", files="test_elasticsearch_dsl") + s = Commit.search()[0:0] + s.aggs.bucket("top_authors", "terms", field="author.name.raw").bucket( + "has_tests", "filters", filters={"yes": has_tests_query, "no": ~has_tests_query} + ).metric("lines", "stats", field="stats.lines") + response = s.execute() + + assert isinstance( + response.aggregations.top_authors.buckets[0].has_tests.buckets.yes, aggs.Bucket + ) + assert ( + 35 + == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.doc_count + ) + assert ( + 228 + == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.lines.max + ) + + +def test_top_hits_are_wrapped_in_response(data_client): + s = Commit.search()[0:0] + s.aggs.bucket("top_authors", "terms", field="author.name.raw").metric( + "top_commits", "top_hits", size=5 + ) + response = s.execute() + + top_commits = response.aggregations.top_authors.buckets[0].top_commits + assert isinstance(top_commits, aggs.TopHitsData) + assert 5 == len(top_commits) + + hits = [h for h in top_commits] + assert 5 == len(hits) + assert isinstance(hits[0], Commit) + + +def test_inner_hits_are_wrapped_in_response(data_client): + s = Search(index="git")[0:1].query( + "has_parent", parent_type="repo", inner_hits={}, query=Q("match_all") + ) + response = s.execute() + + commit = response.hits[0] + assert isinstance(commit.meta.inner_hits.repo, response.__class__) + assert 
repr(commit.meta.inner_hits.repo[0]).startswith( + " 0 + assert not response.timed_out + assert response.updated == 52 + assert response.deleted == 0 + assert response.took > 0 + assert response.success() + + +def test_update_by_query_with_script(write_client, setup_ubq_tests): + index = setup_ubq_tests + + ubq = ( + UpdateByQuery(using=write_client) + .index(index) + .filter(~Q("exists", field="parent_shas")) + .script(source="ctx._source.is_public = false") + ) + ubq = ubq.params(conflicts="proceed") + + response = ubq.execute() + assert response.total == 2 + assert response.updated == 2 + assert response.version_conflicts == 0 + + +def test_delete_by_query_with_script(write_client, setup_ubq_tests): + index = setup_ubq_tests + + ubq = ( + UpdateByQuery(using=write_client) + .index(index) + .filter(Q("match", parent_shas="1dd19210b5be92b960f7db6f66ae526288edccc3")) + .script(source='ctx.op = "delete"') + ) + ubq = ubq.params(conflicts="proceed") + + response = ubq.execute() + + assert response.total == 1 + assert response.deleted == 1 + assert response.success() diff --git a/tests/test_mapping.py b/tests/test_mapping.py new file mode 100644 index 000000000..aa4939fbc --- /dev/null +++ b/tests/test_mapping.py @@ -0,0 +1,222 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import json + +from elasticsearch_dsl import Keyword, Nested, Text, analysis, mapping + + +def test_mapping_can_has_fields(): + m = mapping.Mapping() + m.field("name", "text").field("tags", "keyword") + + assert { + "properties": {"name": {"type": "text"}, "tags": {"type": "keyword"}} + } == m.to_dict() + + +def test_mapping_update_is_recursive(): + m1 = mapping.Mapping() + m1.field("title", "text") + m1.field("author", "object") + m1.field("author", "object", properties={"name": {"type": "text"}}) + m1.meta("_all", enabled=False) + m1.meta("dynamic", False) + + m2 = mapping.Mapping() + m2.field("published_from", "date") + m2.field("author", "object", properties={"email": {"type": "text"}}) + m2.field("title", "text") + m2.field("lang", "keyword") + m2.meta("_analyzer", path="lang") + + m1.update(m2, update_only=True) + + assert { + "_all": {"enabled": False}, + "_analyzer": {"path": "lang"}, + "dynamic": False, + "properties": { + "published_from": {"type": "date"}, + "title": {"type": "text"}, + "lang": {"type": "keyword"}, + "author": { + "type": "object", + "properties": {"name": {"type": "text"}, "email": {"type": "text"}}, + }, + }, + } == m1.to_dict() + + +def test_properties_can_iterate_over_all_the_fields(): + m = mapping.Mapping() + m.field("f1", "text", test_attr="f1", fields={"f2": Keyword(test_attr="f2")}) + m.field("f3", Nested(test_attr="f3", properties={"f4": Text(test_attr="f4")})) + + assert {"f1", "f2", "f3", "f4"} == { + f.test_attr for f in m.properties._collect_fields() + } + + +def test_mapping_can_collect_all_analyzers_and_normalizers(): + a1 = analysis.analyzer( + "my_analyzer1", + tokenizer="keyword", + filter=[ + "lowercase", + analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]), + ], + ) + a2 = analysis.analyzer("english") + a3 = analysis.analyzer("unknown_custom") + a4 = analysis.analyzer( + "my_analyzer2", + tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), + filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], + ) + a5 = analysis.analyzer("my_analyzer3", tokenizer="keyword") + n1 = analysis.normalizer("my_normalizer1", filter=["lowercase"]) + n2 = analysis.normalizer( + "my_normalizer2", + filter=[ + "my_filter1", + "my_filter2", + analysis.token_filter("my_filter3", "stop", stopwords=["e", "f"]), + ], + ) + n3 = analysis.normalizer("unknown_custom") + + m = mapping.Mapping() + m.field( + "title", + "text", + analyzer=a1, + fields={"english": Text(analyzer=a2), "unknown": Keyword(search_analyzer=a3)}, + ) + m.field("comments", Nested(properties={"author": Text(analyzer=a4)})) + m.field("normalized_title", "keyword", normalizer=n1) + m.field("normalized_comment", "keyword", normalizer=n2) + m.field("unknown", "keyword", normalizer=n3) + m.meta("_all", analyzer=a5) + + assert { + "analyzer": { + "my_analyzer1": { + "filter": ["lowercase", "my_filter1"], + "tokenizer": "keyword", + "type": "custom", + }, + "my_analyzer2": { + "filter": ["my_filter2"], + "tokenizer": "trigram", + "type": "custom", + }, + "my_analyzer3": {"tokenizer": "keyword", "type": "custom"}, + }, + "normalizer": { + "my_normalizer1": {"filter": ["lowercase"], "type": "custom"}, + "my_normalizer2": { + "filter": ["my_filter1", "my_filter2", "my_filter3"], + "type": "custom", + }, + }, + "filter": { + "my_filter1": {"stopwords": ["a", "b"], "type": "stop"}, + "my_filter2": {"stopwords": ["c", "d"], "type": "stop"}, + "my_filter3": {"stopwords": ["e", "f"], "type": "stop"}, + }, + "tokenizer": {"trigram": {"max_gram": 3, 
"min_gram": 3, "type": "nGram"}}, + } == m._collect_analysis() + + assert json.loads(json.dumps(m.to_dict())) == m.to_dict() + + +def test_mapping_can_collect_multiple_analyzers(): + a1 = analysis.analyzer( + "my_analyzer1", + tokenizer="keyword", + filter=[ + "lowercase", + analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]), + ], + ) + a2 = analysis.analyzer( + "my_analyzer2", + tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), + filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], + ) + m = mapping.Mapping() + m.field("title", "text", analyzer=a1, search_analyzer=a2) + m.field( + "text", + "text", + analyzer=a1, + fields={ + "english": Text(analyzer=a1), + "unknown": Keyword(analyzer=a1, search_analyzer=a2), + }, + ) + assert { + "analyzer": { + "my_analyzer1": { + "filter": ["lowercase", "my_filter1"], + "tokenizer": "keyword", + "type": "custom", + }, + "my_analyzer2": { + "filter": ["my_filter2"], + "tokenizer": "trigram", + "type": "custom", + }, + }, + "filter": { + "my_filter1": {"stopwords": ["a", "b"], "type": "stop"}, + "my_filter2": {"stopwords": ["c", "d"], "type": "stop"}, + }, + "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}}, + } == m._collect_analysis() + + +def test_even_non_custom_analyzers_can_have_params(): + a1 = analysis.analyzer("whitespace", type="pattern", pattern=r"\\s+") + m = mapping.Mapping() + m.field("title", "text", analyzer=a1) + + assert { + "analyzer": {"whitespace": {"type": "pattern", "pattern": r"\\s+"}} + } == m._collect_analysis() + + +def test_resolve_field_can_resolve_multifields(): + m = mapping.Mapping() + m.field("title", "text", fields={"keyword": Keyword()}) + + assert isinstance(m.resolve_field("title.keyword"), Keyword) + + +def test_resolve_nested(): + m = mapping.Mapping() + m.field("n1", "nested", properties={"n2": Nested(properties={"k1": Keyword()})}) + m.field("k2", "keyword") + + nested, field = m.resolve_nested("n1.n2.k1") + assert nested == ["n1", "n1.n2"] + assert isinstance(field, Keyword) + + nested, field = m.resolve_nested("k2") + assert nested == [] + assert isinstance(field, Keyword) diff --git a/tests/test_package.py b/tests/test_package.py new file mode 100644 index 000000000..8f8075dc0 --- /dev/null +++ b/tests/test_package.py @@ -0,0 +1,22 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import elasticsearch_dsl + + +def test__all__is_sorted(): + assert elasticsearch_dsl.__all__ == sorted(elasticsearch_dsl.__all__) diff --git a/tests/test_query.py b/tests/test_query.py new file mode 100644 index 000000000..2c9823eff --- /dev/null +++ b/tests/test_query.py @@ -0,0 +1,555 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pytest import raises + +from elasticsearch_dsl import function, query + + +def test_empty_Q_is_match_all(): + q = query.Q() + + assert isinstance(q, query.MatchAll) + assert query.MatchAll() == q + + +def test_match_to_dict(): + assert {"match": {"f": "value"}} == query.Match(f="value").to_dict() + + +def test_match_to_dict_extra(): + assert {"match": {"f": "value", "boost": 2}} == query.Match( + f="value", boost=2 + ).to_dict() + + +def test_fuzzy_to_dict(): + assert {"fuzzy": {"f": "value"}} == query.Fuzzy(f="value").to_dict() + + +def test_prefix_to_dict(): + assert {"prefix": {"f": "value"}} == query.Prefix(f="value").to_dict() + + +def test_term_to_dict(): + assert {"term": {"_type": "article"}} == query.Term(_type="article").to_dict() + + +def test_bool_to_dict(): + bool = query.Bool(must=[query.Match(f="value")], should=[]) + + assert {"bool": {"must": [{"match": {"f": "value"}}]}} == bool.to_dict() + + +def test_dismax_to_dict(): + assert {"dis_max": {"queries": [{"term": {"_type": "article"}}]}} == query.DisMax( + queries=[query.Term(_type="article")] + ).to_dict() + + +def test_bool_from_dict_issue_318(): + d = {"bool": {"must_not": {"match": {"field": "value"}}}} + q = query.Q(d) + + assert q == ~query.Match(field="value") + + +def test_repr(): + bool = query.Bool(must=[query.Match(f="value")], should=[]) + + assert "Bool(must=[Match(f='value')])" == repr(bool) + + +def test_query_clone(): + bool = query.Bool( + must=[query.Match(x=42)], + should=[query.Match(g="v2")], + must_not=[query.Match(title="value")], + ) + bool_clone = bool._clone() + + assert bool == bool_clone + assert bool is not bool_clone + + +def test_bool_converts_its_init_args_to_queries(): + q = query.Bool(must=[{"match": {"f": "value"}}]) + + assert len(q.must) == 1 + assert q.must[0] == query.Match(f="value") + + +def test_two_queries_make_a_bool(): + q1 = query.Match(f="value1") + q2 = query.Match(message={"query": "this is a test", "opeartor": "and"}) + q = q1 & q2 + + assert isinstance(q, query.Bool) + assert [q1, q2] == q.must + + +def test_other_and_bool_appends_other_to_must(): + q1 = query.Match(f="value1") + qb = query.Bool() + + q = q1 & qb + assert q is not qb + assert q.must[0] == q1 + + +def test_bool_and_other_appends_other_to_must(): + q1 = query.Match(f="value1") + qb = query.Bool() + + q = qb & q1 + assert q is not qb + assert q.must[0] == q1 + + +def test_bool_and_other_sets_min_should_match_if_needed(): + q1 = query.Q("term", category=1) + q2 = query.Q( + "bool", should=[query.Q("term", name="aaa"), query.Q("term", name="bbb")] + ) + + q = q1 & q2 + assert q == query.Bool( + must=[q1], + should=[query.Q("term", name="aaa"), query.Q("term", name="bbb")], + minimum_should_match=1, + ) + + +def test_bool_with_different_minimum_should_match_should_not_be_combined(): + q1 = query.Q( + "bool", + 
minimum_should_match=2, + should=[ + query.Q("term", field="aa1"), + query.Q("term", field="aa2"), + query.Q("term", field="aa3"), + query.Q("term", field="aa4"), + ], + ) + q2 = query.Q( + "bool", + minimum_should_match=3, + should=[ + query.Q("term", field="bb1"), + query.Q("term", field="bb2"), + query.Q("term", field="bb3"), + query.Q("term", field="bb4"), + ], + ) + q3 = query.Q( + "bool", + minimum_should_match=4, + should=[ + query.Q("term", field="cc1"), + query.Q("term", field="cc2"), + query.Q("term", field="cc3"), + query.Q("term", field="cc4"), + ], + ) + + q4 = q1 | q2 + assert q4 == query.Bool(should=[q1, q2]) + + q5 = q1 | q2 | q3 + assert q5 == query.Bool(should=[q1, q2, q3]) + + +def test_empty_bool_has_min_should_match_0(): + assert 0 == query.Bool()._min_should_match + + +def test_query_and_query_creates_bool(): + q1 = query.Match(f=42) + q2 = query.Match(g=47) + + q = q1 & q2 + assert isinstance(q, query.Bool) + assert q.must == [q1, q2] + + +def test_match_all_and_query_equals_other(): + q1 = query.Match(f=42) + q2 = query.MatchAll() + + q = q1 & q2 + assert q1 == q + + +def test_not_match_all_is_match_none(): + q = query.MatchAll() + + assert ~q == query.MatchNone() + + +def test_not_match_none_is_match_all(): + q = query.MatchNone() + + assert ~q == query.MatchAll() + + +def test_invert_empty_bool_is_match_none(): + q = query.Bool() + + assert ~q == query.MatchNone() + + +def test_match_none_or_query_equals_query(): + q1 = query.Match(f=42) + q2 = query.MatchNone() + + assert q1 | q2 == query.Match(f=42) + + +def test_match_none_and_query_equals_match_none(): + q1 = query.Match(f=42) + q2 = query.MatchNone() + + assert q1 & q2 == query.MatchNone() + + +def test_bool_and_bool(): + qt1, qt2, qt3 = query.Match(f=1), query.Match(f=2), query.Match(f=3) + + q1 = query.Bool(must=[qt1], should=[qt2]) + q2 = query.Bool(must_not=[qt3]) + assert q1 & q2 == query.Bool( + must=[qt1], must_not=[qt3], should=[qt2], minimum_should_match=0 + ) + + q1 = query.Bool(must=[qt1], should=[qt1, qt2]) + q2 = query.Bool(should=[qt3]) + assert q1 & q2 == query.Bool( + must=[qt1, qt3], should=[qt1, qt2], minimum_should_match=0 + ) + + +def test_bool_and_bool_with_min_should_match(): + qt1, qt2 = query.Match(f=1), query.Match(f=2) + q1 = query.Q("bool", minimum_should_match=1, should=[qt1]) + q2 = query.Q("bool", minimum_should_match=1, should=[qt2]) + + assert query.Q("bool", must=[qt1, qt2]) == q1 & q2 + + +def test_inverted_query_becomes_bool_with_must_not(): + q = query.Match(f=42) + + assert ~q == query.Bool(must_not=[query.Match(f=42)]) + + +def test_inverted_query_with_must_not_become_should(): + q = query.Q("bool", must_not=[query.Q("match", f=1), query.Q("match", f=2)]) + + assert ~q == query.Q("bool", should=[query.Q("match", f=1), query.Q("match", f=2)]) + + +def test_inverted_query_with_must_and_must_not(): + q = query.Q( + "bool", + must=[query.Q("match", f=3), query.Q("match", f=4)], + must_not=[query.Q("match", f=1), query.Q("match", f=2)], + ) + print((~q).to_dict()) + assert ~q == query.Q( + "bool", + should=[ + # negation of must + query.Q("bool", must_not=[query.Q("match", f=3)]), + query.Q("bool", must_not=[query.Q("match", f=4)]), + # negation of must_not + query.Q("match", f=1), + query.Q("match", f=2), + ], + ) + + +def test_double_invert_returns_original_query(): + q = query.Match(f=42) + + assert q == ~~q + + +def test_bool_query_gets_inverted_internally(): + q = query.Bool(must_not=[query.Match(f=42)], must=[query.Match(g="v")]) + + assert ~q == query.Bool( + should=[ + 
# negating must + query.Bool(must_not=[query.Match(g="v")]), + # negating must_not + query.Match(f=42), + ] + ) + + +def test_match_all_or_something_is_match_all(): + q1 = query.MatchAll() + q2 = query.Match(f=42) + + assert (q1 | q2) == query.MatchAll() + assert (q2 | q1) == query.MatchAll() + + +def test_or_produces_bool_with_should(): + q1 = query.Match(f=42) + q2 = query.Match(g="v") + + q = q1 | q2 + assert q == query.Bool(should=[q1, q2]) + + +def test_or_bool_doesnt_loop_infinitely_issue_37(): + q = query.Match(f=42) | ~query.Match(f=47) + + assert q == query.Bool( + should=[query.Bool(must_not=[query.Match(f=47)]), query.Match(f=42)] + ) + + +def test_or_bool_doesnt_loop_infinitely_issue_96(): + q = ~query.Match(f=42) | ~query.Match(f=47) + + assert q == query.Bool( + should=[ + query.Bool(must_not=[query.Match(f=42)]), + query.Bool(must_not=[query.Match(f=47)]), + ] + ) + + +def test_bool_will_append_another_query_with_or(): + qb = query.Bool(should=[query.Match(f="v"), query.Match(f="v2")]) + q = query.Match(g=42) + + assert (q | qb) == query.Bool(should=[query.Match(f="v"), query.Match(f="v2"), q]) + + +def test_bool_queries_with_only_should_get_concatenated(): + q1 = query.Bool(should=[query.Match(f=1), query.Match(f=2)]) + q2 = query.Bool(should=[query.Match(f=3), query.Match(f=4)]) + + assert (q1 | q2) == query.Bool( + should=[query.Match(f=1), query.Match(f=2), query.Match(f=3), query.Match(f=4)] + ) + + +def test_two_bool_queries_append_one_to_should_if_possible(): + q1 = query.Bool(should=[query.Match(f="v")]) + q2 = query.Bool(must=[query.Match(f="v")]) + + assert (q1 | q2) == query.Bool( + should=[query.Match(f="v"), query.Bool(must=[query.Match(f="v")])] + ) + assert (q2 | q1) == query.Bool( + should=[query.Match(f="v"), query.Bool(must=[query.Match(f="v")])] + ) + + +def test_queries_are_registered(): + assert "match" in query.Query._classes + assert query.Query._classes["match"] is query.Match + + +def test_defining_query_registers_it(): + class MyQuery(query.Query): + name = "my_query" + + assert "my_query" in query.Query._classes + assert query.Query._classes["my_query"] is MyQuery + + +def test_Q_passes_query_through(): + q = query.Match(f="value1") + + assert query.Q(q) is q + + +def test_Q_constructs_query_by_name(): + q = query.Q("match", f="value") + + assert isinstance(q, query.Match) + assert {"f": "value"} == q._params + + +def test_Q_translates_double_underscore_to_dots_in_param_names(): + q = query.Q("match", comment__author="honza") + + assert {"comment.author": "honza"} == q._params + + +def test_Q_doesn_translate_double_underscore_to_dots_in_param_names(): + q = query.Q("match", comment__author="honza", _expand__to_dot=False) + + assert {"comment__author": "honza"} == q._params + + +def test_Q_constructs_simple_query_from_dict(): + q = query.Q({"match": {"f": "value"}}) + + assert isinstance(q, query.Match) + assert {"f": "value"} == q._params + + +def test_Q_constructs_compound_query_from_dict(): + q = query.Q({"bool": {"must": [{"match": {"f": "value"}}]}}) + + assert q == query.Bool(must=[query.Match(f="value")]) + + +def test_Q_raises_error_when_passed_in_dict_and_params(): + with raises(Exception): + query.Q({"match": {"f": "value"}}, f="value") + + +def test_Q_raises_error_when_passed_in_query_and_params(): + q = query.Match(f="value1") + + with raises(Exception): + query.Q(q, f="value") + + +def test_Q_raises_error_on_unknown_query(): + with raises(Exception): + query.Q("not a query", f="value") + + +def test_match_all_and_anything_is_anything(): 
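+    # MatchAll() is the identity element for "&": combining it with any other
+    # query should simply return that other query, as asserted below.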
+ q = query.MatchAll() + + s = query.Match(f=42) + assert q & s == s + assert s & q == s + + +def test_function_score_with_functions(): + q = query.Q( + "function_score", + functions=[query.SF("script_score", script="doc['comment_count'] * _score")], + ) + + assert { + "function_score": { + "functions": [{"script_score": {"script": "doc['comment_count'] * _score"}}] + } + } == q.to_dict() + + +def test_function_score_with_no_function_is_boost_factor(): + q = query.Q( + "function_score", + functions=[query.SF({"weight": 20, "filter": query.Q("term", f=42)})], + ) + + assert { + "function_score": {"functions": [{"filter": {"term": {"f": 42}}, "weight": 20}]} + } == q.to_dict() + + +def test_function_score_to_dict(): + q = query.Q( + "function_score", + query=query.Q("match", title="python"), + functions=[ + query.SF("random_score"), + query.SF( + "field_value_factor", + field="comment_count", + filter=query.Q("term", tags="python"), + ), + ], + ) + + d = { + "function_score": { + "query": {"match": {"title": "python"}}, + "functions": [ + {"random_score": {}}, + { + "filter": {"term": {"tags": "python"}}, + "field_value_factor": {"field": "comment_count"}, + }, + ], + } + } + assert d == q.to_dict() + + +def test_function_score_with_single_function(): + d = { + "function_score": { + "filter": {"term": {"tags": "python"}}, + "script_score": {"script": "doc['comment_count'] * _score"}, + } + } + + q = query.Q(d) + assert isinstance(q, query.FunctionScore) + assert isinstance(q.filter, query.Term) + assert len(q.functions) == 1 + + sf = q.functions[0] + assert isinstance(sf, function.ScriptScore) + assert "doc['comment_count'] * _score" == sf.script + + +def test_function_score_from_dict(): + d = { + "function_score": { + "filter": {"term": {"tags": "python"}}, + "functions": [ + { + "filter": {"terms": {"tags": "python"}}, + "script_score": {"script": "doc['comment_count'] * _score"}, + }, + {"boost_factor": 6}, + ], + } + } + + q = query.Q(d) + assert isinstance(q, query.FunctionScore) + assert isinstance(q.filter, query.Term) + assert len(q.functions) == 2 + + sf = q.functions[0] + assert isinstance(sf, function.ScriptScore) + assert isinstance(sf.filter, query.Terms) + + sf = q.functions[1] + assert isinstance(sf, function.BoostFactor) + assert 6 == sf.value + assert {"boost_factor": 6} == sf.to_dict() + + +def test_script_score(): + d = { + "script_score": { + "query": {"match_all": {}}, + "script": {"source": "...", "params": {}}, + } + } + q = query.Q(d) + + assert isinstance(q, query.ScriptScore) + assert isinstance(q.query, query.MatchAll) + assert q.script == {"source": "...", "params": {}} + assert q.to_dict() == d diff --git a/test_elasticsearch_dsl/test_result.py b/tests/test_result.py similarity index 64% rename from test_elasticsearch_dsl/test_result.py rename to tests/test_result.py index a97ade01b..a1f485edc 100644 --- a/test_elasticsearch_dsl/test_result.py +++ b/tests/test_result.py @@ -1,15 +1,35 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import pickle from datetime import date -from pytest import raises, fixture -from elasticsearch_dsl import response, Search, Document, Date, Object +from pytest import fixture, raises + +from elasticsearch_dsl import Date, Document, Object, Search, response from elasticsearch_dsl.aggs import Terms -from elasticsearch_dsl.response.aggs import AggResponse, BucketData, Bucket +from elasticsearch_dsl.response.aggs import AggResponse, Bucket, BucketData + @fixture def agg_response(aggs_search, aggs_data): return response.Response(aggs_search, aggs_data) + def test_agg_response_is_pickleable(agg_response): agg_response.hits r = pickle.loads(pickle.dumps(agg_response)) @@ -18,6 +38,7 @@ def test_agg_response_is_pickleable(agg_response): assert r._search == agg_response._search assert r.hits == agg_response.hits + def test_response_is_pickleable(dummy_response): res = response.Response(Search(), dummy_response) res.hits @@ -27,6 +48,7 @@ def test_response_is_pickleable(dummy_response): assert r._search == res._search assert r.hits == res.hits + def test_hit_is_pickleable(dummy_response): res = response.Response(Search(), dummy_response) hits = pickle.loads(pickle.dumps(res.hits)) @@ -34,12 +56,14 @@ def test_hit_is_pickleable(dummy_response): assert hits == res.hits assert hits[0].meta == res.hits[0].meta + def test_response_stores_search(dummy_response): s = Search() r = response.Response(s, dummy_response) assert r._search is s + def test_attribute_error_in_hits_is_not_hidden(dummy_response): def f(hit): raise AttributeError() @@ -49,33 +73,42 @@ def f(hit): with raises(TypeError): r.hits + def test_interactive_helpers(dummy_response): res = response.Response(Search(), dummy_response) hits = res.hits h = hits[0] - rhits = "[, , , ]".format( - repr(dummy_response['hits']['hits'][0]['_source']), - repr(dummy_response['hits']['hits'][1]['_source'])[:60], - repr(dummy_response['hits']['hits'][2]['_source'])[:60], + rhits = ( + "[, , " + ", ]" + ).format( + repr(dummy_response["hits"]["hits"][0]["_source"]), + repr(dummy_response["hits"]["hits"][1]["_source"])[:60], + repr(dummy_response["hits"]["hits"][2]["_source"])[:60], ) assert res - assert '' % rhits == repr(res) + assert "" % rhits == repr(res) assert rhits == repr(hits) - assert {'meta', 'city', 'name'} == set(dir(h)) - assert "" % dummy_response['hits']['hits'][0]['_source'] == repr(h) + assert {"meta", "city", "name"} == set(dir(h)) + assert "" % dummy_response["hits"]["hits"][0][ + "_source" + ] == repr(h) + def test_empty_response_is_false(dummy_response): - dummy_response['hits']['hits'] = [] + dummy_response["hits"]["hits"] = [] res = response.Response(Search(), dummy_response) assert not res + def test_len_response(dummy_response): res = response.Response(Search(), dummy_response) assert len(res) == 4 + def test_iterating_over_response_gives_you_hits(dummy_response): res = response.Response(Search(), dummy_response) hits = list(h for h in res) @@ -86,12 +119,13 @@ def test_iterating_over_response_gives_you_hits(dummy_response): assert all(isinstance(h, response.Hit) for h in hits) h = hits[0] - assert 'test-index' == 
h.meta.index - assert 'company' == h.meta.doc_type - assert 'elasticsearch' == h.meta.id + assert "test-index" == h.meta.index + assert "company" == h.meta.doc_type + assert "elasticsearch" == h.meta.id assert 12 == h.meta.score - assert hits[1].meta.routing == 'elasticsearch' + assert hits[1].meta.routing == "elasticsearch" + def test_hits_get_wrapped_to_contain_additional_attrs(dummy_response): res = response.Response(Search(), dummy_response) @@ -100,46 +134,53 @@ def test_hits_get_wrapped_to_contain_additional_attrs(dummy_response): assert 123 == hits.total assert 12.0 == hits.max_score + def test_hits_provide_dot_and_bracket_access_to_attrs(dummy_response): res = response.Response(Search(), dummy_response) h = res.hits[0] - assert 'Elasticsearch' == h.name - assert 'Elasticsearch' == h['name'] + assert "Elasticsearch" == h.name + assert "Elasticsearch" == h["name"] - assert 'Honza' == res.hits[2].name.first + assert "Honza" == res.hits[2].name.first with raises(KeyError): - h['not_there'] + h["not_there"] with raises(AttributeError): h.not_there + def test_slicing_on_response_slices_on_hits(dummy_response): res = response.Response(Search(), dummy_response) assert res[0] is res.hits[0] assert res[::-1] == res.hits[::-1] + def test_aggregation_base(agg_response): assert agg_response.aggs is agg_response.aggregations assert isinstance(agg_response.aggs, response.AggResponse) + def test_metric_agg_works(agg_response): assert 25052.0 == agg_response.aggs.sum_lines.value + def test_aggregations_can_be_iterated_over(agg_response): aggs = [a for a in agg_response.aggs] assert len(aggs) == 3 assert all(map(lambda a: isinstance(a, AggResponse), aggs)) + def test_aggregations_can_be_retrieved_by_name(agg_response, aggs_search): - a = agg_response.aggs['popular_files'] + a = agg_response.aggs["popular_files"] assert isinstance(a, BucketData) - assert isinstance(a._meta['aggs'], Terms) - assert a._meta['aggs'] is aggs_search.aggs.aggs['popular_files'] + assert isinstance(a._meta["aggs"], Terms) + assert a._meta["aggs"] is aggs_search.aggs.aggs["popular_files"] + def test_bucket_response_can_be_iterated_over(agg_response): popular_files = agg_response.aggregations.popular_files @@ -148,12 +189,13 @@ def test_bucket_response_can_be_iterated_over(agg_response): assert all(isinstance(b, Bucket) for b in buckets) assert buckets == popular_files.buckets + def test_bucket_keys_get_deserialized(aggs_data, aggs_search): class Commit(Document): - info = Object(properties={'committed_date': Date()}) + info = Object(properties={"committed_date": Date()}) class Index: - name = 'test-commit' + name = "test-commit" aggs_search = aggs_search.doc_type(Commit) agg_response = response.Response(aggs_search, aggs_data) diff --git a/tests/test_search.py b/tests/test_search.py new file mode 100644 index 000000000..cf6f1c86a --- /dev/null +++ b/tests/test_search.py @@ -0,0 +1,586 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from copy import deepcopy + +from pytest import raises + +from elasticsearch_dsl import Document, Q, query, search +from elasticsearch_dsl.connections import CLIENT_HAS_NAMED_BODY_PARAMS +from elasticsearch_dsl.exceptions import IllegalOperation + + +def test_expand__to_dot_is_respected(): + s = search.Search().query("match", a__b=42, _expand__to_dot=False) + + assert {"query": {"match": {"a__b": 42}}} == s.to_dict() + + +def test_execute_uses_cache(): + s = search.Search() + r = object() + s._response = r + + assert r is s.execute() + + +def test_cache_can_be_ignored(mock_client): + s = search.Search(using="mock") + r = object() + s._response = r + s.execute(ignore_cache=True) + + if CLIENT_HAS_NAMED_BODY_PARAMS: + mock_client.search.assert_called_once_with(index=None) + else: + mock_client.search.assert_called_once_with(index=None, body={}) + + +def test_iter_iterates_over_hits(): + s = search.Search() + s._response = [1, 2, 3] + + assert [1, 2, 3] == list(s) + + +def test_cache_isnt_cloned(): + s = search.Search() + s._response = object() + + assert not hasattr(s._clone(), "_response") + + +def test_search_starts_with_no_query(): + s = search.Search() + + assert s.query._proxied is None + + +def test_search_query_combines_query(): + s = search.Search() + + s2 = s.query("match", f=42) + assert s2.query._proxied == query.Match(f=42) + assert s.query._proxied is None + + s3 = s2.query("match", f=43) + assert s2.query._proxied == query.Match(f=42) + assert s3.query._proxied == query.Bool(must=[query.Match(f=42), query.Match(f=43)]) + + +def test_query_can_be_assigned_to(): + s = search.Search() + + q = Q("match", title="python") + s.query = q + + assert s.query._proxied is q + + +def test_query_can_be_wrapped(): + s = search.Search().query("match", title="python") + + s.query = Q("function_score", query=s.query, field_value_factor={"field": "rating"}) + + assert { + "query": { + "function_score": { + "functions": [{"field_value_factor": {"field": "rating"}}], + "query": {"match": {"title": "python"}}, + } + } + } == s.to_dict() + + +def test_using(): + o = object() + o2 = object() + s = search.Search(using=o) + assert s._using is o + s2 = s.using(o2) + assert s._using is o + assert s2._using is o2 + + +def test_methods_are_proxied_to_the_query(): + s = search.Search().query("match_all") + + assert s.query.to_dict() == {"match_all": {}} + + +def test_query_always_returns_search(): + s = search.Search() + + assert isinstance(s.query("match", f=42), search.Search) + + +def test_source_copied_on_clone(): + s = search.Search().source(False) + assert s._clone()._source == s._source + assert s._clone()._source is False + + s2 = search.Search().source([]) + assert s2._clone()._source == s2._source + assert s2._source == [] + + s3 = search.Search().source(["some", "fields"]) + assert s3._clone()._source == s3._source + assert s3._clone()._source == ["some", "fields"] + + +def test_copy_clones(): + from copy import copy + + s1 = search.Search().source(["some", "fields"]) + s2 = copy(s1) + + assert s1 == s2 + assert s1 is not s2 + + +def test_aggs_allow_two_metric(): + s = 
search.Search() + + s.aggs.metric("a", "max", field="a").metric("b", "max", field="b") + + assert s.to_dict() == { + "aggs": {"a": {"max": {"field": "a"}}, "b": {"max": {"field": "b"}}} + } + + +def test_aggs_get_copied_on_change(): + s = search.Search().query("match_all") + s.aggs.bucket("per_tag", "terms", field="f").metric( + "max_score", "max", field="score" + ) + + s2 = s.query("match_all") + s2.aggs.bucket("per_month", "date_histogram", field="date", interval="month") + s3 = s2.query("match_all") + s3.aggs["per_month"].metric("max_score", "max", field="score") + s4 = s3._clone() + s4.aggs.metric("max_score", "max", field="score") + + d = { + "query": {"match_all": {}}, + "aggs": { + "per_tag": { + "terms": {"field": "f"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } + }, + } + + assert d == s.to_dict() + d["aggs"]["per_month"] = {"date_histogram": {"field": "date", "interval": "month"}} + assert d == s2.to_dict() + d["aggs"]["per_month"]["aggs"] = {"max_score": {"max": {"field": "score"}}} + assert d == s3.to_dict() + d["aggs"]["max_score"] = {"max": {"field": "score"}} + assert d == s4.to_dict() + + +def test_search_index(): + s = search.Search(index="i") + assert s._index == ["i"] + s = s.index("i2") + assert s._index == ["i", "i2"] + s = s.index(u"i3") + assert s._index == ["i", "i2", "i3"] + s = s.index() + assert s._index is None + s = search.Search(index=("i", "i2")) + assert s._index == ["i", "i2"] + s = search.Search(index=["i", "i2"]) + assert s._index == ["i", "i2"] + s = search.Search() + s = s.index("i", "i2") + assert s._index == ["i", "i2"] + s2 = s.index("i3") + assert s._index == ["i", "i2"] + assert s2._index == ["i", "i2", "i3"] + s = search.Search() + s = s.index(["i", "i2"], "i3") + assert s._index == ["i", "i2", "i3"] + s2 = s.index("i4") + assert s._index == ["i", "i2", "i3"] + assert s2._index == ["i", "i2", "i3", "i4"] + s2 = s.index(["i4"]) + assert s2._index == ["i", "i2", "i3", "i4"] + s2 = s.index(("i4", "i5")) + assert s2._index == ["i", "i2", "i3", "i4", "i5"] + + +def test_doc_type_document_class(): + class MyDocument(Document): + pass + + s = search.Search(doc_type=MyDocument) + assert s._doc_type == [MyDocument] + assert s._doc_type_map == {} + + s = search.Search().doc_type(MyDocument) + assert s._doc_type == [MyDocument] + assert s._doc_type_map == {} + + +def test_sort(): + s = search.Search() + s = s.sort("fielda", "-fieldb") + + assert ["fielda", {"fieldb": {"order": "desc"}}] == s._sort + assert {"sort": ["fielda", {"fieldb": {"order": "desc"}}]} == s.to_dict() + + s = s.sort() + assert [] == s._sort + assert search.Search().to_dict() == s.to_dict() + + +def test_sort_by_score(): + s = search.Search() + s = s.sort("_score") + assert {"sort": ["_score"]} == s.to_dict() + + s = search.Search() + with raises(IllegalOperation): + s.sort("-_score") + + +def test_slice(): + s = search.Search() + assert {"from": 3, "size": 7} == s[3:10].to_dict() + assert {"from": 0, "size": 5} == s[:5].to_dict() + assert {"from": 3, "size": 10} == s[3:].to_dict() + assert {"from": 0, "size": 0} == s[0:0].to_dict() + assert {"from": 20, "size": 0} == s[20:0].to_dict() + + +def test_index(): + s = search.Search() + assert {"from": 3, "size": 1} == s[3].to_dict() + + +def test_search_to_dict(): + s = search.Search() + assert {} == s.to_dict() + + s = s.query("match", f=42) + assert {"query": {"match": {"f": 42}}} == s.to_dict() + + assert {"query": {"match": {"f": 42}}, "size": 10} == s.to_dict(size=10) + + s.aggs.bucket("per_tag", "terms", 
field="f").metric( + "max_score", "max", field="score" + ) + d = { + "aggs": { + "per_tag": { + "terms": {"field": "f"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } + }, + "query": {"match": {"f": 42}}, + } + assert d == s.to_dict() + + s = search.Search(extra={"size": 5}) + assert {"size": 5} == s.to_dict() + s = s.extra(from_=42) + assert {"size": 5, "from": 42} == s.to_dict() + + +def test_complex_example(): + s = search.Search() + s = ( + s.query("match", title="python") + .query(~Q("match", title="ruby")) + .filter(Q("term", category="meetup") | Q("term", category="conference")) + .post_filter("terms", tags=["prague", "czech"]) + .script_fields(more_attendees="doc['attendees'].value + 42") + ) + + s.aggs.bucket("per_country", "terms", field="country").metric( + "avg_attendees", "avg", field="attendees" + ) + + s.query.minimum_should_match = 2 + + s = s.highlight_options(order="score").highlight("title", "body", fragment_size=50) + + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + "post_filter": {"terms": {"tags": ["prague", "czech"]}}, + "aggs": { + "per_country": { + "terms": {"field": "country"}, + "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}}, + } + }, + "highlight": { + "order": "score", + "fields": {"title": {"fragment_size": 50}, "body": {"fragment_size": 50}}, + }, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } == s.to_dict() + + +def test_reverse(): + d = { + "query": { + "filtered": { + "filter": { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + }, + "query": { + "bool": { + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + } + }, + "post_filter": {"bool": {"must": [{"terms": {"tags": ["prague", "czech"]}}]}}, + "aggs": { + "per_country": { + "terms": {"field": "country"}, + "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}}, + } + }, + "sort": ["title", {"category": {"order": "desc"}}, "_score"], + "size": 5, + "highlight": {"order": "score", "fields": {"title": {"fragment_size": 50}}}, + "suggest": { + "my-title-suggestions-1": { + "text": "devloping distibutd saerch engies", + "term": {"size": 3, "field": "title"}, + } + }, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } + + d2 = deepcopy(d) + + s = search.Search.from_dict(d) + + # make sure we haven't modified anything in place + assert d == d2 + assert {"size": 5} == s._extra + assert d == s.to_dict() + + +def test_from_dict_doesnt_need_query(): + s = search.Search.from_dict({"size": 5}) + + assert {"size": 5} == s.to_dict() + + +def test_params_being_passed_to_search(mock_client): + s = search.Search(using="mock") + s = s.params(routing="42") + s.execute() + + if CLIENT_HAS_NAMED_BODY_PARAMS: + mock_client.search.assert_called_once_with(index=None, routing="42") + else: + mock_client.search.assert_called_once_with(index=None, body={}, routing="42") + + +def test_source(): + assert {} == search.Search().source().to_dict() + + assert { + "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]} + } == search.Search().source(includes=["foo.bar.*"], excludes=["foo.one"]).to_dict() + + assert {"_source": False} == 
search.Search().source(False).to_dict() + + assert {"_source": ["f1", "f2"]} == search.Search().source( + includes=["foo.bar.*"], excludes=["foo.one"] + ).source(["f1", "f2"]).to_dict() + + +def test_source_on_clone(): + assert { + "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]}, + "query": {"bool": {"filter": [{"term": {"title": "python"}}]}}, + } == search.Search().source(includes=["foo.bar.*"]).source( + excludes=["foo.one"] + ).filter( + "term", title="python" + ).to_dict() + assert { + "_source": False, + "query": {"bool": {"filter": [{"term": {"title": "python"}}]}}, + } == search.Search().source(False).filter("term", title="python").to_dict() + + +def test_source_on_clear(): + assert ( + {} + == search.Search() + .source(includes=["foo.bar.*"]) + .source(includes=None, excludes=None) + .to_dict() + ) + + +def test_suggest_accepts_global_text(): + s = search.Search.from_dict( + { + "suggest": { + "text": "the amsterdma meetpu", + "my-suggest-1": {"term": {"field": "title"}}, + "my-suggest-2": {"text": "other", "term": {"field": "body"}}, + } + } + ) + + assert { + "suggest": { + "my-suggest-1": { + "term": {"field": "title"}, + "text": "the amsterdma meetpu", + }, + "my-suggest-2": {"term": {"field": "body"}, "text": "other"}, + } + } == s.to_dict() + + +def test_suggest(): + s = search.Search() + s = s.suggest("my_suggestion", "pyhton", term={"field": "title"}) + + assert { + "suggest": {"my_suggestion": {"term": {"field": "title"}, "text": "pyhton"}} + } == s.to_dict() + + +def test_exclude(): + s = search.Search() + s = s.exclude("match", title="python") + + assert { + "query": { + "bool": { + "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] + } + } + } == s.to_dict() + + +def test_delete_by_query(mock_client): + s = search.Search(using="mock").query("match", lang="java") + s.delete() + + mock_client.delete_by_query.assert_called_once_with( + index=None, body={"query": {"match": {"lang": "java"}}} + ) + + +def test_update_from_dict(): + s = search.Search() + s.update_from_dict({"indices_boost": [{"important-documents": 2}]}) + s.update_from_dict({"_source": ["id", "name"]}) + + assert { + "indices_boost": [{"important-documents": 2}], + "_source": ["id", "name"], + } == s.to_dict() + + +def test_rescore_query_to_dict(): + s = search.Search(index="index-name") + + positive_query = Q( + "function_score", + query=Q("term", tags="a"), + script_score={"script": "_score * 1"}, + ) + + negative_query = Q( + "function_score", + query=Q("term", tags="b"), + script_score={"script": "_score * -100"}, + ) + + s = s.query(positive_query) + s = s.extra( + rescore={"window_size": 100, "query": {"rescore_query": negative_query}} + ) + assert s.to_dict() == { + "query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + }, + "rescore": { + "window_size": 100, + "query": { + "rescore_query": { + "function_score": { + "query": {"term": {"tags": "b"}}, + "functions": [{"script_score": {"script": "_score * -100"}}], + } + } + }, + }, + } + + assert s.to_dict( + rescore={"window_size": 10, "query": {"rescore_query": positive_query}} + ) == { + "query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + }, + "rescore": { + "window_size": 10, + "query": { + "rescore_query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + } + }, + }, + } diff 
--git a/tests/test_update_by_query.py b/tests/test_update_by_query.py new file mode 100644 index 000000000..c10fde878 --- /dev/null +++ b/tests/test_update_by_query.py @@ -0,0 +1,173 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from copy import deepcopy + +from elasticsearch_dsl import Q, UpdateByQuery +from elasticsearch_dsl.response import UpdateByQueryResponse + + +def test_ubq_starts_with_no_query(): + ubq = UpdateByQuery() + + assert ubq.query._proxied is None + + +def test_ubq_to_dict(): + ubq = UpdateByQuery() + assert {} == ubq.to_dict() + + ubq = ubq.query("match", f=42) + assert {"query": {"match": {"f": 42}}} == ubq.to_dict() + + assert {"query": {"match": {"f": 42}}, "size": 10} == ubq.to_dict(size=10) + + ubq = UpdateByQuery(extra={"size": 5}) + assert {"size": 5} == ubq.to_dict() + + ubq = UpdateByQuery(extra={"extra_q": Q("term", category="conference")}) + assert {"extra_q": {"term": {"category": "conference"}}} == ubq.to_dict() + + +def test_complex_example(): + ubq = UpdateByQuery() + ubq = ( + ubq.query("match", title="python") + .query(~Q("match", title="ruby")) + .filter(Q("term", category="meetup") | Q("term", category="conference")) + .script( + source="ctx._source.likes += params.f", lang="painless", params={"f": 3} + ) + ) + + ubq.query.minimum_should_match = 2 + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + }, + } == ubq.to_dict() + + +def test_exclude(): + ubq = UpdateByQuery() + ubq = ubq.exclude("match", title="python") + + assert { + "query": { + "bool": { + "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] + } + } + } == ubq.to_dict() + + +def test_reverse(): + d = { + "query": { + "filtered": { + "filter": { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + }, + "query": { + "bool": { + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + } + }, + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + }, + } + + d2 = deepcopy(d) + + ubq = UpdateByQuery.from_dict(d) + + assert d == d2 + assert d == ubq.to_dict() + + +def test_from_dict_doesnt_need_query(): + ubq = UpdateByQuery.from_dict({"script": {"source": "test"}}) + + assert {"script": {"source": "test"}} == ubq.to_dict() + + +def test_params_being_passed_to_search(mock_client): + ubq = UpdateByQuery(using="mock") + ubq 
= ubq.params(routing="42") + ubq.execute() + + mock_client.update_by_query.assert_called_once_with( + index=None, body={}, routing="42" + ) + + +def test_overwrite_script(): + ubq = UpdateByQuery() + ubq = ubq.script( + source="ctx._source.likes += params.f", lang="painless", params={"f": 3} + ) + assert { + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + } + } == ubq.to_dict() + ubq = ubq.script(source="ctx._source.likes++") + assert {"script": {"source": "ctx._source.likes++"}} == ubq.to_dict() + + +def test_update_by_query_response_success(): + ubqr = UpdateByQueryResponse({}, {"timed_out": False, "failures": []}) + assert ubqr.success() + + ubqr = UpdateByQueryResponse({}, {"timed_out": True, "failures": []}) + assert not ubqr.success() + + ubqr = UpdateByQueryResponse({}, {"timed_out": False, "failures": [{}]}) + assert not ubqr.success() diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 000000000..38caad45a --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,102 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
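+# Tests for the small helper layer: the AttrDict/AttrList wrappers, merge(),
+# the JSON serializer, and recursive_to_dict().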
+ +import pickle + +from pytest import raises + +from elasticsearch_dsl import Q, serializer, utils + + +def test_attrdict_pickle(): + ad = utils.AttrDict({}) + + pickled_ad = pickle.dumps(ad) + assert ad == pickle.loads(pickled_ad) + + +def test_attrlist_pickle(): + al = utils.AttrList([]) + + pickled_al = pickle.dumps(al) + assert al == pickle.loads(pickled_al) + + +def test_attrlist_slice(): + class MyAttrDict(utils.AttrDict): + pass + + l = utils.AttrList([{}, {}], obj_wrapper=MyAttrDict) + assert isinstance(l[:][0], MyAttrDict) + + +def test_merge(): + a = utils.AttrDict({"a": {"b": 42, "c": 47}}) + b = {"a": {"b": 123, "d": -12}, "e": [1, 2, 3]} + + utils.merge(a, b) + + assert a == {"a": {"b": 123, "c": 47, "d": -12}, "e": [1, 2, 3]} + + +def test_merge_conflict(): + for d in ( + {"a": 42}, + {"a": {"b": 47}}, + ): + utils.merge({"a": {"b": 42}}, d) + with raises(ValueError): + utils.merge({"a": {"b": 42}}, d, True) + + +def test_attrdict_bool(): + d = utils.AttrDict({}) + + assert not d + d.title = "Title" + assert d + + +def test_attrlist_items_get_wrapped_during_iteration(): + al = utils.AttrList([1, object(), [1], {}]) + + l = list(iter(al)) + + assert isinstance(l[2], utils.AttrList) + assert isinstance(l[3], utils.AttrDict) + + +def test_serializer_deals_with_Attr_versions(): + d = utils.AttrDict({"key": utils.AttrList([1, 2, 3])}) + + assert serializer.serializer.dumps(d) == serializer.serializer.dumps( + {"key": [1, 2, 3]} + ) + + +def test_serializer_deals_with_objects_with_to_dict(): + class MyClass(object): + def to_dict(self): + return 42 + + assert serializer.serializer.dumps(MyClass()) == "42" + + +def test_recursive_to_dict(): + assert utils.recursive_to_dict({"k": [1, (1.0, {"v": Q("match", key="val")})]}) == { + "k": [1, (1.0, {"v": {"match": {"key": "val"}}})] + } diff --git a/test_elasticsearch_dsl/test_validation.py b/tests/test_validation.py similarity index 62% rename from test_elasticsearch_dsl/test_validation.py rename to tests/test_validation.py index 2d69739cc..ff20b326c 100644 --- a/test_elasticsearch_dsl/test_validation.py +++ b/tests/test_validation.py @@ -1,9 +1,36 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
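+# Tests for document validation: required fields, custom clean() hooks on
+# fields and inner documents, and serialization of empty values.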
+ from datetime import datetime -from elasticsearch_dsl import Document, Nested, Text, Date, Object, Boolean, Integer, InnerDoc +from pytest import raises + +from elasticsearch_dsl import ( + Boolean, + Date, + Document, + InnerDoc, + Integer, + Nested, + Object, + Text, +) from elasticsearch_dsl.exceptions import ValidationException -from pytest import raises class Author(InnerDoc): name = Text(required=True) @@ -12,7 +39,8 @@ class Author(InnerDoc): def clean(self): print(self, type(self), self.name) if self.name.lower() not in self.email: - raise ValidationException('Invalid email!') + raise ValidationException("Invalid email!") + class BlogPost(Document): authors = Nested(Author, required=True) @@ -30,10 +58,12 @@ def clean(self, data): data = datetime.now() return super(AutoNowDate, self).clean(data) + class Log(Document): timestamp = AutoNowDate(required=True) data = Text() + def test_required_int_can_be_0(): class DT(Document): i = Integer(required=True) @@ -41,6 +71,7 @@ class DT(Document): dt = DT(i=0) assert dt.full_clean() is None + def test_required_field_cannot_be_empty_list(): class DT(Document): i = Integer(required=True) @@ -49,11 +80,12 @@ class DT(Document): with raises(ValidationException): dt.full_clean() + def test_validation_works_for_lists_of_values(): class DT(Document): i = Date(required=True) - dt = DT(i=[datetime.now(), 'not date']) + dt = DT(i=[datetime.now(), "not date"]) with raises(ValidationException): dt.full_clean() @@ -67,26 +99,29 @@ def test_field_with_custom_clean(): assert isinstance(l.timestamp, datetime) + def test_empty_object(): - d = BlogPost(authors=[{'name': 'Honza', 'email': 'honza@elastic.co'}]) + d = BlogPost(authors=[{"name": "Honza", "email": "honza@elastic.co"}]) d.inner = {} d.full_clean() + def test_missing_required_field_raises_validation_exception(): d = BlogPost() with raises(ValidationException): d.full_clean() d = BlogPost() - d.authors.append({'name': 'Honza'}) + d.authors.append({"name": "Honza"}) with raises(ValidationException): d.full_clean() d = BlogPost() - d.authors.append({'name': 'Honza', 'email': 'honza@elastic.co'}) + d.authors.append({"name": "Honza", "email": "honza@elastic.co"}) d.full_clean() + def test_boolean_doesnt_treat_false_as_empty(): d = BlogPostWithStatus() with raises(ValidationException): @@ -98,13 +133,14 @@ def test_boolean_doesnt_treat_false_as_empty(): def test_custom_validation_on_nested_gets_run(): - d = BlogPost(authors=[Author(name='Honza', email='king@example.com')], created=None) + d = BlogPost(authors=[Author(name="Honza", email="king@example.com")], created=None) assert isinstance(d.authors[0], Author) with raises(ValidationException): d.full_clean() + def test_accessing_known_fields_returns_empty_value(): d = BlogPost() @@ -114,10 +150,9 @@ def test_accessing_known_fields_returns_empty_value(): assert None is d.authors[0].name assert None is d.authors[0].email + def test_empty_values_are_not_serialized(): - d = BlogPost(authors=[{'name': 'Honza', 'email': 'honza@elastic.co'}], created=None) + d = BlogPost(authors=[{"name": "Honza", "email": "honza@elastic.co"}], created=None) d.full_clean() - assert d.to_dict() == { - 'authors': [{'name': 'Honza', 'email': 'honza@elastic.co'}] - } + assert d.to_dict() == {"authors": [{"name": "Honza", "email": "honza@elastic.co"}]} diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py new file mode 100644 index 000000000..454722711 --- /dev/null +++ b/tests/test_wrappers.py @@ -0,0 +1,93 @@ +# Licensed to Elasticsearch B.V. 
under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime, timedelta + +import pytest + +from elasticsearch_dsl import Range + + +@pytest.mark.parametrize( + "kwargs, item", + [ + ({}, 1), + ({}, -1), + ({"gte": -1}, -1), + ({"lte": 4}, 4), + ({"lte": 4, "gte": 2}, 4), + ({"lte": 4, "gte": 2}, 2), + ({"gt": datetime.now() - timedelta(seconds=10)}, datetime.now()), + ], +) +def test_range_contains(kwargs, item): + assert item in Range(**kwargs) + + +@pytest.mark.parametrize( + "kwargs, item", + [ + ({"gt": -1}, -1), + ({"lt": 4}, 4), + ({"lt": 4}, 42), + ({"lte": 4, "gte": 2}, 1), + ({"lte": datetime.now() - timedelta(seconds=10)}, datetime.now()), + ], +) +def test_range_not_contains(kwargs, item): + assert item not in Range(**kwargs) + + +@pytest.mark.parametrize( + "args,kwargs", + [ + (({},), {"lt": 42}), + ((), {"not_lt": 42}), + ((object(),), {}), + ((), {"lt": 1, "lte": 1}), + ((), {"gt": 1, "gte": 1}), + ], +) +def test_range_raises_value_error_on_wrong_params(args, kwargs): + with pytest.raises(ValueError): + Range(*args, **kwargs) + + +@pytest.mark.parametrize( + "range,lower,inclusive", + [ + (Range(gt=1), 1, False), + (Range(gte=1), 1, True), + (Range(), None, False), + (Range(lt=42), None, False), + ], +) +def test_range_lower(range, lower, inclusive): + assert (lower, inclusive) == range.lower + + +@pytest.mark.parametrize( + "range,upper,inclusive", + [ + (Range(lt=1), 1, False), + (Range(lte=1), 1, True), + (Range(), None, False), + (Range(gt=42), None, False), + ], +) +def test_range_upper(range, upper, inclusive): + assert (upper, inclusive) == range.upper diff --git a/utils/build-dists.py b/utils/build-dists.py new file mode 100644 index 000000000..6189f7c91 --- /dev/null +++ b/utils/build-dists.py @@ -0,0 +1,123 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""A command line tool for building and verifying releases +Can be used for building both 'elasticsearch' and 'elasticsearchX' dists. +Only requires 'name' in 'setup.py' and the directory to be changed. 
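+Typically invoked as "python utils/build-dists.py"; it builds the sdist and
+wheel and smoke-tests each artifact in a temporary virtualenv.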
+""" + +import contextlib +import os +import re +import shlex +import shutil +import tempfile + +base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +tmp_dir = None + + +@contextlib.contextmanager +def set_tmp_dir(): + global tmp_dir + tmp_dir = tempfile.mkdtemp() + yield tmp_dir + shutil.rmtree(tmp_dir) + tmp_dir = None + + +def run(*argv, expect_exit_code=0): + global tmp_dir + if tmp_dir is None: + os.chdir(base_dir) + else: + os.chdir(tmp_dir) + + cmd = " ".join(shlex.quote(x) for x in argv) + print("$ " + cmd) + exit_code = os.system(cmd) + if exit_code != expect_exit_code: + print( + "Command exited incorrectly: should have been %d was %d" + % (expect_exit_code, exit_code) + ) + exit(exit_code or 1) + + +def test_dist(dist): + with set_tmp_dir() as tmp_dir: + dist_name = ( + re.match(r"^(elasticsearch\d*[_-]dsl)-", os.path.basename(dist)) + .group(1) + .replace("-", "_") + ) + + # Build the venv and install the dist + run("python", "-m", "venv", os.path.join(tmp_dir, "venv")) + venv_python = os.path.join(tmp_dir, "venv/bin/python") + run(venv_python, "-m", "pip", "install", "-U", "pip") + run(venv_python, "-m", "pip", "install", dist) + + # Test the sync namespaces + run(venv_python, "-c", f"from {dist_name} import Q") + + # Ensure that the namespaces are correct for the dist + for suffix in ("", "1", "2", "5", "6", "7", "8", "9", "10"): + distx_name = f"elasticsearch{suffix}_dsl" + run( + venv_python, + "-c", + f"import {distx_name}", + expect_exit_code=256 if distx_name != dist_name else 0, + ) + # Tests the dependencies of the dist + run( + venv_python, + "-c", + f"import elasticsearch{suffix}", + expect_exit_code=256 if distx_name != dist_name else 0, + ) + + # Uninstall the dist, see that we can't import things anymore + run(venv_python, "-m", "pip", "uninstall", "--yes", dist_name) + run( + venv_python, + "-c", + f"from {dist_name} import Q", + expect_exit_code=256, + ) + + +def main(): + run("rm", "-rf", "build/", "dist/", "*.egg-info", ".eggs") + run("python", "setup.py", "sdist", "bdist_wheel") + + for dist in os.listdir(os.path.join(base_dir, "dist")): + test_dist(os.path.join(base_dir, "dist", dist)) + + # After this run 'python -m twine upload dist/*' + print( + "\n\n" + "===============================\n\n" + " * Releases are ready! *\n\n" + "$ python -m twine upload dist/*\n\n" + "===============================" + ) + + +if __name__ == "__main__": + main() diff --git a/utils/license-headers.py b/utils/license-headers.py new file mode 100644 index 000000000..e28622dad --- /dev/null +++ b/utils/license-headers.py @@ -0,0 +1,122 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Script which verifies that all source files have a license header. +Has two modes: 'fix' and 'check'. 
'fix' fixes problems, 'check' will +error out if 'fix' would have changed the file. +""" + +import os +import sys +from itertools import chain +from typing import Iterator, List + +lines_to_keep = ["# -*- coding: utf-8 -*-\n", "#!/usr/bin/env python\n"] +license_header_lines = [ + "# Licensed to Elasticsearch B.V. under one or more contributor\n", + "# license agreements. See the NOTICE file distributed with\n", + "# this work for additional information regarding copyright\n", + "# ownership. Elasticsearch B.V. licenses this file to you under\n", + '# the Apache License, Version 2.0 (the "License"); you may\n', + "# not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing,\n", + "# software distributed under the License is distributed on an\n", + '# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n', + "# KIND, either express or implied. See the License for the\n", + "# specific language governing permissions and limitations\n", + "# under the License.\n", + "\n", +] + + +def find_files_to_fix(sources: List[str]) -> Iterator[str]: + """Iterates over all files and dirs in 'sources' and returns + only the filepaths that need fixing. + """ + for source in sources: + if os.path.isfile(source) and does_file_need_fix(source): + yield source + elif os.path.isdir(source): + for root, _, filenames in os.walk(source): + for filename in filenames: + filepath = os.path.join(root, filename) + if does_file_need_fix(filepath): + yield filepath + + +def does_file_need_fix(filepath: str) -> bool: + if not filepath.endswith(".py"): + return False + with open(filepath, mode="r") as f: + first_license_line = None + for line in f: + if line == license_header_lines[0]: + first_license_line = line + break + elif line not in lines_to_keep: + return True + for header_line, line in zip( + license_header_lines, chain((first_license_line,), f) + ): + if line != header_line: + return True + return False + + +def add_header_to_file(filepath: str) -> None: + with open(filepath, mode="r") as f: + lines = list(f) + i = 0 + for i, line in enumerate(lines): + if line not in lines_to_keep: + break + lines = lines[:i] + license_header_lines + lines[i:] + with open(filepath, mode="w") as f: + f.truncate() + f.write("".join(lines)) + print(f"Fixed {os.path.relpath(filepath, os.getcwd())}") + + +def main(): + mode = sys.argv[1] + assert mode in ("fix", "check") + sources = [os.path.abspath(x) for x in sys.argv[2:]] + files_to_fix = find_files_to_fix(sources) + + if mode == "fix": + for filepath in files_to_fix: + add_header_to_file(filepath) + else: + no_license_headers = list(files_to_fix) + if no_license_headers: + print("No license header found in:") + cwd = os.getcwd() + [ + print(f" - {os.path.relpath(filepath, cwd)}") + for filepath in no_license_headers + ] + sys.exit(1) + else: + print("All files had license header") + + +if __name__ == "__main__": + main()
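As a rough illustration of how the license-header checker above could be exercised from the test suite, here is a minimal sketch; the test name and the package paths are illustrative and not part of the diff, and it assumes the command is run from the repository root.

    # Hypothetical smoke test wrapping utils/license-headers.py in "check" mode;
    # per the script's main(), it exits with status 1 when any .py file under the
    # given paths is missing the Apache license header.
    import subprocess
    import sys

    def test_license_headers_present():
        result = subprocess.run(
            [sys.executable, "utils/license-headers.py", "check",
             "elasticsearch_dsl", "tests", "utils"],
            capture_output=True,
        )
        assert result.returncode == 0, result.stdout.decode()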