From 7433d44f3b208da53190d2d0b3ca9f0207f2253f Mon Sep 17 00:00:00 2001 From: medcl Date: Tue, 17 Apr 2018 13:41:49 +0800 Subject: [PATCH] add upstream docs as reference --- LICENSE | 21 --- breaking-changes.asciidoc | 3 +- community.asciidoc | 11 +- configuration.asciidoc | 267 +++++++++++++++++------------ connection-pool.asciidoc | 136 +++++++++------ crud.asciidoc | 133 ++++++++------ futures.asciidoc | 110 +++++++----- index-operations.asciidoc | 112 ++++++------ installation.asciidoc | 71 ++++---- namespaces.asciidoc | 58 ++++--- overview.asciidoc | 10 +- per-request-configuration.asciidoc | 121 ++++++++----- php-version-requirement.asciidoc | 6 +- php_json_objects.asciidoc | 118 +++++++------ quickstart.asciidoc | 192 ++++++++++----------- search-operations.asciidoc | 125 ++++++++------ security.asciidoc | 86 ++++++---- selectors.asciidoc | 72 ++++---- serializers.asciidoc | 123 +++++++------ 19 files changed, 1010 insertions(+), 765 deletions(-) delete mode 100644 LICENSE diff --git a/LICENSE b/LICENSE deleted file mode 100644 index a81bb31..0000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2018 Lawrence - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/breaking-changes.asciidoc b/breaking-changes.asciidoc index 2460785..23ee4a2 100644 --- a/breaking-changes.asciidoc +++ b/breaking-changes.asciidoc @@ -1,4 +1,3 @@ -[[_breaking_changes_from_5_x]] == Breaking changes from 5.x -没有!:) \ No newline at end of file +None! :) \ No newline at end of file diff --git a/community.asciidoc b/community.asciidoc index 0d2488d..9f286c7 100644 --- a/community.asciidoc +++ b/community.asciidoc @@ -1,13 +1,12 @@ -[[_community_dsls]] -== 社区 DSLs + +== Community DSLs === ElasticsearchDSL https://github.com/ongr-io/ElasticsearchDSL[Link: ElasticsearchDSL] - [quote, ElasticsearchDSL] __________________________ -引入 Elasticsearch DSL 类库,目的是为 Elasticsearch bundle 和 Elasticsearch PHP 客户端提供对象查询语句构造器。你可以轻松地构造任何 Elasticsearch 查询语句且转变语句为数组。 +Introducing Elasticsearch DSL library to provide objective query builder for Elasticsearch bundle and elasticsearch-php client. You can easily build any Elasticsearch query and transform it to an array. __________________________ === elasticsearcher @@ -16,7 +15,7 @@ https://github.com/madewithlove/elasticsearcher[Link: elasticsearcher] [quote, elasticsearcher] __________________________ -这个独立的包是位于 Elasticsearch PHP 客户端之上的一个轻量级的封装。它主要的目的是让应用中的查询语句和索引的构造更为简易。它并非要隐藏或替代 Elasticsearch PHP 客户端的功能。 +This agnostic package is a lightweight wrapper on top of the Elasticsearch PHP client. Its main goal is to allow for easier structuring of queries and indices in your application. It does not want to hide or replace functionality of the Elasticsearch PHP client. __________________________ == Community Integrations @@ -113,4 +112,4 @@ _____________________ This helper is a light library which wrap the official client elasticsearch-php. 
It will help you to manage your ES Indices with no downtime. This helper implements the philosophy described in the https://www.elastic.co/guide/en/elasticsearch/guide/master/index-aliases.html[official documentation] which can be summarized in a few words : *use alias instead of index directly* -_____________________ +_____________________ \ No newline at end of file diff --git a/configuration.asciidoc b/configuration.asciidoc index 3d01977..19845a2 100644 --- a/configuration.asciidoc +++ b/configuration.asciidoc @@ -1,18 +1,23 @@ -[[_configuration]] -== 配置 -几乎所有应用(译者注:如 mysql、redis 等)的客户端都可以配置。大多数用户只需配置一些参数来满足他们的需求,但是也有可能需要修改大量的内核代码来满足需求。 +== Configuration -在客户端对象实例化前就应该通过 ClientBuilder 对象来完成自定义配置。我们会概述一下所有的配置参数,并且展示一些代码示例。 +Almost every aspect of the client is configurable. Most users will only need to configure a few parameters to suit +their needs, but it is possible to completely replace much of the internals if required. -=== Inline Host 配置法 +Custom configuration is accomplished before the client is instantiated, through the ClientBuilder helper object. +We'll walk through all the configuration options and show sample code to replace the various components. -最常见的配置是告诉客户端有关集群的信息:有多少个节点,节点的ip地址和端口号。如果没有指定主机名,客户端会连接 `localhost:9200` 。 +=== Inline Host Configuration -利用 `ClientBuilder` 的 `setHosts()` 方法可以改变客户端的默认连接方式。 `setHosts()` 方法接收一个一维数组,数组里面每个值都代表集群里面的一个节点信息。值的格式多种多样,主要看你的需求: +The most common configuration is telling the client about your cluster: how many nodes, their addresses and ports. If +no hosts are specified, the client will attempt to connect to `localhost:9200`. + +This behavior can be changed by using the `setHosts()` method on `ClientBuilder`. The method accepts an array of values, +each entry corresponding to one node in your cluster. 
The format of the host can vary, depending on your needs (ip vs +hostname, port, ssl, etc) [source,php] --------------------------------------------------- +---- $hosts = [ '192.168.1.1:9200', // IP + Port '192.168.1.2', // Just IP @@ -24,12 +29,13 @@ $hosts = [ $client = ClientBuilder::create() // Instantiate a new ClientBuilder ->setHosts($hosts) // Set the hosts ->build(); // Build the client object --------------------------------------------------- +---- -注意 `ClientBuilder` 对象允许链式操作。当然也可以分别调用上述的方法: +Notice that the `ClientBuilder` object allows chaining method calls for brevity. It is also possible to call the methods +individually: [source,php] --------------------------------------------------- +---- $hosts = [ '192.168.1.1:9200', // IP + Port '192.168.1.2', // Just IP @@ -41,16 +47,23 @@ $hosts = [ $clientBuilder = ClientBuilder::create(); // Instantiate a new ClientBuilder $clientBuilder->setHosts($hosts); // Set the hosts $client = $clientBuilder->build(); // Build the client object --------------------------------------------------- +---- + +=== Extended Host Configuration -=== Extended Host 配置法 +The client also supports an _extended_ host configuration syntax. The inline configuration method relies on PHP's +`filter_var()` and `parse_url()` methods to validate and extract the components of a URL. Unfortunately, these built-in +methods run into problems with certain edge-cases. For example, `filter_var()` will not accept URL's that have underscores +(which are questionably legal, depending on how you interpret the RFCs). Similarly, `parse_url()` will choke if a +Basic Auth's password contains special characters such as a pound sign (`#`) or question-marks (`?`). 
-客户端也支持 Extended Host 配置语法。Inline Host 配置法依赖 PHP 的 `filter_var()` 函数和 `parse_url()` 函数来验证和提取一个 URL 的各个部分。然而,这些 php 函数在一些特定的场景下会出错。例如, `filter_var()` 函数不接收有下划线的 URL。同样,如果 Basic Auth 的密码含有特定字符(如#、?),那么 `parse_url()` 函数会报错。 +For this reason, the client supports an extended host syntax which provides greater control over host initialization. +None of the components are validated, so edge-cases like underscores domain names will not cause problems. -因而客户端也支持 Extended Host 配置语法,从而使客户端实例化更加可控: +The extended syntax is an array of parameters for each host: [source,php] --------------------------------------------------- +---- $hosts = [ // This is effectively equal to: "https://username:password!#$?*abc@foo.com:9200/" [ @@ -69,33 +82,41 @@ $hosts = [ $client = ClientBuilder::create() // Instantiate a new ClientBuilder ->setHosts($hosts) // Set the hosts ->build(); // Build the client object --------------------------------------------------- +---- -每个节点只需要配置 `host` 参数。如果其它参数不指定,那么默认的端口是 `9200` ,默认的 scheme 是 `http` 。 +Only the `host` parameter is required for each configured host. If not provided, the default port is `9200`. The default +scheme is `http`. -=== 认证与加密 +=== Authorization and Encryption -想了解 HTTP 认证和 SSL 加密的内容,请查看 link:_security.html[认证与加密]。 +For details about HTTP Authorization and SSL encryption, please see link:_security.html[Authorization and SSL]. -=== 设置重连次数 +=== Set retries -在一个集群中,如果操作抛出如下异常:connection refusal, connection timeout, DNS lookup timeout 等等(不包括4xx和5xx),客户端便会重连。客户端默认重连 `n` (n=节点数)次。 +By default, the client will retry `n` times, where `n = number of nodes` in your cluster. A retry is only performed +if the operation results in a "hard" exception: connection refusal, connection timeout, DNS lookup timeout, etc. 4xx and +5xx errors are not considered retry'able events, since the node returns an operational response. 
-如果你不想重连,或者想更改重连次数。你可以使用 `setRetries()` 方法: +If you would like to disable retries, or change the number, you can do so with the `setRetries()` method: [source,php] --------------------------------------------------- +---------------------------- + $client = ClientBuilder::create() ->setRetries(2) ->build(); --------------------------------------------------- +---------------------------- -假如客户端重连次数超过设定值,便会抛出最后接收到的异常。例如,如果你有 10 个节点,设置 `setRetries(5)` ,客户端便会最多发送 5 次连接命令。如果 5 个节点返回的结果都是 connection timeout,那么客户端会抛出 `OperationTimeoutException` 。由于连接池处于使用状态,这些节点也可能会被标记为死节点。 +When the client runs out of retries, it will throw the last exception that it received. For example, if you have ten +alive nodes, and `setRetries(5)`, the client will attempt to execute the command up to five times. If all five nodes +result in a connection timeout (for example), the client will throw an `OperationTimeoutException`. Depending on the +Connection Pool being used, these nodes may also be marked dead. -为了识别是否为重连异常,抛出的异常会包含一个 `MaxRetriesException` 。例如,你可以在 catch 内使用 `getPrevious()` 来捕获一个特定的 curl 异常,以便查看是否包含 `MaxRetriesException` 。 +To help in identification, exceptions that are thrown due to max retries will wrap a `MaxRetriesException`. For example, +you can catch a specific curl exception then check if it wraps a MaxRetriesException using `getPrevious()`: [source,php] --------------------------------------------------- +---- $client = Elasticsearch\ClientBuilder::create() ->setHosts(["localhost:1"]) ->setRetries(0) @@ -109,12 +130,14 @@ try { echo "Max retries!"; } } --------------------------------------------------- +---- -由于所有 curl 抛出的异常(`CouldNotConnectToHost`, `CouldNotResolveHostException`, `OperationTimeoutException`)都继承 `TransportException` 。这样你就能够用 `TransportException` 来替代如上3种异常: +Alternatively, all "hard" curl exceptions (`CouldNotConnectToHost`, `CouldNotResolveHostException`, `OperationTimeoutException`) +extend the more general `TransportException`. 
So you could instead catch the general `TransportException` and then +check its previous value: [source,php] --------------------------------------------------- +---- $client = Elasticsearch\ClientBuilder::create() ->setHosts(["localhost:1"]) ->setRetries(0) @@ -128,59 +151,39 @@ try { echo "Max retries!"; } } --------------------------------------------------- +---- -=== 开启日志 -Elasticsearch-PHP 支持日志记录,但由于性能原因,所以默认没有开启。如果你希望开启日志,你就要选择一个日志记录工具并安装它,然后在客户端中开启日志。推荐使用 https://github.com/Seldaek/monolog[Monolog],不过任何实现 PSR/Log 接口的日志记录工具都可以使用。 +[[enabling_logger]] +=== Enabling the Logger +Elasticsearch-PHP supports logging, but it is not enabled by default for performance reasons. If you wish to enable logging, +you need to select a logging implementation, install it, then enable the logger in the Client. The recommended logger +is https://github.com/Seldaek/monolog[Monolog], but any logger that implements the `PSR/Log` interface will work. -你会发现在安装 elasticsearch-php 时会建议安装 Monolog。为了使用 Monolog,请把它加入 `composer.json` : +You might have noticed that Monolog was suggested during installation. To begin using Monolog, add it to your `composer.json`: -[source,js] --------------------------------------------------- +[source,json] +---------------------------- { "require": { ... 
- "elasticsearch/elasticsearch" : "~6.0", + "elasticsearch/elasticsearch" : "~5.0", "monolog/monolog": "~1.0" } } --------------------------------------------------- +---------------------------- -然后用 composer 更新: +And then update your composer installation: -[source,sh] --------------------------------------------------- +[source,shell] +---------------------------- php composer.phar update --------------------------------------------------- +---------------------------- -一旦安装好 Monolog(或其他日志记录工具),你就要创建一个日志对象并且注入到客户端中。 `ClientBuilder` 对象有一个静态方法来构建一个通用的 Monolog-based 日志对象。你只需要提供存放日志路径就行: +Once Monolog (or another logger) is installed, you need to create a log object and inject it into the client: [source,php] --------------------------------------------------- -$logger = ClientBuilder::defaultLogger('path/to/your.log'); - -$client = ClientBuilder::create() // Instantiate a new ClientBuilder - ->setLogger($logger) // Set the logger with a default logger - ->build(); // Build the client object --------------------------------------------------- - -你也可以指定记录的日志级别: - -[source,php] --------------------------------------------------- -// set severity with second parameter -$logger = ClientBuilder::defaultLogger('/path/to/logs/', Logger::INFO); - -$client = ClientBuilder::create() // Instantiate a new ClientBuilder - ->setLogger($logger) // Set the logger with a default logger - ->build(); // Build the client object --------------------------------------------------- - -`defaultLogger()` 方法只是一个辅助方法,不要求你使用它。你可以自己创建日志对象,然后注入: - -[source,php] --------------------------------------------------- +---- use Monolog\Logger; use Monolog\Handler\StreamHandler; @@ -190,18 +193,24 @@ $logger->pushHandler(new StreamHandler('path/to/your.log', Logger::WARNING)); $client = ClientBuilder::create() // Instantiate a new ClientBuilder ->setLogger($logger) // Set your custom logger ->build(); // Build the client object --------------------------------------------------- +---- + -=== 配置 HTTP 
Handler +=== Configure the HTTP Handler -Elasticsearch-PHP 使用的是可替代的 HTTP 传输层——https://github.com/guzzle/RingPHP/[RingPHP]。这允许客户端构建一个普通的 HTTP 请求,然后通过传输层发送出去。真正的请求细节隐藏在客户端内,并且这是模块化的,因此你可以根据你的需求来选择 HTTP handlers。 +Elasticsearch-PHP uses an interchangeable HTTP transport layer called https://github.com/guzzle/RingPHP/[RingPHP]. This +allows the client to construct a generic HTTP request, then pass it to the transport layer to execute. The actual execution +details are hidden from the client and modular, so that you can choose from several HTTP handlers depending on your needs. -客户端使用的默认 handler 是结合型 handler(combination handler)。当使用同步模式,handler 会使用 `CurlHandler` 来一个一个地发送 curl 请求。这种方式对于单一请求(single requests)来说特别迅速。当异步(future)模式开启,handler 就转换成使用 `CurlMultiHandler` , `CurlMultiHandler` 以 curl_multi 方式来发送请求。这样会消耗更多性能,但是允许批量 HTTP 请求并行执行。 +The default handler that the client uses is a combination handler. When executing in synchronous mode, the handler +uses `CurlHandler`, which executes single curl calls. These are very fast for single requests. When asynchronous (future) +mode is enabled, the handler switches to `CurlMultiHandler`, which uses the curl_multi interface. This involves a bit +more overhead, but allows batches of HTTP requests to be processed in parallel. 
-你可以从以下一些助手函数中选择一个来配置 HTTP handler,或者你也可以自定义 HTTP handler: +You can configure the HTTP handler with one of several helper functions, or provide your own custom handler: [source,php] --------------------------------------------------- +---- $defaultHandler = ClientBuilder::defaultHandler(); $singleHandler = ClientBuilder::singleHandler(); $multiHandler = ClientBuilder::multiHandler(); @@ -210,64 +219,81 @@ $customHandler = new MyCustomHandler(); $client = ClientBuilder::create() ->setHandler($defaultHandler) ->build(); --------------------------------------------------- +---- -想要了解自定义 Ring handler 的细节,请查看 http://ringphp.readthedocs.io/en/latest/[RingPHP文档]。 +For details on creating your own custom Ring handler, please see the http://guzzle.readthedocs.org/en/latest/handlers.html[RingPHP Documentation] -在所有的情况下都推荐使用默认的 handler。这不仅可以以同步模式快速发送请求,而且也保留了异步模式来实现并行请求。 如果你觉得你永远不会用到 future 模式,你可以考虑用 `singleHandler` ,这样会间接节省一些性能。 +The default handler is recommended in almost all cases. This allows fast synchronous execution, while retaining flexibility +to invoke parallel batches with async future mode. You may consider using just the `singleHandler` if you know you will +never need async capabilities, since it will save a small amount of overhead by reducing indirection. -=== 设置连接池 -客户端会维持一个连接池,连接池内每个连接代表集群的一个节点。这里有好几种连接池可供使用,每个的行为都有些细微差距。连接池可通过 `setConnectionPool()` 来配置: +=== Setting the Connection Pool + +The client maintains a pool of connections, with each connection representing a node in your cluster. There are several +connection pool implementations available, and each has slightly different behavior (pinging vs no pinging, etc). 
+Connection pools are configured via the `setConnectionPool()` method: [source,php] --------------------------------------------------- +---- $connectionPool = '\Elasticsearch\ConnectionPool\StaticNoPingConnectionPool'; $client = ClientBuilder::create() ->setConnectionPool($connectionPool) ->build(); --------------------------------------------------- +---- -更多细节请查询 link:_connection_pool.html[连接池配置]。 +For more details, please see the dedicated page on link:_connection_pool.html[configuring connection pools]. -=== 设置选择器(Selector) +=== Setting the Connection Selector -连接池是用来管理集群的连接,但是选择器则是用来确定下一个 API 请求要用哪个连接。这里有几个选择器可供选择。选择器可通过 `setSelector()` 方法来更改: +The connection pool manages the connections to your cluster, but the Selector is the logic that decides which connection +should be used for the next API request. There are several selectors that you can choose from. Selectors can be changed +via the `setSelector()` method: [source,php] --------------------------------------------------- +---- $selector = '\Elasticsearch\ConnectionPool\Selectors\StickyRoundRobinSelector'; $client = ClientBuilder::create() ->setSelector($selector) ->build(); --------------------------------------------------- +---- + +For more details, please see the dedicated page on link:_selectors.html[configuring selectors]. -更多细节请查询 link:_selectors.html[选择器配置]。 -=== 设置序列化器(Serializer) +=== Setting the Serializer -客户端的请求数据是关联数组,但是 Elasticsearch 接受 JSON 数据。序列化器是指把 PHP 数组序列化为 JSON 数据。当然 Elasticsearch 返回的 JSON 数据也会反序列化为 PHP 数组。这看起来有些繁琐,但把序列化器模块化对于处理一些极端案例有莫大帮助。 +Requests are given to the client in the form of associative arrays, but Elasticsearch expects JSON. The Serializer's +job is to serialize PHP objects into JSON. It also de-serializes JSON back into PHP arrays. This seems trivial, but +there are a few edgecases which make it useful for the serializer to remain modular. 
-大部分人不会更改默认的序列化器( `SmartSerializer` ),但你真的想改变,那可以通过 `setSerializer()` 方法: +The majority of people will never need to change the default serializer (`SmartSerializer`), but if you need to, +it can be done via the `setSerializer()` method: [source,php] --------------------------------------------------- +---- $serializer = '\Elasticsearch\Serializers\SmartSerializer'; $client = ClientBuilder::create() ->setSerializer($serializer) ->build(); --------------------------------------------------- +---- + +For more details, please see the dedicated page on link:_serializers.html[configuring serializers]. -更多细节请查询 link:_serializers.html[序列化器配置]。 -=== 设置自定义 ConnectionFactory +=== Setting a custom ConnectionFactory -当连接池发送请求时,ConnectionFactory 就会实例化连接对象。一个连接对象代表一个节点。因为 handler(通过RingPHP)才是真正的执行网络请求,那么连接对象的主要工作就是维持连接:节点是活节点吗?ping 的通吗?host 和端口是什么? +The ConnectionFactory instantiates new Connection objects when requested by the ConnectionPool. A single Connection +represents a single node. Since the client hands actual networking work over to RingPHP, the Connection's main job is +book-keeping: Is this node alive? Did it fail a ping request? What is the host and port? -很少会去自定义 ConnectionFactory,但是如果你想做,那么你要提供一个完整的 ConnectionFactory 对象作为 `setConnectionFactory()` 方法的参数。这个自定义对象需要实现 ConnectionFactoryInterface 接口。 +There is little reason to provide your own ConnectionFactory, but if you need to do so, you need to supply an intact +ConnectionFactory object to the `setConnectionFactory()` method. The object should implement the `ConnectionFactoryInterface` +interface. 
[source,php] --------------------------------------------------- +---- + class MyConnectionFactory implements ConnectionFactoryInterface { @@ -303,18 +329,23 @@ $connectionFactory = new MyConnectionFactory( $client = ClientBuilder::create() ->setConnectionFactory($connectionFactory); ->build(); --------------------------------------------------- +---- + +As you can see, if you decide to inject your own ConnectionFactory, you take over the responsibility of wiring it correctly. +The ConnectionFactory requires a working HTTP handler, serializer, logger and tracer. -如上所述,如果你想注入自定义的 ConnectionFactory,你自己就要负责写对它。自定义 ConnectionFactory 需要用到 HTTP handler,序列化器,日志和追踪。 -=== 设置 Endpoint 闭包 +=== Set the Endpoint closure -客户端使用 Endpoint 闭包来发送 API 请求到 Elasticsearch 的 Endpoint 对象。一个命名空间对象会通过闭包构建一个新的 Endpoint,这个意味着如果你想扩展 API 的 Endpoint,你可以很方便的做到。 +The client uses an Endpoint closure to dispatch API requests to the correct Endpoint object. A namespace object will +construct a new Endpoint via this closure, which means this is a handy location if you wish to extend the set +of available API endpoints. -例如,我们可以新增一个 endpoint: +For example, we could add a new endpoint like so: [source,php] --------------------------------------------------- +---- + $transport = $this->transport; $serializer = $this->serializer; @@ -335,18 +366,24 @@ $newEndpoint = function ($class) use ($transport, $serializer) { $client = ClientBuilder::create() ->setEndpoint($newEndpoint) ->build(); --------------------------------------------------- +---- -很明显,如果你这样做的话,那么你就要负责对现存的 Endpoint 进行维护,以确保所有的方法都能正常运行。同时你也要确保端口和序列化都写入每个 Endpoint。 +Obviously, by doing this you take responsibility that all existing endpoints still function correctly. And you also +assume the responsibility of correctly wiring the Transport and Serializer into each endpoint. 
-=== 从 hash 配置中创建客户端 -为了更加容易的创建客户端,所有的配置都可以用 hash 形式来替代单一配置方法。这种配置方法可以通过静态方法 `ClientBuilder::FromConfig()` 来完成,它接收一个数组,返回一个配置好的客户端。 +=== Building the client from a configuration hash + +To help ease automated building of the client, all configurations can be provided in a setting +hash instead of calling the individual methods directly. This functionality is exposed through +the `ClientBuilder::fromConfig()` static method, which accepts an array of configurations +and returns a fully built client. + +Array keys correspond to the method name, e.g. the `retries` key corresponds to the `setRetries()` method. -数组的键名对应方法名(如 retries 对应 setRetries() 方法): [source,php] --------------------------------------------------- +---- $params = [ 'hosts' => [ 'localhost:9200' @@ -355,12 +392,16 @@ $params = [ 'handler' => ClientBuilder::singleHandler() ]; $client = ClientBuilder::fromConfig($params); --------------------------------------------------- +---- + -为了帮助用户找出潜在的问题,未知参数会抛出异常。如果你不想要抛出异常,你可以在 fromConfig() 中设置 $quiet = true 来关闭异常: +Unknown parameters will throw an exception, to help the user find potential problems. +If this behavior is not desired (e.g. you are using the hash for other purposes, and may have +keys unrelated to the Elasticsearch client), you can set $quiet = true in fromConfig() to +silence the exceptions. 
[source,php] --------------------------------------------------- +---- $params = [ 'hosts' => [ 'localhost:9200' @@ -371,4 +412,4 @@ $params = [ // Set $quiet to true to ignore the unknown `imNotReal` key $client = ClientBuilder::fromConfig($params, true); --------------------------------------------------- +---- diff --git a/connection-pool.asciidoc b/connection-pool.asciidoc index f9edc30..192fb3d 100644 --- a/connection-pool.asciidoc +++ b/connection-pool.asciidoc @@ -1,86 +1,106 @@ -[[_connection_pool]] -== 连接池 -连接池是客户端内的一个对象,主要是维持现有节点的连接。理论上来讲,节点只有死节点与活节点。 +== Connection Pool -然而在现实世界中,事情绝不会这么明确。有时候节点是处在 _“可能挂了但还未确认”_ 、 _“连接超时但未知原因”_ 或 _“最近挂过但现在可用”_ 的灰色地带中。而连接池的工作就是管理这些无规则的连接,并为客户端提供最稳定的连接状态。 +The connection pool is an object inside the client that is responsible for maintaining the current list of nodes. +Theoretically, nodes are either dead or alive. -如果一个连接池找不到一个活节点来发送查询,那么就会返回一个 `NoNodesAvailableException` 异常给客户端。这里跟最大重连次数(retry)有所不同。假如有这么一个例子:你的集群中可能有 10 个节点。你发送一个请求,其中有 9 个节点因为连接超时而请求失败。而第 10 个节点发送请求成功并成功执行请求。在上述例子中,前 9 个节点会被标记为死节点(连接池处于使用状态才会被标记),且它们的“死亡”定时器会启动生效。 +However, in the real world, things are never so clear. Nodes are sometimes in a gray-zone of _"probably dead but not +confirmed"_, _"timed-out but unclear why"_ or _"recently dead but now alive"_. The connection pool's job is to +manage this set of unruly connections and try to provide the best behavior to the client. -当要发送下一个请求时,节点1-9是被标记为死节点,所以请求会跳过这些节点。请求只会发送到唯一的活节点 10 中,而假如发送到这个节点也失败了,那么就会返回 `NoNodesAvailableException` 。你会留意到这里的发送次数比重连次数(retries)的值要少,因为重连次数只适用于活节点。在这种情况下,只有一个节点是活节点,请求失败后就会返回 `NoNodesAvailableException` 。 +If a connection pool is unable to find an alive node to query against, it will return a `NoNodesAvailableException`. +This is distinct from an exception due to maximum retries. For example, your cluster may have 10 nodes. You execute +a request and 9 out of the 10 nodes fail due to connection timeouts. The tenth node succeeds and the query executes. 
+The first nine nodes will be marked dead (depending on the connection pool being used) and their "dead" timers will begin +ticking. -这里有几种连接池可供选择: +When the next request is sent to the client, nodes 1-9 are still considered "dead", so they will be skipped. The request +is sent to the only known alive node (#10), and if this node fails, a `NoNodesAvailableException` is returned. You'll note +this is much less than the `retries` value, because `retries` only applies to retries against alive nodes. In this case, +only one node is known to be alive, so `NoNodesAvailableException` is returned. -=== staticNoPingConnectionPool(默认) -连接池维持一个静态的 hosts 清单,这些 hosts 在客户端初始化时都被假定为活节点。如果一个节点处理请求失败,那么该节点会被标记为死节点并持续 60 秒,而请求会发送到下一个节点。60 秒过后,节点则会再生并加入请求轮询中。每增加一次请求失败次数都会导致死亡时间以指数级别增长。 +There are several connection pool implementations that you can choose from: -请求成功一次后会重置 "failed ping timeout" 计数器。 +=== staticNoPingConnectionPool (default) -如果你想明确的设置连接池为 `StaticNoPingConnectionPool` ,你可能要在ClientBuilder对象中使用 `setConnectionPool()` 方法: +This connection pool maintains a static list of hosts, which are assumed to be alive when the client initializes. If +a node fails a request, it is marked as `dead` for 60 seconds and the next node is tried. After 60 seconds, the node +is revived and put back into rotation. Each additional failed request will cause the dead timeout to increase exponentially. + +A successful request will reset the "failed ping timeout" counter. + +If you wish to explicitly set the `StaticNoPingConnectionPool` implementation, you may do so with the `setConnectionPool()` +method of the ClientBuilder object: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create() ->setConnectionPool('\Elasticsearch\ConnectionPool\StaticNoPingConnectionPool', []) ->build(); --------------------------------------------------- +---- -注意:要通过命名空间加类名的方法来指定连接池。 +Note that the implementation is specified via a namespace path to the class. 
-=== StaticConnectionPool +=== staticConnectionPool -`StaticConnectionPool` 除了要在使用前 ping 节点来确定是否为活节点,其它的特性与 `StaticNoPingConnectionPool` 一致。这可能对于执行时间较长的脚本比较有用,但这往往会增加额外开销,因为对一般的PHP脚本来说这是不必要的。 +Identical to the `StaticNoPingConnectionPool`, except it pings nodes before they are used to determine if they are alive. +This may be useful for long-running scripts, but tends to be additional overhead that is unnecessary for average PHP scripts. -使用 `StaticConnectionPool` 的方法: +To use the `StaticConnectionPool`: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create() ->setConnectionPool('\Elasticsearch\ConnectionPool\StaticConnectionPool', []) ->build(); --------------------------------------------------- +---- -注意:要通过命名空间加类名的方法来指定连接池。 +Note that the implementation is specified via a namespace path to the class. -=== SimpleConnectionPool +=== simpleConnectionPool -`SimpleConnectionPool` 仅仅返回选择器(Selector)指定的下个节点信息,它不监测节点的“生死状态”。不管节点是活节点还是死节点,这种连接池都会返回节点信息给客户端。它仅仅是个简单的静态 host 连接池。 +The `SimpleConnectionPool` simply returns the next node as specified by the Selector; it does not track +the "liveness" of nodes. This pool will return nodes whether they are alive or dead. It is just a simple pool of static +hosts. -`SimpleConnectionPool` 不建议常规使用,但是它是个有用的调试工具。 +The `SimpleConnectionPool` is not recommended for routine use, but it may be a useful debugging tool. -使用 `SimpleConnectionPool` 的方法: +To use the `SimpleConnectionPool`: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create() ->setConnectionPool('\Elasticsearch\ConnectionPool\SimpleConnectionPool', []) ->build(); --------------------------------------------------- +---- -注意:要通过命名空间加类名的方法来指定连接池。 +Note that the implementation is specified via a namespace path to the class. 
-=== SniffingConnectionPool +=== sniffingConnectionPool -`SniffingConnectionPool` 与前面的两个静态连接池有所不同,它是动态的。用户提供 hosts 种子,而客户端则会嗅探这些 hosts 并发现集群的其余节点。 `SniffingConnectionPool` 通过 Cluster State API 来实现嗅探。当集群添加新节点或删除节点,客户端会更新连接池的活跃连接。 +Unlike the two previous static connection pools, this one is dynamic. The user provides a seed list of hosts, which the +client uses to "sniff" and discover the rest of the cluster. It achieves this through the Cluster State API. As new +nodes are added or removed from the cluster, the client will update its pool of active connections. -使用 `SniffingConnectionPool` 的方法: +To use the `SniffingConnectionPool`: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create() ->setConnectionPool('\Elasticsearch\ConnectionPool\SniffingConnectionPool', []) ->build(); --------------------------------------------------- +---- -注意:要通过命名空间加类名的方法来指定连接池。 +Note that the implementation is specified via a namespace path to the class. -=== 自定义连接池 -如果你要是实现自定义连接池,你的类则必须实现 `ConnectionPoolInterface` 接口: +=== Custom Connection Pool + +If you wish to implement your own custom Connection Pool, your class must implement `ConnectionPoolInterface`: [source,php] --------------------------------------------------- +---- class MyCustomConnectionPool implements ConnectionPoolInterface { @@ -102,23 +122,25 @@ class MyCustomConnectionPool implements ConnectionPoolInterface // code here } } --------------------------------------------------- +---- -然后你要实例化自定义的连接池并注入到 ClientBuilder: +You can then instantiate an instance of your ConnectionPool and inject it into the ClientBuilder: [source,php] --------------------------------------------------- +---- $myConnectionPool = new MyCustomConnectionPool(); $client = ClientBuilder::create() ->setConnectionPool($myConnectionPool, []) ->build(); --------------------------------------------------- +---- -如果你的连接池只有较小的变更,你可以考虑扩展 `AbstractConnectionPool` 
,它提供一些具体的助手方法。如果你不选择这种做法,你就要确保连接池拥有兼容的构造方法(因为在接口中没有定义好): +If your connection pool only makes minor changes, you may consider extending `AbstractConnectionPool`, which provides +some helper concrete methods. If you choose to go down this route, you need to make sure your ConnectionPool's implementation +has a compatible constructor (since it is not defined in the interface): [source,php] --------------------------------------------------- +---- class MyCustomConnectionPool extends AbstractConnectionPool implements ConnectionPoolInterface { @@ -145,28 +167,38 @@ class MyCustomConnectionPool extends AbstractConnectionPool implements Connectio // code here } } --------------------------------------------------- +---- -假如你的构造方法与 `AbstractConnectionPool` 的构造方法相同,你可以用对象注入或命名空间实例化来设置连接池: +If your constructor matches AbstractConnectionPool, you may use either object injection or namespace instantiation: [source,php] --------------------------------------------------- +---- $myConnectionPool = new MyCustomConnectionPool(); $client = ClientBuilder::create() ->setConnectionPool($myConnectionPool, []) // object injection ->setConnectionPool('/MyProject/ConnectionPools/MyCustomConnectionPool', []) // or namespace ->build(); --------------------------------------------------- +---- + -=== 选择什么连接池?PHP 和连接池的关系 +=== Which connection pool to choose? PHP and connection pooling -初看觉得 `sniffingConnectionPool` 似乎比较高级。对许多语言来说当然如此。但是 PHP 则有些不同。 +At first glance, the `sniffingConnectionPool` implementation seems superior. For many languages, it is. In PHP, the +conversation is a bit more nuanced. -因为 PHP 是无共享架构(share-nothing architecture),php 脚本实例化后无法维持一个连接池。这意味着每个脚本在重新执行时都要负责创建、维持和销毁连接。 +Because PHP is a share-nothing architecture, there is no way to maintain a connection pool across script instances. +This means that every script is responsible for creating, maintaining, and destroying connections everytime the script +is re-run. 
-嗅探是相对轻量的操作(调用一次API到 `/_cluster/state` ,然后 ping 每个节点),但是对于某些 PHP 程序来说,这可能是一笔不可忽视的开销。一般的 PHP 脚本可能会加载客户端,执行一些请求然后关闭。想象一下这个脚本每秒调用 1000 次: `SniffingConnectionPool` 会每秒执行嗅探和 ping 所有节点 1000 次。嗅探程序则会增加大量的开销。 +Sniffing is a relatively lightweight operation (one API call to `/_cluster/state`, followed by pings to each node) but +it may be a non-negligible overhead for certain PHP applications. The average PHP script will likely load the client, +execute a few queries and then close. Imagine this script being called 1000 times per second: the sniffing connection +pool will perform the sniffing and pinging process 1000 times per second. The sniffing process will add a large +amount of overhead -在实际中,如果你的脚本只是执行一些请求,用嗅探就太粗暴了。嗅探对于常驻进程来说往往更加有用。 +In reality, if your script only executes a few queries, the sniffing concept is _too_ robust. It tends to be more +useful in long-lived processes which potentially "out-live" a static list. -基于上述原因,默认连接池才设置为当前的 `staticNoPingConnectionPool` 。当然你可以更改默认连接池,但我们强烈建议你进行测试并确保连接池对于性能没有产生不良影响。 +For this reason the default connection pool is currently the `staticNoPingConnectionPool`. You can, of course, change +this default - but we strongly recommend you load test and verify that it does not negatively impact your performance. diff --git a/crud.asciidoc b/crud.asciidoc index 9a0fe33..4f34f9c 100644 --- a/crud.asciidoc +++ b/crud.asciidoc @@ -1,18 +1,19 @@ -[[_indexing_documents]] -== 索引文档 -当你要在 Elasticsearch 增加文档时,你就需要索引 JSON 文档。JSON 文档会映射 PHP 关联数组,因为 PHP 关联数组可以 encode 为 JSON 数据格式。 +== Indexing Documents -因此在 Elasticsearch-PHP 中你可以传递关联数组给客户端来索引文档。我们会概述几种方法来增加文档到 Elasticsearch。 +When you add documents to Elasticsearch, you index JSON documents. This maps naturally to PHP associative arrays, since +they can easily be encoded in JSON. Therefore, in Elasticsearch-PHP you create and pass associative arrays to the client +for indexing. 
There are several methods of ingesting data into Elasticsearch, which we will cover here -=== 单一文档索引 +=== Single document indexing -当索引一个文档时,你可以提供一个 ID 或者让 Elasticsearch 自动生成。 +When indexing a document, you can either provide an ID or let elasticsearch generate one for you. -提供 ID 值: +{zwsp} + +.Providing an ID value [source,php] --------------------------------------------------- +---- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -22,12 +23,12 @@ $params = [ // Document will be indexed to my_index/my_type/my_id $response = $client->index($params); --------------------------------------------------- - -不提供 ID 值: +---- +{zwsp} + +.Omitting an ID value [source,php] --------------------------------------------------- +---- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -36,12 +37,15 @@ $params = [ // Document will be indexed to my_index/my_type/ $response = $client->index($params); --------------------------------------------------- +---- +{zwsp} + -如果你需要设置其他的参数,如 `routing` 的值,你可以指定这些参数到 `index` , `type` 等参数后。例如,索引一个新的文档时设置 routing 值和 timestamp 值: +If you need to set other parameters, such as a `routing` value, you specify those in the array alongside the `index`, +`type`, etc. For example, let's set the routing and timestamp of this new document: +.Additional parameters [source,php] --------------------------------------------------- +---- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -53,22 +57,26 @@ $params = [ $response = $client->index($params); --------------------------------------------------- +---- +{zwsp} + -=== 批量(bulk)索引 +=== Bulk Indexing -Elasticsearch 也支持批量(bulk)索引文档。bulk API 要求提供 JSON 格式的 action/元数据 键值对。在 PHP 中构建批量文档数据也是相似的。你首先要创建一个 action 数组对象(如 `index` 对象),然后你还要创建一个 body 对象。而 PHP 程序则重复上述操作构建文档数据。 +Elasticsearch also supports bulk indexing of documents. The bulk API expects JSON action/metadata pairs, separated by +newlines. When constructing your documents in PHP, the process is similar. 
You first create an action array object +(e.g. `index` object), then you create a document body object. This process repeats for all your documents. -一个简单的例子如下所示: +A simple example might look like this: +.Bulk indexing with PHP arrays [source,php] --------------------------------------------------- +---- for($i = 0; $i < 100; $i++) { $params['body'][] = [ 'index' => [ '_index' => 'my_index', '_type' => 'my_type', - ] + ] ]; $params['body'][] = [ @@ -78,12 +86,15 @@ for($i = 0; $i < 100; $i++) { } $responses = $client->bulk($params); --------------------------------------------------- +---- + +In practice, you'll likely have more documents than you want to send in a single bulk request. In that case, you need +to batch up the requests and periodically send them: -实际上在一次 bulk 请求中发送数量会比文档实际数量少。如果是这种情况,你就要设置批量值然后周期性地发送: +.Bulk indexing with batches [source,php] --------------------------------------------------- +---- $params = ['body' => []]; for ($i = 1; $i <= 1234567; $i++) { @@ -116,15 +127,16 @@ for ($i = 1; $i <= 1234567; $i++) { if (!empty($params['body'])) { $responses = $client->bulk($params); } --------------------------------------------------- +---- -[[_getting_documents]] -== 获取文档 +== Getting Documents -Elasticsearch 提供实时获取文档的方法。这意味着只要文档被索引且客户端收到消息确认后,你就可以立即在任何的分片中检索文档。Get 操作通过 `index/type/id` 方式请求一个文档信息: +Elasticsearch provides realtime GETs of documents. This means that as soon as the document has been indexed and your +client receives an acknowledgement, you can immediately retrieve the document from any shard. 
Get operations are +performed by requesting a document by its full `index/type/id` path: -[source,js] --------------------------------------------------- +[source,php] +---- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -133,19 +145,22 @@ $params = [ // Get doc at /my_index/my_type/my_id $response = $client->get($params); --------------------------------------------------- +---- +{zwsp} + + +== Updating Documents -[[_updating_documents]] -== 更新文档 +Updating a document allows you to either completely replace the contents of the existing document, or perform a partial +update to just some fields (either changing an existing field, or adding new fields). -更新文档操作既可以完全覆盖现存文档全部字段,又可以部分更新字段(更改现存字段,或添加新字段)。 +=== Partial document update -=== 部分更新 +If you want to partially update a document (e.g. change an existing field, or add a new one) you can do so by specifying +the `doc` in the `body` parameter. This will merge the fields in `doc` with the existing document. -如果你要部分更新文档(如更改现存字段,或添加新字段),你可以在 body 参数中指定一个 doc 参数。这样 doc 参数内的字段会与现存字段进行合并。 [source,php] --------------------------------------------------- +---- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -159,14 +174,16 @@ $params = [ // Update doc at /my_index/my_type/my_id $response = $client->update($params); --------------------------------------------------- +---- +{zwsp} + -=== script 更新 +=== Scripted document update -有时你要执行一个脚本来进行更新操作,如对字段进行自增操作或添加新字段。为了执行一个脚本更新,你要提供脚本命令和一些参数: +Sometimes you need to perform a scripted update, such as incrementing a counter or appending a new value to an array. 
+To perform a scripted update, you need to provide a script and (usually) a set of parameters: [source,php] --------------------------------------------------- +---- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -180,39 +197,44 @@ $params = [ ]; $response = $client->update($params); --------------------------------------------------- +---- +{zwsp} + -=== Upserts 更新 +=== Upserts -Upserts 操作是指“更新或插入”操作。这意味着一个 upsert 操作会先执行 script 更新,如果文档不存在(或是你更新的字段不存在),则会插入一个默认值。 +Upserts are "Update or Insert" operations. This means an upsert will attempt to run your update script, but if the document +does not exist (or the field you are trying to update doesn't exist), default values will be inserted instead. [source,php] --------------------------------------------------- +---- $params = [ 'index' => 'my_index', 'type' => 'my_type', 'id' => 'my_id', 'body' => [ - 'script' => 'ctx._source.counter += count', - 'params' => [ - 'count' => 4 + 'script' => [ + 'source' => 'ctx._source.counter += params.count', + 'params' => [ + 'count' => 4 + ], ], 'upsert' => [ 'counter' => 1 - ] + ], ] ]; $response = $client->update($params); --------------------------------------------------- +---- +{zwsp} + + -[[_deleting_documents]] -== 删除文档 +== Deleting documents -通过指定文档的 `/index/type/id` 路径可以删除文档: +Finally, you can delete documents by specifying their full `/index/type/id` path: [source,php] --------------------------------------------------- +---- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -221,4 +243,5 @@ $params = [ // Delete doc at /my_index/my_type/my_id $response = $client->delete($params); --------------------------------------------------- +---- +{zwsp} + diff --git a/futures.asciidoc b/futures.asciidoc index d55dd35..ceee520 100644 --- a/futures.asciidoc +++ b/futures.asciidoc @@ -1,20 +1,27 @@ -[[_future_mode]] -== Future 模式 -客户端提供 future 模式(或叫异步模式)。future 模式允许批量发送请求(并行发送到集群),这对于提高性能和生产力有极大帮助。 +== Future Mode -PHP 是单线程的脚本语言,然而 libcurl 的 multi interface 
功能使得像 PHP 这种单线程的语言可以批量发送请求,从而获得并发性特征。批量请求是通过底层的多线程 libcurl 库并行的发送请求给 Elasticsearch,而返回给PHP的数据也是批量的。 +The client offers a mode called "future" or "async" mode. This allows batch processing of requests (sent in parallel +to the cluster), which can have a dramatic impact on performance and throughput. -在单线程环境下,执行 `n` 个请求的时间等于 `n` 个请求时间相加。在 multi interface 功能下,执行 `n` 个请求的时间等于最慢的一个请求时间。 +PHP is fundamentally single-threaded, however libcurl provides functionality called the "multi interface". This allows +languages like PHP to gain concurrency by providing a batch of requests to process. The batch is executed in parallel +by the underlying multithreaded libcurl library, and the batch of responses is then returned to PHP. -除此以外,multi-interface 功能允许批量请求同时发送到不同的主机,这意味着 Elasticsearch-PHP 可以更高效地利用集群。 +In a single-threaded environment, the time to execute `n` requests is the sum of those `n` requests' latencies. With +the multi interface, the time to execute `n` requests is the latency of the slowest request (assuming enough handles +are available to execute all requests in parallel). -=== 使用 Future 模式 +Furthermore, the multi-interface allows requests to different hosts simultaneously, which means the Elasticsearch-PHP +client can more effectively utilize your full cluster. -使用这种模式相对简单,只是你要写多一点代码。为了开启 future 模式,在 client 选项中增加 `future` 参数,并设置值为 `'lazy'` : +=== Using Future Mode + +Utilizing this feature is relatively straightforward, but it does introduce more responsibility into your code. 
To enable +future mode, set the `future` flag in the client options to `'lazy'`: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $params = [ @@ -27,16 +34,22 @@ $params = [ ]; $future = $client->get($params); --------------------------------------------------- +---- -这里会返回一个 _future_ 对象,而不是真正的响应数据。future 对象是待处理对象,它看起来就像是个占位符。你可以把 future 对象当成是普通对象在代码中传递使用。当你需要响应数据时,你可以解析 future 对象。如果 future 对象已经被解析,可以立即使用响应数据。如果 future 对象还没被解析完,那么解析动作会阻塞 PHP 脚本的执行,直到解析完成。 +This will return a _future_, rather than the actual response. A future represents a _future computation_ and acts like +a placeholder. You can pass a future around your code like a regular object. When you need the result values, you +can _resolve_ the future. If the future has already resolved (due to some other activity), the values will be immediately +available. If the future has not resolved yet, the resolution will block until those values have become available (e.g. +after the API call completes). -在实际应用中,你可以通过设置 `future: lazy` 键值对构造一个请求队列,而返回的 future 对象直到解析完成,程序才会继续执行。无论什么时候,全部的请求都是以并行方式发送到集群,以异步方式返回给 curl。 +In practice, this means you can queue up a batch of requests by using `future: lazy` and they will pend until you resolve +the futures, at which time all requests will be sent in parallel to the cluster and return asynchronously to curl. -这听起来好复杂,但由于RingPHP的 `FutureArray` 接口,这些操作则变得很简单。它让 future 对象看起来像是一个关联数组。例如: +This sounds tricky, but it is actually very simple thanks to RingPHP's `FutureArray` interface, which makes the future +act like a simple associative array. 
For example: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $params = [ @@ -51,12 +64,13 @@ $params = [ $future = $client->get($params); $doc = $future['_source']; // This call will block and force the future to resolve --------------------------------------------------- +---- -就像通常的响应数据那样,future 对象可以用迭代关联数组的方式解析特定的值(轮流解析未解析的请求和值)。这样就可以写成如下形式: +Interacting with the future as an associative array, just like a normal response, will cause the future to resolve +that particular value (which in turn resolves all pending requests and values). This allows patterns such as: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $futures = []; @@ -78,14 +92,16 @@ foreach ($futures as $future) { // access future's values, causing resolution if necessary echo $future['_source']; } --------------------------------------------------- +---- -请求队列会并行执行,执行后赋值给 futures 数组。每批请求默认为 100 个。 +The queued requests will execute in parallel and populate their futures after execution. Batch size defaults to +100 requests-per-batch. 
-如果你想强制解析 future 对象,但又不立刻获取响应数据。你可以用 future 对象的 `wait()` 方法来强制解析: +If you wish to force future resolution, but don't actually need the values immediately, you can call `wait()` on the future +to force resolution too: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $futures = []; @@ -104,14 +120,16 @@ for ($i = 0; $i < 1000; $i++) { //wait() forces future resolution and will execute the underlying curl batch $futures[999]->wait(); --------------------------------------------------- +---- -=== 更改批量值 +=== Changing batch size -默认的批量值为 100 个,这意味着在客户端强制 future 对象解析前(执行 `curl_multi` 调用),队列可以容纳 100 个请求。批量值可以更改,取决于你的需求。批量值的调整是通过配置 HTTP handler 时设置 `max_handles` 参数来实现: +The default batch size is 100, meaning 100 requests will queue up before the client forces futures to begin resolving +(e.g. initiate a `curl_multi` call). The batch size can be changed depending on your preferences. The batch size +is controllable via the `max_handles` setting when configuring the handler: [source,php] --------------------------------------------------- +---- $handlerParams = [ 'max_handles' => 500 ]; @@ -121,12 +139,15 @@ $defaultHandler = ClientBuilder::defaultHandler($handlerParams); $client = ClientBuilder::create() ->setHandler($defaultHandler) ->build(); --------------------------------------------------- +---- -上面的设置会更改批量发送数量为 500。注意:不管队列数量是否为最大批量值,强制解析 future 对象都会引起底层的 curl 执行批量请求操作。在如下的示例中,只有 499 个对象加入队列,但最后的 future 对象被解析会引起强制发送批量请求: +This will change the behavior to wait on 500 queued requests before sending the batch. Note, however, that forcing a +future to resolve will cause the underlying curl batch to execute, regardless of if the batch is "full" or not. 
In this +example, only 499 requests are added to the queue...but the final future resolution will force the batch to flush +anyway: [source,php] --------------------------------------------------- +---- $handlerParams = [ 'max_handles' => 500 ]; @@ -154,14 +175,15 @@ for ($i = 0; $i < 499; $i++) { // resolve the future, and therefore the underlying batch $body = $future[499]['body']; --------------------------------------------------- +---- -=== 各种批量执行 +=== Heterogeneous batches are OK -队列里面允许存在各种请求。比如,你可以把 get 请求、index 请求和 search 请求放到队列里面: +It is possible to queue up heterogeneous batches of requests. For example, you can queue up several GETs, indexing requests +and a search: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $futures = []; @@ -212,18 +234,26 @@ $searchResults = $futures['searchRequest']['hits']; // Should return immediately, since the previous future resolved the entire batch $doc = $futures['getRequest']['_source']; --------------------------------------------------- +---- -=== 警告 +=== Caveats to Future mode -使用 future 模式时需要注意几点。最大也是最明显的问题是:你要自己去解析 future 对象。这挺麻烦的,而且偶尔会引起一些意料不到的状况。 +There are a few caveats to using future mode. The biggest is also the most obvious: you need to deal with resolving the +future yourself. This is usually trivial, but can sometimes introduce unexpected complications. -例如,假如你手动使用 `wait()` 方法解析,在需要重新构建 future 对象并解析的情况下,你也许要调用好几次 `wait()` 方法。这是因为每次重新构造 future 对象都会引起 future 对象的重新赋值(覆盖解析结果),所以每个 future 对象都要重新解析获取结果。 +For example, if you resolve manually using `wait()`, you may need to call `wait()` several times if there were retries. +This is because each retry will introduce another layer of wrapped futures, and each needs to be resolved to get the +final result. -如果你使用 ArrayInterface 返回的结果( `$response['hits']['hits']` )则不用进行额外处理。然而 FutureArrayInterface 就要全面解析 future 对象才能使用响应数据。 +This is not needed if you access values via the ArrayInterface however (e.g. 
`$response['hits']['hits']`), since +FutureArrayInterface will automatically and fully resolve the future to provide values. -另外一点是一些方法会失效。比如 exists 方法( `$client->exists()` , `$client->indices()->exists` , `$client->indices->templateExists()` 等)在正常情况下会返回 true 或 false。 +Another caveat is that certain APIs will lose their "helper" functionality. For example, "exists" APIs (e.g. +`$client->exists()`, `$client->indices()->exists`, `$client->indices->templateExists()`, etc) typically return a true +or false under normal operation. -当使用 future 模式时,future 对象还未封装好,这代表客户端无法检测响应结果和返回 true 或 false。所以你会得到从 Elasticsearch 返回的未封装响应数据,而你不得不对这些数据进行处理。 +When operated in future mode, unwrapping of the future is left to your application, +which means the client can no longer inspect the response and return a simple true/false. Instead, you'll see the raw +response from Elasticsearch and will have to take action appropriately. -这些注意事项也适用于 `ping()` 方法。 \ No newline at end of file +This also applies to `ping()`. \ No newline at end of file diff --git a/index-operations.asciidoc b/index-operations.asciidoc index c9f60c8..61b9980 100644 --- a/index-operations.asciidoc +++ b/index-operations.asciidoc @@ -1,14 +1,16 @@ -[[_index_management_operations]] -== 索引管理操作 -索引管理操作可以让你管理集群中的索引,例如创建、删除和更新索引和索引的映射/配置。 +== Index Management Operations -=== 创建一个索引 +Index management operations allow you to manage the indices in your Elasticsearch cluster, such as creating, deleting and +updating indices and their mappings/settings. -索引操作包含在一个特定的命名空间内,与其它直接从属于客户端对象的方法隔离开来。让我们创建一个索引作为示例: +=== Create an index + +The index operations are all contained under a distinct namespace, separated from other methods that are on the root +client object. 
As an example, let's create a new index: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $params = [ 'index' => 'my_index' @@ -16,13 +18,14 @@ $params = [ // Create the index $response = $client->indices()->create($params); --------------------------------------------------- +---- {zwsp} + -你可以在一个创建索引 API 中指定任何参数。所有的参数通常会注入请求体中的 `body` 参数下: +You can specify any parameters that would normally be included in a new index creation API. All parameters that +would normally go in the request body are located in the 'body' parameter: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $params = [ 'index' => 'my_index', @@ -38,7 +41,7 @@ $params = [ ], 'properties' => [ 'first_name' => [ - 'type' => 'string', + 'type' => 'keyword', 'analyzer' => 'standard' ], 'age' => [ @@ -53,22 +56,24 @@ $params = [ // Create the index with mappings and settings now $response = $client->indices()->create($params); --------------------------------------------------- +---- {zwsp} + -=== 创建一个索引(复杂示例) +=== Create an index (advanced example) -这是一个以更为复杂的方式创建索引的示例,示例中展示了如何定义 analyzers,tokenizers,filters 和索引的 settings。虽然创建方式与之前的示例本质一样,但是这个复杂示例对于理解客户端的使用方法具有莫大帮助,因为这种特定的语法结构很容易被混淆。 +This is a more complicated example of creating an index, showing how to define analyzers, tokenizers, filters and +index settings. Although essentially the same as the previous example, the more complicated example can be helpful +for "real world" usage of the client, since this particular syntax is easy to mess up. 
[source,php] --------------------------------------------------- +---- $params = [ 'index' => 'reuters', 'body' => [ - 'settings' => [ + 'settings' => [ <1> 'number_of_shards' => 1, 'number_of_replicas' => 0, - 'analysis' => [ + 'analysis' => [ <2> 'filter' => [ 'shingle' => [ 'type' => 'shingle' @@ -95,40 +100,40 @@ $params = [ ] ] ], - 'mappings' => [ - '_default_' => [ + 'mappings' => [ <3> + '_default_' => [ <4> 'properties' => [ 'title' => [ - 'type' => 'string', + 'type' => 'keyword', 'analyzer' => 'reuters', 'term_vector' => 'yes', 'copy_to' => 'combined' ], 'body' => [ - 'type' => 'string', + 'type' => 'keyword', 'analyzer' => 'reuters', 'term_vector' => 'yes', 'copy_to' => 'combined' ], 'combined' => [ - 'type' => 'string', + 'type' => 'keyword', 'analyzer' => 'reuters', 'term_vector' => 'yes' ], 'topics' => [ - 'type' => 'string', + 'type' => 'keyword', 'index' => 'not_analyzed' ], 'places' => [ - 'type' => 'string', + 'type' => 'keyword', 'index' => 'not_analyzed' ] ] ], - 'my_type' => [ + 'my_type' => [ <5> 'properties' => [ 'my_field' => [ - 'type' => 'string' + 'type' => 'keyword' ] ] ] @@ -136,25 +141,30 @@ $params = [ ] ]; $client->indices()->create($params); --------------------------------------------------- +---- +<1> The top level `settings` contains config about the index (# of shards, etc) as well as analyzers +<2> `analysis` is nested inside of `settings`, and contains tokenizers, filters, char filters and analyzers +<3> `mappings` is a sibling of `settings` (both are nested inside of `body`), and contains the mappings for various types +<4> The `_default_` type is a dynamic template that is applied to all fields that don't have an explicit mapping +<5> The `my_type` type is an example of a user-defined type that holds a single field, `my_field` + -=== 删除一个索引 +=== Delete an index -删除一个索引十分简单: +Deleting an index is very simple: [source,php] --------------------------------------------------- +---- $params = ['index' => 'my_index']; $response = 
$client->indices()->delete($params); --------------------------------------------------- +---- {zwsp} + === Put Settings API - -Put Settings API 允许你更改索引的配置参数: +The Put Settings API allows you to modify any index setting that is dynamic: [source,php] --------------------------------------------------- +---- $params = [ 'index' => 'my_index', 'body' => [ @@ -166,15 +176,15 @@ $params = [ ]; $response = $client->indices()->putSettings($params); --------------------------------------------------- +---- {zwsp} + === Get Settings API -Get Settings API 可以让你知道一个或多个索引的当前配置参数: +Get Settings API will show you the currently configured settings for one or more indexes: [source,php] --------------------------------------------------- +---- // Get settings for one index $params = ['index' => 'my_index']; $response = $client->indices()->getSettings($params); @@ -184,15 +194,15 @@ $params = [ 'index' => [ 'my_index', 'my_index2' ] ]; $response = $client->indices()->getSettings($params); --------------------------------------------------- +---- {zwsp} + === Put Mappings API -Put Mappings API 允许你更改或增加一个索引的映射。 +The Put Mappings API allows you to modify or add to an existing index's mapping. [source,php] --------------------------------------------------- +---- // Set the index and type $params = [ 'index' => 'my_index', @@ -204,7 +214,7 @@ $params = [ ], 'properties' => [ 'first_name' => [ - 'type' => 'string', + 'type' => 'keyword', 'analyzer' => 'standard' ], 'age' => [ @@ -217,15 +227,15 @@ $params = [ // Update the index mapping $client->indices()->putMapping($params); --------------------------------------------------- +---- {zwsp} + === Get Mappings API -Get Mappings API 返回索引和类型的映射细节。你可以指定一些索引和类型,取决于你希望检索什么映射。 +The Get Mappings API will return the mapping details about your indexes and types. 
Depending on the mappings that you wish to retrieve, you can specify a number of combinations of index and type: [source,php] --------------------------------------------------- +---- // Get mappings for all indexes and types $response = $client->indices()->getMapping(); @@ -249,18 +259,16 @@ $params = [ 'index' => [ 'my_index', 'my_index2' ] ]; $response = $client->indices()->getMapping($params); --------------------------------------------------- +---- {zwsp} + -=== 索引命名空间下的其他 API +=== Other APIs in the Indices Namespace +There are a number of other APIs in the indices namespace that allow you to manage your elasticsearch indexes (add/remove templates, flush segments, close indexes, etc). -索引命名空间下还有一些 API 允许你管理你的索引(add/remove templates, flush segments, close indexes等)。 - -如果你使用一个自动检索的 IDE,你应该可以轻易发现索引的命名空间: +If you use an IDE with autocompletion, you should be able to easily explore the indices namespace by typing: [source,php] --------------------------------------------------- +---- $client->indices()-> --------------------------------------------------- - -这里可以查看可用方法清单。而浏览 `\Elasticsearch\Namespaces\Indices.php` 文件则会看到所有可调用的方法清单。 \ No newline at end of file +---- +And perusing the list of available methods. Alternatively, browsing the `\Elasticsearch\Namespaces\Indices.php` file will show you the full list of available method calls (as well as parameter lists in the comments for each method). 
diff --git a/installation.asciidoc b/installation.asciidoc index 803cdee..a7547e0 100644 --- a/installation.asciidoc +++ b/installation.asciidoc @@ -1,73 +1,80 @@ -[[_installation_2]] -== 安装 +== Installation -Elasticsearch-php 的安装需要满足以下 4 个需求: +Elasticsearch-php has only four requirements that you need to worry about: -* PHP 7.0.0 或更高版本 -* https://www.phpcomposer.com/[Composer] -* http://php.net/manual/zh/book.curl.php[ext-curl]:PHP 的 Libcurl 扩展(详情查看下方注意事项) -* 原生 JSON 扩展 (`ext-json`) 1.3.7或更高版本 +* PHP 7.0.0 or higher +* http://getcomposer.org[Composer] +* http://php.net/manual/en/book.curl.php[ext-curl]: the Libcurl extension for PHP (see note below) +* Native JSON Extensions (`ext-json`) 1.3.7 or higher -其余的依赖会由 Composer 自动安装。Composer 是一个 PHP 包管理和依赖管理工具,使用 Composer 安装 elasticsearch-php 非常简单。 +The rest of the dependencies will automatically be downloaded and installed by Composer. Composer is a package and dependency manager for PHP. Installing elasticsearch-php with Composer is very easy. [NOTE] -.Libcurl 是可替代的 +.Libcurl can be replaced ==== -与 Elasticsearch-php 客户端绑定的默认 HTTP handlers 需要 PHP 的 Libcurl 扩展,但客户端也并非一定要用 Libcurl 扩展。如果你有 -一台主机没有安装 Libcurl 扩展,你可以使用基于 PHP streams 的 HTTP handler 来替代。但是性能会变差,因为 Libcurl 扩展要快得多。 +The default HTTP handlers that ship with Elasticsearch-php require the PHP libcurl extension, but it is not technically +required for the client to operate. If you have a host that does not have libcurl installed, you can use an +alternate HTTP handler based on PHP streams. Performance _will_ suffer, as the libcurl extension is much faster. ==== -=== 版本矩阵 +=== Version Matrix -Elasticsearch-PHP 的版本要和 Elasticsearch 版本适配。 +You need to match your version of Elasticsearch to the appropriate version of this library. -Elasticsearch-PHP 的 master 分支总是与 Elasticsearch 的 master 分支相一致,但不建议在生产环境代码中使用 dev-master 分支。 +The master branch will always track Elasticsearch master, but it is not recommended to use `dev-master` in your production code. 
[width="40%",options="header",frame="topbot"] |============================ -|Elasticsearch Version | Elasticsearch-PHP Branch +|Elasticsearch Version | Elasticsearch-PHP Branch | >= 6.0 | `6.0` | >= 5.0, <= 6.0 | `5.0` | >= 1.0, <= 5.0 | `1.0`, `2.0` | <= 0.90.* | `0.4` |============================ -=== Composer 安装 +=== Composer Installation -* 在 `composer.json` 文件中增加 elasticsearch-php。如果你是新建项目,那么把以下的代码复制粘贴到 `composer.json` 就行了。如果是在现有项目中添加 elasticsearch-php,那么把 elasticsearch-php 添加到其它的包名后面即可: +* Include elasticsearch-php in your `composer.json` file. If you are starting a new project, simply paste the following JSON snippet into a new file called `composer.json`. If you have an existing project, include this requirement under the rest of requirements already present: + -[source,js] --------------------------------------------------- +[source,json] +-------------------------- { "require": { "elasticsearch/elasticsearch": "~6.0" } } --------------------------------------------------- +-------------------------- -* 使用 composer 安装客户端:首先要用下面第一个命令来安装 `composer.phar` ,然后使用第二个命令来执行安装程序。composer 会自动下载所有的依赖,把下载的依赖存储在 /vendor/ 目录下,并且创建一个 autoloader: +* Install the client with composer. The first command downloads the `composer.phar` PHP package, and the second command invokes the installation. Composer will automatically download any required dependencies, store them in a /vendor/ directory and build an autoloader: + -[source,sh] --------------------------------------------------- +[source,shell] +-------------------------- curl -s http://getcomposer.org/installer | php php composer.phar install --no-dev --------------------------------------------------- +-------------------------- + -关于 Composer 的详情请查看https://www.phpcomposer.com/[Composer 中文网]。 +More information about http://getcomposer.org/[Composer can be found at their website]. 
-* 最后加载 autoload.php。如果你现有项目是用 Composer 安装的,那么 autoload.php 也许已经在某处加载了,你就不必再加载。最后实例化一个客户端对象: +* Finally, include the generated autoloader in your main project. If your project is already based on Composer, the autoloader is likely already included somewhere and you don't need to add it again. Finally, instantiate a new client: + [source,php] --------------------------------------------------- +-------------------------- require 'vendor/autoload.php'; $client = Elasticsearch\ClientBuilder::create()->build(); --------------------------------------------------- +-------------------------- + -客户端对象的实例化主要是使用静态方法 `create()` ,这里会创建一个 ClientBuilder 对象,主要是用来设置一些自定义配置。如果你配置完了,你就可以调用 `build()` 方法来创建一个 `Client` 对象。我们会在配置一节中详细说明配置方法。 +Client instantiation is performed with a static helper function `create()`. This creates a ClientBuilder object, +which helps you to set custom configurations. When you are done configuring, you call the `build()` method to generate +a `Client` object. We'll discuss configuration more in the Configuration section -=== --no-dev标志 -你会注意到安装命令行指定了 `--no-dev` 。这里是防止 Composer 安装各种测试依赖包和开发依赖包。对于普通用户没有必要安装测试包。特别是开发依赖包包含了 Elasticsearch 的一套源码,这是为了以 REST API 的方式进行测试。然而这对于非开发者来说太大了,因此要使用 --no-dev。 +=== --no-dev flag +You'll notice that the installation command specified `--no-dev`. This prevents Composer +from installing the various testing and development dependencies. For average users, there +is no need to install the test suite. In particular, the development dependencies include +a full copy of Elasticsearch so that tests can be run against the REST specifications. This +is a rather large download for non-developers, hence the --no-dev flag -如果你想帮助完善这个客户端类库,那就删掉 `--no-dev` 标志来进行测试吧。 \ No newline at end of file +If you wish to contribute to development of this library, just omit the `--no-dev` flag to +be able to run tests. 
diff --git a/namespaces.asciidoc b/namespaces.asciidoc index 0649a35..05a18d1 100644 --- a/namespaces.asciidoc +++ b/namespaces.asciidoc @@ -1,21 +1,28 @@ -[[_namespaces]] -== 命名空间 -客户端有许多“命名空间”,通常是一些公开的可管理功能。命名空间对应 Elasticsearch 中各种可管理的 endpoint。下面是全部的命名空间: +== Namespaces -|=== -|*命名空间* |*功能* -|`indices()`|索引数据统计和显示索引信息 -|`nodes()` |节点数据统计和显示节点信息 -|`cluster()`|集群数据统计和显示集群信息 -|`snapshot()`|对集群和索引进行拍摄快照或恢复数据 -|`cat()` |执行Cat API命令(通常在命令行中使用) -|=== +The client has a number of "namespaces", which generally expose administrative +functionality. The namespaces correspond to the various administrative endpoints +in Elasticsearch. This is a complete list of namespaces: + + +[width="40%",options="header",frame="topbot"] +|============================ +| Namespace | Functionality +| `indices()` | Index-centric stats and info +| `nodes()` | Node-centric stats and info +| `cluster()` | Cluster-centric stats and info +| `snapshot()` | Methods to snapshot/restore your cluster and indices +| `cat()` | Access to the Cat API (which is generally used standalone from the command line) +|============================ + +Some methods are available in several different namespaces, which give you +the same information but grouped into different contexts. To see how these +namespaces work, let's look at the `_stats` output: -一些方法在不同的命名空间下均可使用。虽然返回的是同样的信息但是却属于不同的上下文环境。想知道命名空间如何运行,请看 `_stats` 的输出信息: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); // Index Stats @@ -29,14 +36,18 @@ $response = $client->nodes()->stats(); // Cluster Stats // Corresponds to curl -XGET localhost:9200/_cluster/stats $response = $client->cluster()->stats(); --------------------------------------------------- +---- +{zwsp} + -上面展示了在三个不同命名空间下都调用了 `stats()` 方法。有时这些方法需要参数,这些参数的写法跟客户端中其他方法的参数写法相同。 +As you can see, the same `stats()` call is made through three different +namespaces. Sometimes the methods require parameters. 
These parameters work +just like any other method in the library. -例如,我们可以请求一个索引或多个索引的统计信息: +For example, we can request index stats about a specific index, or multiple +indices: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); // Corresponds to curl -XGET localhost:9200/my_index/_stats @@ -46,12 +57,13 @@ $response = $client->indices()->stats($params); // Corresponds to curl -XGET localhost:9200/my_index1,my_index2/_stats $params['index'] = array('my_index1', 'my_index2'); $response = $client->indices()->stats($params); --------------------------------------------------- +---- +{zwsp} + -另外一个例子是在一个现有索引中添加别名: +As another example, here is how you might add an alias to an existing index: [source,php] --------------------------------------------------- +---- $params['body'] = array( 'actions' => array( array( @@ -63,6 +75,8 @@ $params['body'] = array( ) ); $client->indices()->updateAliases($params); --------------------------------------------------- +---- -注意上述例子中两个 `stats` 的调用和 `updateAlias` 的调用是接收不同格式的参数,每个方法的参数格式由相应的 API 需求来决定。`stats` API只需要一个 index 名,而 `updateAlias` 则需要一个 body,里面还要一个 actions 参数。 \ No newline at end of file +Notice how both the `stats` calls and the `updateAliases` call took a variety of parameters, +each according to what the particular API requires. The `stats` API only requires +one or more index names, while `updateAliases` requires a body of actions. \ No newline at end of file diff --git a/overview.asciidoc b/overview.asciidoc index 85d9697..f2dda5a 100644 --- a/overview.asciidoc +++ b/overview.asciidoc @@ -1,8 +1,8 @@ -[[_overview]] -== 概述 +== Overview -这是 Elasticsearch 官方的 PHP 客户端。我们把 Elasticsearch-PHP 设计成低级客户端(https://en.wikipedia.org/wiki/Low-level_design[低级设计模式]),使用时不会偏离 REST API 的用法。 +This is the official PHP client for Elasticsearch. It is designed to be a very low-level client that does not stray from the REST API. 
-客户端所有方法几乎都与 REST API 对应,而且也与其他编程语言的客户端(如 ruby, python 等)方法结构相似。我们希望这种对应方式可以方便开发者更加容易上手客户端,且以最小的代价快速从一种编程语言转换到另一种编程语言。 +All methods closely match the REST API, and furthermore, match the method structure of other language clients (ruby, python, etc). We hope that this consistency makes it easy to get started with a client, and to seamlessly switch from one language to the next with minimal effort. + +The client is designed to be "unopinionated". There are a few universal niceties added to the client (cluster state sniffing, round-robin requests, etc) but largely it is very barebones. This was intentional. We want a common base that more sophisticated libraries can build on top of. -本客户端设计得很“灵活”。虽然有一些通用的细节添加进了客户端(集群状态嗅探,轮询调度请求等),但总的来说它是十分基础的。这也是有意这样设计。我们只是设计了基础方法,更多的复杂类库可以在此衍生出来。 diff --git a/per-request-configuration.asciidoc b/per-request-configuration.asciidoc index a27baf1..939dce3 100644 --- a/per-request-configuration.asciidoc +++ b/per-request-configuration.asciidoc @@ -1,18 +1,24 @@ -[[_per_request_configuration]] -== 请求层配置(Per-request) -除了配置连接层和客户端层,还可以配置请求层。配置请求层是在请求体中指定参数。 +== Per-request configuration -=== 忽略异常 +There are several configurations that can be set on a per-request basis, rather than at a connection- or client-level. +These are specified as part of the request associative array. -Elasticsearch-PHP 的类库是会对普通的问题抛出异常的。这些异常跟 Elasticsearch 返回的 HTTP 响应码一一对应。例如,获取一个不存在的文档会抛出 `MissingDocument404Exception` 。 +=== Ignoring exceptions +The library attempts to throw exceptions for common problems. These exceptions match the HTTP response code provided +by Elasticsearch. For example, attempting to GET a nonexistent document will throw a `MissingDocument404Exception`. -异常对于处理一些问题(如找不到文档、语法错误、版本冲突等)十分有用。但是有时候你只是想要处理返回的数据而不想捕获异常。 +Exceptions are a useful and consistent way to deal with problems like missing documents, syntax errors, version +conflicts, etc. 
But sometimes you want to deal with the response body rather than catch exceptions (often useful +in test suites). + +If you need that behavior, you can configure an `ignore` parameter. This should be configured in the `client` parameter +of the request array. For instance, the following example will ignore the `MissingDocument404Exception` +exception and instead return the JSON provided by Elasticsearch. -如果你想忽略异常,你可以配置 `ignore` 参数。ignore 参数要作为 `client` 的参数配置在请求体中。例如下面的示例会忽略 `MissingDocument404Exception` ,返回的是 Elasticsearch 提供的 JSON 数据。 [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $params = [ @@ -24,13 +30,15 @@ $params = [ echo $client->get($params); > {"_index":"test_missing","_type":"test","_id":"1","found":false} --------------------------------------------------- -<1> 这里会忽略 404 异常 +---- +<1> This will ignore just the 404 missing exception -你可以通过数组的方式指定忽略多个 HTTP 状态码: +You can specify multiple HTTP status codes to ignore, by providing an array of values: [source,php] --------------------------------------------------- +---- +$client = ClientBuilder::create()->build(); + $params = [ 'index' => 'test_missing', 'type' => 'test', @@ -39,21 +47,28 @@ $params = [ echo $client->get($params); > No handler found for uri [/test_missing/test/] and method [GET] --------------------------------------------------- -<1> `ignore` 参数也接收数组。在这个示例中, `BadRequest400Exception` 和 `MissingDocument404Exception` 都会被忽略。 -注意,返回的数据是字符串格式,而不是 JSON 数据。而在第一个示例中返回的是 JSON 数据,客户端会 decode 该 JSON 数据为数组。 +---- +<1> `ignore` also accepts an array of exceptions to ignore. In this example, +both the `BadRequest400Exception` and the `MissingDocument404Exception` are being ignored + -一旦客户端无法得知返回的异常数据格式,客户端就不会 decode 返回结果。 +It should be noted that the response is simply a string, which may or may not be encoded as JSON. In the first example, +the response body was a complete JSON object which could be decoded. In the second example, it was simply a string. 
-=== 自定义查询参数 +Since the client has no way of knowing what the exception response will contain, no attempts to decode it are taken. -有时候你要自己提供自定义参数,比如为第三方插件或代理提供认证 token。在 Elasticsearch-php 的白名单中存储着所有的查询参数,这是为了防止你指定一个参数,而 Elasticsearch 却不接收。 +=== Providing custom query parameters -如果你要自定义参数,你就要忽略掉这种白名单机制。为了达到这种效果,请增加 `custom` 参数: +Sometimes you need to provide custom query params, such as authentication tokens for a third-party plugin or proxy. +All query parameters are white-listed in Elasticsearch-php, which is to protect you from specifying a param which is +not accepted by Elasticsearch. + +If you need custom parameters, you need to bypass this whitelisting mechanism. To do so, add them to the `custom` +parameter as an array of values: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $params = [ @@ -69,16 +84,19 @@ $params = [ ] ]; $exists = $client->exists($params); --------------------------------------------------- +---- + -=== 增加返回冗余 +=== Increasing the Verbosity of responses -客户端默认返回响应体数据。如果你需要更多信息(如头信息、相应状态码等),你可以让客户端返回更多信息。通过 `verbose` 参数可以开启这个功能。 +By default, the client will only return the response body. If you require more information (e.g. stats about the transfer, +headers, status codes, etc), you can tell the client to return a more verbose response. This is enabled via the +`verbose` parameter in the client options. 
-没有返回冗余,你看到的返回信息是这样的: +Without verbosity, all you see is the response body: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $params = [ @@ -103,12 +121,12 @@ Array ) ) --------------------------------------------------- +---- -如果加上返回冗余: +With verbosity turned on, you will see all of the transfer stats: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $params = [ @@ -194,18 +212,27 @@ Array ) ) ) --------------------------------------------------- +---- -=== Curl 超时设置 +=== Curl Timeouts -通过 `timeout` 和 `connect_timeout` 参数可以配置每个请求的 Curl 超时时间。这个配置主要是控制客户端的超时时间。 `connect_timeout` 参数控制在连接阶段完成前,curl 的等待时间。而 `timeout` 参数则控制整个请求完成前,最多等待多长时间。 +It is possible to configure per-request curl timeouts via the `timeout` and `connect_timeout` parameters. These +control the client-side curl timeouts. The `connect_timeout` parameter controls how long curl should wait for the +"connect" phase to finish, while the `timeout` parameter controls how long curl should wait for the entire request +to finish. -如果超过超时时间,curl 会关闭连接并返回一个致命错误。两个参数都要用 *秒* 作为参数。 +If either timeout expires, curl will close the connection and return an error. Both parameters should be specified +in seconds. -注意:客户端超时并 *不* 意味着 Elasticsearch 中止请求。Elasticsearch 会继续执行请求直到请求完成。在慢查询或是 bulk 请求下,操作会在后台继续执行,对客户端来说这些动作是隐蔽的。如果客户端在超时后立即断开连接,然后又立刻发送另外一个请求。由于客户端没有处理服务端回压(译者注:这里国内翻译成背压,但是https://www.zhihu.com/question/49618581?from=profile_question_card[知乎]有文章指出这个翻译不够精准,会造成程序员难以理解,所以这里翻译成回压)的机制,这有可能会造成服务端过载。遇到这种情况,你会发现线程池队列会慢慢变大,当队列超出负荷,Elasticsearch 会发送 `EsRejectedExecutionException` 的异常。 +Note: client-side timeouts *do not* mean that Elasticsearch aborts the request. Elasticsearch will continue executing +the request until it completes. In the case of a slow query or bulk request, the operation will continue executing +"in the background", unknown to your client. 
If your client kills connections rapidly with a timeout, only to immediately +execute another request, it is possible to swamp the server with many connections because there is no "back-pressure" on the +client. In these situations, you will see the appropriate threadpool queue growing in size, and may start receiving +`EsRejectedExecutionException` exceptions from Elasticsearch when the queue finally reaches capacity. [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $params = [ @@ -218,14 +245,15 @@ $params = [ ] ]; $response = $client->get($params); --------------------------------------------------- +---- -=== 开启 Future 模式 +=== Enabling Future Mode -客户端支持异步方式批量发送请求。通过 client 选项的 `future` 参数可以开启(HTTP handler 要支持异步模式): +The client supports asynchronous, batch processing of requests. This is enabled (if your HTTP handler supports it) on +a per-request basis via the `future` parameter in the client options: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $params = [ @@ -238,16 +266,21 @@ $params = [ ]; $future = $client->get($params); $results = $future->wait(); // resolve the future --------------------------------------------------- +---- + +Future mode supports two options: `true` or `'lazy'`. For more details about how asynchronous execution functions, and +how to work with the results, see the dedicated page on <<_future_mode>>. -Future 模式有两个参数可选: `true` 或 `lazy` 。关于异步执行方法以及如何处理返回结果的详情,请到<<_future_mode>>中查看。 +=== SSL Encryption -=== SSL 加密 +Normally, you will specify SSL configurations when you create the client (see <<_security>> for more details), since encryption typically +applies to all requests. However, it is possible to configure on a per-request basis too if you need that functionality. 
+For example, if you need to use a self-signed cert on a specific request, you can specify it via the `verify` parameter +in the client options: -在创建客户端时,一般需要指定 SSL 配置,因为通常所有的请求都需要加密(查询<<_security>>一节获取更多详情)。然而,在每个请求中配置 SSL 加密也是有可能的。例如,如果你需要在某个特定的请求中使用自签名证书,你可以通过在 client 选项中配置 `verify` 参数: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $params = [ @@ -259,4 +292,4 @@ $params = [ ] ]; $result = $client->get($params); --------------------------------------------------- +---- diff --git a/php-version-requirement.asciidoc b/php-version-requirement.asciidoc index 6dc8838..b45fb5a 100644 --- a/php-version-requirement.asciidoc +++ b/php-version-requirement.asciidoc @@ -1,4 +1,4 @@ -[[_php_version_requirement]] -== PHP版本需求 +== PHP Version Requirement -Elasticsearch-PHP6.0 需要 PHP 7.0.0或者更高版本。另外,它还需要原生 JSON 扩展的版本为1.3.7或者更高版本。。 \ No newline at end of file +Version 5.0 of Elasticsearch-PHP requires PHP version 5.6.6 or higher. In addition, it requires the native JSON +extension to be version 1.3.7 or higher. diff --git a/php_json_objects.asciidoc b/php_json_objects.asciidoc index 6f4830a..b237bf4 100644 --- a/php_json_objects.asciidoc +++ b/php_json_objects.asciidoc @@ -1,16 +1,20 @@ [[php_json_objects]] -== 用 PHP 处理 JSON 数组和 JSON 对象 +== Dealing with JSON Arrays and Objects in PHP -客户端在关于 JSON 数组和 JSON 对象的处理和定义方面总是令人疑惑不已。尤其是由空对象和对象数组引起的问题。本节会展示一些 Elasticsearch JSON API 常见的数据格式,还会说明如何以 PHP 的语法来表达这些数据格式。 +A common source of confusion with the client revolves around JSON arrays and objects, and how to specify them in PHP. +In particular, problems are caused by empty objects and arrays of objects. 
This page will show you some common patterns +used in the Elasticsearch JSON API, and how to convert them to a PHP representation. -=== 空对象 +=== Empty Objects -Elasticsearch API 在几个地方使用了空对象,这会对 PHP 造成影响。不像其它的语言,PHP 没有一个简便的符号来表示空对象,而许多开发者还不知道如何指定一个空对象。 +The Elasticsearch API uses empty JSON objects in several locations, and this can cause problems for PHP. Unlike other +languages, PHP does not have a "short" notation for empty objects and so many developers are unaware how to specify +an empty object. -设想在查询中增加 Highlight: +Consider adding a Highlight to a query: -[source,js] --------------------------------------------------- +[source,json] +---- { "query" : { "match" : { @@ -19,17 +23,19 @@ Elasticsearch API 在几个地方使用了空对象,这会对 PHP 造成影响 }, "highlight" : { "fields" : { - "content" : {}<1> + "content" : {} <1> } } } --------------------------------------------------- -<1> 这个空对象便会引起问题 +---- +<1> This empty JSON object is what causes problems. -问题就在于 PHP 会自动把 `"content" : {}` 转换成 `"content" : []` ,在 Elasticsearch DSL 中这样的数据格式是非法的。我们需要告诉 PHP 那个空对象就是一个空对象而非空数组。为了在查询中定义空对象,你需要这样做: +The problem is that PHP will automatically convert `"content" : {}` into `"content" : []`, which is no longer valid +Elasticsearch DSL. We need to tell PHP that the empty object is explicitly an object, not an array. To define this +query in PHP, you would do: -[source,php] --------------------------------------------------- +[source,php] +---- $params['body'] = array( 'query' => array( 'match' => array( @@ -38,59 +44,64 @@ $params['body'] = array( ), 'highlight' => array( 'fields' => array( - 'content' => new \stdClass()<1> + 'content' => new \stdClass() <1> ) ) ); $results = $client->search($params); --------------------------------------------------- -<1> 使用 PHP 的 stdClass 对象来代表空对象,现在就可以解析为正确的 JSON 数据了。 +---- +<1> We use the generic PHP stdClass object to represent an empty object. The JSON will now encode correctly. 
-通过使用一个 stdClass 对象,我们可以强制 `json_encode` 解析为空对象,而不是空数组。然而,这种冗余的写法是唯一解决 PHP 空对象的方法,没有简便的方法可以表示空对象。 +By using an explicit stdClass object, we can force the `json_encode` function to correctly output an empty object, instead +of an empty array. Sadly, this verbose solution is the only way to accomplish the goal in PHP; there is no "short" +version of an empty object. -=== 对象数组 +=== Arrays of Objects -Elasticsearch DSL 的另一种常见的数据格式是对象数组。例如,假设在你的查询中增加排序: +Another common pattern in Elasticsearch DSL is an array of objects. For example, consider adding a sort to your query: -[source,js] --------------------------------------------------- +[source,json] +---- { "query" : { "match" : { "content" : "quick brown fox" } }, - "sort" : [ <1> + "sort" : [ <1> {"time" : {"order" : "desc"}}, {"popularity" : {"order" : "desc"}} ] } --------------------------------------------------- -<1> "sort" 内包含 JSON 对象数组。 +---- +<1> "sort" contains an array of JSON objects -这种形式很常见,但是在 PHP 中构建就稍微有些繁琐,因为这需要嵌套数组。用 PHP 写这种冗余的结构就让人读起来有点晦涩。为了构建对象数组,你要在数组中嵌套数组: +This arrangement is *very* common, but the construction in PHP can be tricky since it requires nesting arrays. The +verbosity of PHP tends to obscure what is actually going on. 
To construct an array of objects, you actually need +an array of arrays: -[source,php] --------------------------------------------------- +[source,php] +---- $params['body'] = array( 'query' => array( 'match' => array( 'content' => 'quick brown fox' ) ), - 'sort' => array( <1> - array('time' => array('order' => 'desc')), <2> + 'sort' => array( <1> + array('time' => array('order' => 'desc')), <2> array('popularity' => array('order' => 'desc')) <3> ) ); $results = $client->search($params); --------------------------------------------------- -<1> 这里 encode 为 `"sort" : []` -<2> 这里 encode 为 `{"time" : {"order" : "desc"}}` -<3> 这里 encode 为 `{"popularity" : {"order" : "desc"}}` +---- +<1> This array encodes the `"sort" : []` array +<2> This array encodes the `{"time" : {"order" : "desc"}}` object +<3> This array encodes the `{"popularity" : {"order" : "desc"}}` object -如果你用的是 PHP5.4 及以上版本,我强烈要求你使用 `[]` 构建数组。这会让多维数组看起来易读些: +If you are on PHP 5.4+, I would strongly encourage you to use the short array syntax. It makes these nested arrays +much simpler to read: -[source,php] --------------------------------------------------- +[source,php] +---- $params['body'] = [ 'query' => [ 'match' => [ @@ -103,16 +114,16 @@ $params['body'] = [ ] ]; $results = $client->search($params); --------------------------------------------------- +---- -=== 空对象数组 +=== Arrays of empty objects -偶尔你会看到 DSL 需要上述两种数据格式。score 查询便是一个很好的例子,该查询有时需要一个对象数组,而有一些对象可能是一个空的 JSON 对象。 +Occasionally, you'll encounter DSL that requires both of the previous patterns. The function score query is a good +example: it sometimes requires an array of objects, and some of those objects might be empty JSON objects. 
-请看如下查询: - -[source,js] --------------------------------------------------- +Given this query: +[source,json] +---- { "query":{ "function_score":{ @@ -125,17 +136,18 @@ $results = $client->search($params); } } } --------------------------------------------------- +---- + +We can build it using the following PHP code: -我们用下面的 PHP 代码来构建这个查询: -[source,php] --------------------------------------------------- +[source,php] +---- $params['body'] = array( 'query' => array( 'function_score' => array( - 'functions' => array( <1> - array( <2> + 'functions' => array( <1> + array( <2> 'random_score' => new \stdClass() <3> ) ) @@ -143,7 +155,7 @@ $params['body'] = array( ) ); $results = $client->search($params); --------------------------------------------------- -<1> 这里 encode 为 `"functions" : []` -<2> 这里 encode 为 `{ "random_score": {} }` -<3> 这里 encode 为 `"random_score": {}` +---- +<1> This encodes the array of objects: `"functions" : []` +<2> This encodes an object inside the array: `{ "random_score": {} }` +<3> This encodes the empty JSON object: `"random_score": {}` \ No newline at end of file diff --git a/quickstart.asciidoc b/quickstart.asciidoc index 7077e25..61aa73c 100644 --- a/quickstart.asciidoc +++ b/quickstart.asciidoc @@ -1,48 +1,51 @@ -[[_quickstart]] -== 快速开始 -这一节会概述一下客户端以及客户端的一些主要方法的使用规则。 +== Quickstart -=== 安装 +This section will give you a quick overview of the client and how the major functions work. 
-* 在 composer.json 文件中引入 elasticsearch-php: +=== Installation + +* Include elasticsearch-php in your `composer.json` file: + -[source,js] --------------------------------------------------- +[source,json] +---------------------------- { "require": { - "elasticsearch/elasticsearch": "~6.0" + "elasticsearch/elasticsearch": "~5.0" } } --------------------------------------------------- +---------------------------- -* 用 composer 安装客户端: +* Install the client with composer: + -[source,sh] --------------------------------------------------- +[source,shell] +---------------------------- curl -s http://getcomposer.org/installer | php php composer.phar install --no-dev --------------------------------------------------- +---------------------------- -* 在项目中引入自动加载文件(如果还没引入),并且实例化一个客户端: +* Include the autoloader in your main project (if you haven't already), and instantiate a new client : + [source,php] --------------------------------------------------- +---------------------------- require 'vendor/autoload.php'; use Elasticsearch\ClientBuilder; $client = ClientBuilder::create()->build(); --------------------------------------------------- +---------------------------- + -=== 索引一个文档 +=== Index a document -在 elasticsearch-php 中,几乎一切操作都是用关联数组来配置。REST 路径(endpoint)、文档和可选参数都是用关联数组来配置。 +In elasticsearch-php, almost everything is configured by associative arrays. The REST endpoint, document and optional parameters - everything is an associative array. -为了索引一个文档,我们要指定4部分信息:index,type,id 和一个 body。构建一个键值对的关联数组就可以完成上面的内容。body 的键值对格式与文档的数据保持一致性。(译者注:如 ["testField" => "abc"] 在文档中则为 {"testField" : "abc"}): +To index a document, we need to specify four pieces of information: index, type, id and a document body. This is done by +constructing an associative array of key:value pairs. 
The request body is itself an associative array with key:value pairs +corresponding to the data in your document: [source,php] --------------------------------------------------- +---------------------------- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -52,37 +55,30 @@ $params = [ $response = $client->index($params); print_r($response); --------------------------------------------------- +---------------------------- -收到的响应数据表明,你指定的索引中已经创建好了文档。响应数据是一个关联数组,里面的内容是 Elasticsearch 返回的decoded JSON 数据: +The response that you get back indicates the document was created in the index that you specified. The response is an +associative array containing a decoded version of the JSON that Elasticsearch returns: [source,php] --------------------------------------------------- +---------------------------- Array ( [_index] => my_index [_type] => my_type [_id] => my_id [_version] => 1 - [result] => created - [_shards] => Array - ( - [total] => 2 - [successful] => 1 - [failed] => 0 - ) - - [_seq_no] => 0 - [_primary_term] => 1 + [created] => 1 ) --------------------------------------------------- -=== 获取一个文档 +---------------------------- + +=== Get a document -现在获取刚才索引的文档: +Let's get the document that we just indexed. This will simply return the document: [source,php] --------------------------------------------------- +---------------------------- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -91,13 +87,13 @@ $params = [ $response = $client->get($params); print_r($response); --------------------------------------------------- +---------------------------- -响应数据包含一些元数据(如 index,type 等)和 `_source` 属性, -这是你发送给 Elasticsearch 的原始文档数据。 +The response contains some metadata (index, type, etc) as well as a `_source` field...this is the original document +that you sent to Elasticsearch. 
[source,php] --------------------------------------------------- +---------------------------- Array ( [_index] => my_index @@ -109,15 +105,16 @@ Array ( [testField] => abc ) + ) --------------------------------------------------- +---------------------------- -=== 搜索一个文档 +=== Search for a document -搜索是 elasticsearch 的一大特色,所以我们试一下执行一个搜索。我们准备用 Match 查询来作为示范: +Searching is a hallmark of elasticsearch, so let's perform a search. We are going to use the Match query as a demonstration: [source,php] --------------------------------------------------- +---------------------------- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -132,28 +129,29 @@ $params = [ $response = $client->search($params); print_r($response); --------------------------------------------------- +---------------------------- -这个响应数据与前面例子的响应数据有所不同。这里有一些元数据(如 `took`, `timed_out` 等)和一个 `hits` 的数组,这代表了你的搜索结果。而 `hits` 内部也有一个 `hits` 数组,内部的 `hits` 包含特定的搜索结果: +The response is a little different from the previous responses. We see some metadata (`took`, `timed_out`, etc) and +an array named `hits`. This represents your search results. 
Inside of `hits` is another array named `hits`, which contains +individual search results: [source,php] --------------------------------------------------- +---------------------------- Array ( - [took] => 16 + [took] => 1 [timed_out] => [_shards] => Array ( [total] => 5 [successful] => 5 - [skipped] => 0 [failed] => 0 ) [hits] => Array ( [total] => 1 - [max_score] => 0.2876821 + [max_score] => 0.30685282 [hits] => Array ( [0] => Array @@ -161,7 +159,7 @@ Array [_index] => my_index [_type] => my_type [_id] => my_id - [_score] => 0.2876821 + [_score] => 0.30685282 [_source] => Array ( [testField] => abc @@ -170,14 +168,14 @@ Array ) ) ) --------------------------------------------------- +---------------------------- -=== 删除一个文档 +=== Delete a document -好了,现在我们看一下如何把之前添加的文档删除掉: +Alright, let's go ahead and delete the document that we added previously: [source,php] --------------------------------------------------- +---------------------------- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -186,60 +184,54 @@ $params = [ $response = $client->delete($params); print_r($response); --------------------------------------------------- +---------------------------- -你会注意到删除文档的语法与获取文档的语法是一样的。唯一不同的是 `delete` 方法替代了 `get` 方法。下面响应数据代表文档已被删除: +You'll notice this is identical syntax to the `get` syntax. The only difference is the operation: `delete` instead of +`get`. 
The response will confirm the document was deleted: [source,php] --------------------------------------------------- +---------------------------- Array ( + [found] => 1 [_index] => my_index [_type] => my_type [_id] => my_id [_version] => 2 - [result] => deleted - [_shards] => Array - ( - [total] => 2 - [successful] => 1 - [failed] => 0 - ) - - [_seq_no] => 1 - [_primary_term] => 1 ) --------------------------------------------------- +---------------------------- -=== 删除一个索引 -由于 elasticsearch 的动态特性,我们创建的第一个文档会自动创建一个索引,同时也会把 settings 里面的参数设定为默认参数。由于我们在后面要指定特定的 settings,所以现在要删除掉这个索引: +=== Delete an index + +Due to the dynamic nature of elasticsearch, the first document we added automatically built an index with some default settings. Let's delete that index because we want to specify our own settings later: [source,php] --------------------------------------------------- - $deleteParams = [ - 'index' => 'my_index' - ]; - $response = $client->indices()->delete($deleteParams); - print_r($response); --------------------------------------------------- +---------------------------- +$deleteParams = [ + 'index' => 'my_index' +]; +$response = $client->indices()->delete($deleteParams); +print_r($response); +---------------------------- + +The response: -响应数据是: [source,php] --------------------------------------------------- - Array - ( - [acknowledged] => 1 - ) --------------------------------------------------- +---------------------------- +Array +( + [acknowledged] => 1 +) +---------------------------- -=== 创建一个索引 +=== Create an index -由于数据已被清空,我们可以重新开始了,现在要添加一个索引,同时要进行自定义 settings: +Now that we are starting fresh (no data or index), let's add a new index with some custom settings: [source,php] --------------------------------------------------- +---------------------------- $params = [ 'index' => 'my_index', 'body' => [ @@ -252,24 +244,26 @@ $params = [ $response = $client->indices()->create($params); print_r($response); 
--------------------------------------------------- +---------------------------- -Elasticsearch会创建一个索引,并配置你指定的参数值,然后返回一个消息确认: +Elasticsearch will now create that index with your chosen settings, and return an acknowledgement: [source,php] --------------------------------------------------- +---------------------------- Array ( [acknowledged] => 1 - [shards_acknowledged] => 1 - [index] => my_index ) --------------------------------------------------- +---------------------------- + +=== Wrap up -=== 本节结语 +That was just a crash-course overview of the client and its syntax. If you are familiar with elasticsearch, you'll +notice that the methods are named just like REST endpoints. -这里只是概述了一下客户端以及它的语法。如果你很熟悉 elasticsearch,你会注意到这些方法的命名跟 REST 路径(endpoint)是一样的。 +You'll also notice that the client is configured in a manner that facilitates easy discovery via your IDE. All core +actions are available under the `$client` object (indexing, searching, getting, etc). Index and cluster management +are located under the `$client->indices()` and `$client->cluster()` objects, respectively. -你也注意到了客户端的参数配置从某种程度上讲也是方便你的IDE易于搜索。$client对象下的所有核心方法(索引,搜索,获取等)都是可用的。索引管理和集群管理分别在 `$client->indices()` 和 `$client->cluster()` 中。 +Check out the rest of the documentation to see how the entire client works. -请查询文档的其余内容以便知道整个客户端的运作机制。 \ No newline at end of file diff --git a/search-operations.asciidoc b/search-operations.asciidoc index ee83964..9b95393 100644 --- a/search-operations.asciidoc +++ b/search-operations.asciidoc @@ -1,16 +1,16 @@ -[[_search_operations]] -== 搜索操作 +== Search Operations -呃......这个项目如果没有什么特别之处就不叫 elasticsearch 了(译者注:elasticsearch 直译过来就是“灵活搜索”)!现在一起来聊聊客户端的搜索操作。 +Well...it isn't called elasticsearch for nothing! Let's talk about search operations in the client. -在命名方案规范的前提下,客户端拥有一切的查询权限,也拥有获取 REST API 公开的一切参数的权限。现在来看看一些示例,方便你熟悉这些语法规则。 +The client gives you full access to every query and parameter exposed by the REST API, following the naming scheme as +much as possible. 
Let's look at a few examples so you can become familiar with the syntax. -=== Match查询 +=== Match Query -以下是 Match 查询的标准 curl 格式: +Here is a standard curl for a Match query: -[source,js] --------------------------------------------------- +[source,shell] +---- curl -XGET 'localhost:9200/my_index/my_type/_search' -d '{ "query" : { "match" : { @@ -18,12 +18,13 @@ curl -XGET 'localhost:9200/my_index/my_type/_search' -d '{ } } }' --------------------------------------------------- +---- +{zwsp} + -而这里则是客户端构建的同样的查询: +And here is the same query constructed in the client: [source,php] --------------------------------------------------- +---- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -37,12 +38,15 @@ $params = [ ]; $results = $client->search($params); --------------------------------------------------- +---- +{zwsp} + -这里要注意 PHP 数组的结构与层次是怎样与 curl 中的 JSON 请求体格式相应对的。这种方式使得 JSON 的写法转换为 PHP 的写法变得十分简单。一个快速检测 PHP 数组是否为预期结果的方法,就是encode为 JSON 格式,然后进行检查: +Notice how the structure and layout of the PHP array is identical to that of the JSON request body. This makes it very +simple to convert JSON examples into PHP. A quick method to check your PHP array (for more complex examples) is to +encode it back to JSON and check by eye: [source,php] --------------------------------------------------- +---- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -59,14 +63,17 @@ print_r(json_encode($params['body'])); {"query":{"match":{"testField":"abc"}}} --------------------------------------------------- +---- +{zwsp} + -.使用原生JSON -************************************************** -有时使用原生JSON来进行测试会十分方便,或者用原生JSON来进行不同系统的移植也同样方便。你可以在body中用原生JSON字符串,这样客户端会进行自动检查操作: + +.Using Raw JSON +**** +Sometimes it is convenient to use raw JSON for testing purposes, or when migrating from a different system. 
You can +use raw JSON as a string in the body, and the client will detect this automatically: [source,php] --------------------------------------------------- +---- $json = '{ "query" : { "match" : { @@ -82,13 +89,15 @@ $params = [ ]; $results = $client->search($params); --------------------------------------------------- -************************************************** +---- +**** +{zwsp} + -搜索结果与 Elasticsearch 的响应结果一致,唯一不同的是 JSON 格式会转换成 PHP 数组。处理这些数据与数组迭代一样简单: +Search results follow the same format as Elasticsearch search response, the only difference is that the JSON response is +serialized back into PHP arrays. Working with the search results is as simple as iterating over the array values: [source,php] --------------------------------------------------- +---- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -108,14 +117,14 @@ $maxScore = $results['hits']['max_score']; $score = $results['hits']['hits'][0]['_score']; $doc = $results['hits']['hits'][0]['_source']; --------------------------------------------------- - -=== Bool查询 +---- +{zwsp} + -利用客户端可以轻松构建 Bool 查询。例如以下查询: +=== Bool Queries -[source,js] --------------------------------------------------- +Bool queries can be easily constructed using the client. 
For example, this query: +[source,shell] +---- curl -XGET 'localhost:9200/my_index/my_type/_search' -d '{ "query" : { "bool" : { @@ -130,12 +139,12 @@ curl -XGET 'localhost:9200/my_index/my_type/_search' -d '{ } } }' --------------------------------------------------- - -会构建为这样子(注意方括号位置): +---- +{zwsp} + +Would be structured like this (Note the position of the square brackets): [source,php] --------------------------------------------------- +---- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -152,18 +161,22 @@ $params = [ ]; $results = $client->search($params); --------------------------------------------------- +---- +{zwsp} + -这里注意 must 语句接收的是数组。这里会转化为 JSON 数组,所以最后的响应结果与 curl 格式的响应结果一致。想了解 PHP 中数组和对象的转换,请查看link:php_json_objects.html[用PHP处理JSON数组和JSON对象]。 +Notice that the `must` clause accepts an array of arrays. This will be serialized into an array of JSON objects internally, +so the final resulting output will be identical to the curl example. For more details about arrays vs objects in PHP, +see link:php_json_objects.html[Dealing with JSON Arrays and Objects in PHP]. -=== 更为复杂的示例 +=== A more complicated example -这里构建一个有点复杂的例子:一个 bool 查询包含一个 filter 过滤器和一个普通查询。这在 elasticsearch 的查询中非常普遍,所以这个例子会非常有用。 +Let's construct a slightly more complicated example: a boolean query that contains both a filter and a query. +This is a very common activity in elasticsearch queries, so it will be a good demonstration. 
-curl 格式的查询: +The curl version of the query: -[source,js] --------------------------------------------------- +[source,shell] +---- curl -XGET 'localhost:9200/my_index/my_type/_search' -d '{ "query" : { "bool" : { @@ -176,12 +189,13 @@ curl -XGET 'localhost:9200/my_index/my_type/_search' -d '{ } } }' --------------------------------------------------- +---- +{zwsp} + -而在 PHP 中: +And in PHP: [source,php] --------------------------------------------------- +---- $params = [ 'index' => 'my_index', 'type' => 'my_type', @@ -201,20 +215,27 @@ $params = [ $results = $client->search($params); --------------------------------------------------- +---- +{zwsp} + + -=== Scrolling(游标)查询 +=== Scrolling -在用 bulk 时,经常要用 Scrolling 功能对文档进行分页处理,如输出一个用户的所有文档。这比常规的搜索要高效,因为这里不需要对文档执行性能消耗较大的排序操作。 +The Scrolling functionality of Elasticsearch is used to paginate over many documents in a bulk manner, such as exporting +all the documents belonging to a single user. It is more efficient than regular search because it doesn't need to maintain +an expensive priority queue ordering the documents. -Scrolling 会保留某个时间点的索引快照数据,然后用快照数据进行分页。游标查询窗口允许持续分页操作,即使后台正在执行索引文档、更新文档和删除文档。首先,你要在发送搜索请求时增加 scroll 参数。然后就会返回一个文档“页数”信息,还有一个用来获取 hits 分页数据的 scroll_id。 +Scrolling works by maintaining a "point in time" snapshot of the index which is then used to page over. +This window allows consistent paging even if there is background indexing/updating/deleting. First, you execute a search +request with `scroll` enabled. This returns a "page" of documents, and a scroll_id which is used to continue +paginating through the hits. -更多详情请查看https://www.elastic.co/guide/cn/elasticsearch/guide/current/scroll.html[游标查询]。 +More details about scrolling can be found in the https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html[Link: reference documentation]. 
-以下代码更为深入的操作的示例: +This is an example which can be used as a template for more advanced operations: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create()->build(); $params = [ "scroll" => "30s", // how long between scroll requests. should be small! @@ -222,7 +243,7 @@ $params = [ "index" => "my_index", "body" => [ "query" => [ - "match_all" => new \stdClass() + "match_all" => new \stdClass() ] ] ]; @@ -250,4 +271,4 @@ while (isset($response['hits']['hits']) && count($response['hits']['hits']) > 0) ] ); } --------------------------------------------------- +---- diff --git a/security.asciidoc b/security.asciidoc index c6c05ab..f0e632a 100644 --- a/security.asciidoc +++ b/security.asciidoc @@ -1,14 +1,16 @@ -[[_security]] -== 安全 -Elasticsearch-PHP 客户端支持两种安全设置方式:HTTP 认证和 SSL 加密。 +== Security -=== HTTP 认证 +The Elasticsearch-PHP client supports two security features: HTTP Authentication and SSL encryption. -如果你的 Elasticsearch 是通过 HTTP 认证来维持安全,你就要为 Elasticsearch-PHP 客户端提供身份凭证(credentials),这样服务端才能认证客户端请求。在实例化客户端时,身份凭证(credentials)需要配置在 host 数组中: +=== HTTP Authentication + +If your Elasticsearch server is protected by HTTP Authentication, you need to provide the credentials to ES-PHP so +that requests can be authenticated server-side. Authentication credentials are provided as part of the host array +when instantiating the client: [source,php] --------------------------------------------------- +---- $hosts = [ 'http://user:pass@localhost:9200', // HTTP Basic Authentication 'http://user2:pass2@other-host.com:9200' // Different credentials on different host @@ -17,30 +19,35 @@ $hosts = [ $client = ClientBuilder::create() ->setHosts($hosts) ->build(); --------------------------------------------------- +---- -每个 host 都要添加身份凭证(credentials),这样的话每个 host 都拥有自己的身份凭证(credentials)。所有发送到集群中的请求都会根据访问节点来使用相应的身份凭证(credentials)。 +Credentials are provided per-host, which allows each host to have their own set of credentials. 
All requests sent to the +cluster will use the appropriate credentials depending on the node being talked to. -=== SSL 加密 +=== SSL Encryption -配置 SSL 会有些复杂。你要去识别 Certificate Authority (CA) 签名的证书或者自签名证书。 +Configuring SSL is a little more complex. You need to identify if your certificate has been signed by a public +Certificate Authority (CA), or if it is a self-signed certificate. [NOTE] -.libcurl版本注意事项 -==== -如果你觉得客户端已经正确配置SSL,但是没有起效,请检查你的libcurl版本。在某些平台上,一些设置可能有效也可能无效,这取决于 -libcurl版本号。例如直到libcurl 7.37.1,OSX平台的libcurl才添加 `--cacert` 选项。 `--cacert` 选项对应PHP的 `CURLOPT_CAINFO` 常量, -这就意味着自定义的证书在低版本下是无法使用的。 +.A note on libcurl version +================= +If you believe the client is configured to correctly use SSL, but it simply is not working, check your libcurl +version. On certain platforms, various features may or may not be available depending on the version number of libcurl. +For example, the `--cacert` option was not added to the OSX version of libcurl until version 7.37.1. The `--cacert` +option is equivalent to PHP's `CURLOPT_CAINFO` constant, meaning that custom certificate paths will not work on lower +versions. -如果你现在正面临这个问题,请更新你的libcurl,然后/或者查看https://curl.haxx.se/changes.html[curl changelog]有无增加该选项。 -==== +If you are encountering problems, update your libcurl version and/or check the https://curl.haxx.se/changes.html[curl changelog]. +================= -==== 公共 CA 证书 +==== Public CA Certificates -如果你的证书是公共 CA 签名证书,且你的服务器用的是最新的根证书,你只需要在 host 中使用 https。客户端会自动识别 SSL 证书: +If your certificate has been signed by a public Certificate Authority and your server has up-to-date root certificates, +you only need to use `https` in the host path. 
The client will automatically verify SSL certificates: [source,php] --------------------------------------------------- +---- $hosts = [ 'https://localhost:9200' <1> ]; @@ -48,13 +55,17 @@ $hosts = [ $client = ClientBuilder::create() ->setHosts($hosts) ->build(); --------------------------------------------------- -<1> 注意:这里用的是 https 而非 http +---- +<1> Note that `https` is used, not `http` + + +If your server has outdated root certificates, you may need to use a certificate bundle. For PHP clients, the best +way is to use https://github.com/composer/ca-bundle[composer/ca-bundle]. Once installed, you need to tell the client to +use your certificates instead of the system-wide bundle. To do this, specify the path to verify: -如果服务器的根证书已经过期,你就要用证书 bundle。对于客户端来说,最好的方法是使用https://github.com/composer/ca-bundle[composer/ca-bundle]。一旦安装好 ca-bundle,你要告诉客户端使用你提供的证书来替代系统的 bundle: [source,php] --------------------------------------------------- +---- $hosts = ['https://localhost:9200']; $caBundle = \Composer\CaBundle\CaBundle::getBundledCaBundlePath(); @@ -62,16 +73,20 @@ $client = ClientBuilder::create() ->setHosts($hosts) ->setSSLVerification($caBundle) ->build(); --------------------------------------------------- +---- -==== 自签名证书 +==== Self-signed Certificates -自签名证书是指没有被公共 CA 签名的证书。自签名证书由你自己的组织来签名。在你确保安全发送自己的根证书前提下,自签名证书可用作内部使用的。当自签名证书暴露给公众客户时就不应该使用了,因为客户端容易受到中间人攻击。 +Self-signed certificates are certificates that have not been signed by a public CA. They are signed by your own organization. +Self-signed certificates are often used for internal purposes, when you can securely spread the root certificate +yourself. They should not be used when exposed to public consumers, since this leaves the client vulnerable to +man-in-the-middle attacks. -如果你正使用自签名证书,你要给客户端提供证书路径。这与指定一个根 bundle 的语法一致,只是把根 bundle 替换为自签名证书: +If you are using a self-signed certificate, you need to provide the certificate to the client. 
This is the same syntax +as specifying a new root bundle, but instead you point to your certificate: [source,php] --------------------------------------------------- +---- $hosts = ['https://localhost:9200']; $myCert = 'path/to/cacert.pem'; @@ -79,14 +94,17 @@ $client = ClientBuilder::create() ->setHosts($hosts) ->setSSLVerification($myCert) ->build(); --------------------------------------------------- +---- + -=== 同时使用认证与 SSL +=== Using Authentication with SSL -同时使用认证与 SSL 也是有可能的。在 URI 中指定 `https` 与身份凭证(credentials),同时提供 SSL 所需的自签名证书。例如下面的代码段就同时使用了 HTTP 认证和自签名证书: +It is possible to use HTTP authentication with SSL. Simply specify `https` in the URI, configure SSL settings as +required and provide authentication credentials. For example, this snippet will authenticate using Basic HTTP auth +and a self-signed certificate: [source,php] --------------------------------------------------- +---- $hosts = ['https://user:pass@localhost:9200']; $myCert = 'path/to/cacert.pem'; @@ -94,4 +112,4 @@ $client = ClientBuilder::create() ->setHosts($hosts) ->setSSLVerification($myCert) ->build(); --------------------------------------------------- +---- \ No newline at end of file diff --git a/selectors.asciidoc b/selectors.asciidoc index 604494d..f091c1f 100644 --- a/selectors.asciidoc +++ b/selectors.asciidoc @@ -1,67 +1,78 @@ -[[_selectors]] -== 选择器 -连接池维持一份连接清单,它决定节点在什么时候从活节点转变为死节点(或死节点转变为活节点)。然而连接池选择连接对象时是没有逻辑的,这份工作属于 Selector 类。 +== Selectors -选择器(selector)的工作是从连接数组中返回一个连接。和连接池一样,也有几种选择器可供选择。 +The connection pool maintains the list of connections, and decides when nodes should transition from alive to dead (and +vice versa). It has no logic to choose connections, however. That job belongs to the Selector class. -=== RoundRobinSelector(默认) +The selector's job is to return a single connection from a provided array of connections. Like the Connection Pool, +there are several implementations to choose from. 
-选择器通过轮询调度的方式来返回连接。例如在第一个请求中选择节点1,在第二请求中选择节点 2,以此类推。这确保集群中的节点平均负担流量。轮询调度是基于每个请求来执行的(例如,一个PHP脚本的所有请求轮流发送到不同的节点中)。 +=== RoundRobinSelector (Default) -`RoundRobinSelector` 是默认选择器,但如果你想明确地配置该选择器,你可以这样做: +This selector returns connections in a round-robin fashion. Node #1 is selected on the first request, Node #2 on +the second request, etc. This ensures an even load of traffic across your cluster. Round-robin'ing happens on a +per-request basis (e.g. sequential requests go to different nodes). + +The `RoundRobinSelector` is the default, but if you wish to explicitly configure it you can do: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create() ->setSelector('\Elasticsearch\ConnectionPool\Selectors\RoundRobinSelector') ->build(); --------------------------------------------------- +---- -注意:要通过命名空间加类名的方法来指定选择器。 +Note that the implementation is specified via a namespace path to the class. === StickyRoundRobinSelector -这个选择器具有“粘性”,它更喜欢重用同一个连接。例如,在第一个请求中选择节点1,选择器会重用节点1来发送随后的请求,直到节点请求失败。在节点1请求失败后,选择器会轮询至下一个可用节点,然后一直重用这个节点。 +This selector is "sticky", in that it prefers to reuse the same connection repeatedly. For example, Node #1 is chosen +on the first request. Node #1 will continue to be re-used for each subsequent request until that node fails. Upon failure, +the selector will round-robin to the next available node, then "stick" to that node. -对许多 PHP 脚本来说,这是一个理想的策略。由于 PHP 脚本是无共享架构且会快速退出,为每个请求创建新连接通常是一种次优策略且会引起大量的开销。相反,在脚本运行期间“黏住”单个节点会更好。 +This is an ideal strategy for many PHP scripts. Since PHP scripts are shared-nothing and tend to exit quickly, creating +new connections for each request is often a sub-optimal strategy and introduces a lot of overhead. Instead, it is +better to "stick" to a single connection for the duration of the script. 
-这个选择器会默认会在初始化时把 hosts 随机打乱,但仍然保证集群中的节点平均负担流量。它动态地更改轮询方式,把轮询每个请求变成轮询每个脚本。 +By default, this selector will randomize the hosts upon initialization, which will still guarantee an even distribution +of load across the cluster. It changes the round-robin dynamics from per-request to per-script. -如果你使用 <<_future_mode>> ,这种选择器的“粘性”行为就不理想了,因为所有并行的请求会发送到集群中的同一个节点而非多个节点。当使用 Future 模式时,默认的 `RoundRobinSelector` 选择器会更好。 +If you are using <<_future_mode>>, the "sticky" behavior of this selector will be non-ideal, since all parallel requests +will go to the same node instead of multiple nodes in your cluster. When using future mode, the default `RoundRobinSelector` +should be preferred. -如果你要使用该选择器,你要这样做: +If you wish to use this selector, you may do so with: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create() ->setSelector('\Elasticsearch\ConnectionPool\Selectors\StickyRoundRobinSelector') ->build(); --------------------------------------------------- +---- -注意:要通过命名空间加类名的方法来指定选择器。 +Note that the implementation is specified via a namespace path to the class. === RandomSelector -这种选择器仅仅返回一个随机的节点,不管节点是处于什么状态。这个选择器通常用做测试。 +This selector simply returns a random node, regardless of state. It is generally just for testing. -如果你要使用该选择器,你要这样做: +If you wish to use this selector, you may do so with: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create() ->setSelector('\Elasticsearch\ConnectionPool\Selectors\RandomSelector') ->build(); --------------------------------------------------- +---- -注意:要通过命名空间加类名的方法来指定选择器。 +Note that the implementation is specified via a namespace path to the class. -=== 自定义选择器 +=== Custom Selector -你可以实现自定义选择器。自定义选择器必须实现 `SelectorInterface` 接口。 +You can implement your own custom selector. 
Custom selectors must implement `SelectorInterface`: [source,php] --------------------------------------------------- +---- namespace MyProject\Selectors; use Elasticsearch\Connections\ConnectionInterface; @@ -83,16 +94,17 @@ class MyCustomSelector implements SelectorInterface } } --------------------------------------------------- +---- +{zwsp} + -然后你可以通过对象注入或命名空间实例化方式来使用自定义选择器: +You can then use your custom selector either via object injection or namespace instantiation: [source,php] --------------------------------------------------- +---- $mySelector = new MyCustomSelector(); $client = ClientBuilder::create() ->setSelector($mySelector) // object injection ->setSelector('\MyProject\Selectors\FirstSelector') // or namespace ->build(); --------------------------------------------------- \ No newline at end of file +---- diff --git a/serializers.asciidoc b/serializers.asciidoc index c67bb7d..4b90fd5 100644 --- a/serializers.asciidoc +++ b/serializers.asciidoc @@ -1,97 +1,115 @@ -[[_serializers]] -== 序列化器 -客户端有 3 种序列化器可用。你可能永远都不会更改序列化器,除非你有特殊需求或者要实现一个新的协议。 +== Serializers -序列化器的工作是 encode 发送的请求体和 decode 返回的响应体。在 99% 的例子中,这就是一种简单转换为JSON数据或解析JSON数据的工具。 +The client has three serializers available. You will most likely never need +to change the serializer, unless you have special requirements or are +implementing a new protocol. -默认的序列化器是 `SmartSerializer` 。 +The job of the serializer is to encode the outgoing request body and decode +the incoming response body. In 99% of cases, this is a simple conversion +to/from JSON. -=== SmartSerializer +The default serializer is the `SmartSerializer`. +=== SmartSerializer ==== Serialize() +The `SmartSerializer` inspects the data to be encoded. If the request body +is provided as a string, it is passed directly to Elasticsearch as a string. +This allows users to provide raw JSON, or raw strings for certain endpoints that +don't have structure (such as the Analyze endpoint). 
-`SmartSerializer` 会先检查需要 encode 的数据。如果请求体是字符串,那么会直接发送到 Elasticsearch。这种方式允许用户提供原生JSON数据,或是字符串(提供给某些没有结构的 endpoint,例如 Analyze endpoint)。 - -如果数据是数组,则会被转换为 JSON 数据。如果数据是空数组,那么序列化器需要手动转换空数组( `[]` )为空对象( `{}` ),这样发送给 Elasticsearch 的请求体数据才是有效的 JSON 数据。 +If the data is an array, it is converted to JSON. If the data provided was an +empty array, the serializer manually converts the JSON from an empty array (`[]`) +to an empty object (`{}`) so that it is valid JSON for Elasticsearch request +bodies. ==== Deserialize() +When decoding the response body, the `SmartSerializer` introspects the +`content_type` headers to determine the appropriate encoding. If the data is +encoded as JSON, it is decoded into an array using `json_decode`. Otherwise, +it is returned as a string. -当 decode 响应体数据时, `SmartSerializer` 会检测响应头的 `content_type` 来判断是否为合适的encode数据。假如数据 encode 为 JSON 数据,那么会用 `json_decode` 来解析 JSON 数据为数组。否则会以字符串的格式返回给客户端。 +This functionality is required to cooperate with endpoints such as the `Cat` +endpoints, which return tabular text instead of JSON. -这个功能需要与 endpoint 协作,例如 `Cat` endpoints 会返回表格文本而非 JSON 数据。 +==== Selecting the SmartSerializer -==== 选择SmartSerializer - -客户端默认选择 `SmartSerializer` ,但如果你想手动地配置这个选择器,你可以在 ClientBuilder 对象中使用 `setSerializer()` 方法: +The `SmartSerializer` is selected by default, but if you wish to manually configure it for explicitness, you can +do so by using the `setSerializer()` method on the ClientBuilder object: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create() ->setSerializer('\Elasticsearch\Serializers\SmartSerializer'); ->build(); --------------------------------------------------- +---- -注意:要通过命名空间加类名的方法来配置序列化器。 +Note that the serializer is configured by specifying a namespace path to the serializer. === ArrayToJSONSerializer - ==== Serialize() +The `ArrayToJSONSerializer` inspects the data to be encoded. 
If the request body +is provided as a string, it is passed directly to Elasticsearch as a string. +This allows users to provide raw JSON, or raw strings for certain endpoints that +don't have structure (such as the Analyze endpoint). -`ArrayToJSONSerializer` 会先检查需要 encode 的数据。如果请求体是字符串,那么会直接发送到 Elasticsearch。这种方式允许用户提供原生 JSON 数据,或是字符串(提供给某些没有结构的 endpoint,例如 Analyze endpoint)。 - -如果数据是数组,则会被转换为 JSON 数据。如果数据是空数组,那么序列化器需要手动转换空数组( `[]` )为空对象( `{}` ),这样发送给 Elasticsearch 的请求体数据才是有效的 JSON 数据。 +If the data is an array, it is converted to JSON. If the data provided was an +empty array, the serializer manually converts the JSON from an empty array (`[]`) +to an empty object (`{}`) so that it is valid JSON for Elasticsearch request +bodies. ==== Deserialize() +When decoding the response body, the body is decoded from JSON. If +the data is not valid JSON, `null` will be returned. -当 decode 响应体数据时,所有数据都会 encode 由 JSON 数据 decode 为 JSON 数据。如果数据不是有效的 JSON 数据,那么会返回 `null` 给客户端。 +==== Selecting the ArrayToJSONSerializer -==== 选择 ArrayToJSONSerializer - -你可以通过使用 ClientBuilder 对象的 `setSerializer()` 方法来选择 `ArrayToJSONSerializer` : +You can select `ArrayToJSONSerializer` by using the `setSerializer()` method on the ClientBuilder object: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create() ->setSerializer('\Elasticsearch\Serializers\ArrayToJSONSerializer'); ->build(); --------------------------------------------------- +---- -注意:要通过命名空间加类名的方法来配置序列化器。 +Note that the serializer is configured by specifying a namespace path to the serializer. === EverythingToJSONSerializer - ==== Serialize() +The `EverythingToJSONSerializer` tries to convert everything to JSON. 
-`EverythingToJSONSerializer` 会把一切数据转换为JSON数据。 - -如果数据是空数组,那么序列化器需要手动转换空数组( `[]` )为空对象( `{}` ),这样发送给 Elasticsearch 的请求体数据才是有效的 JSON 数据。 +If the data provided was an empty array, the serializer manually converts the +JSON from an empty array (`[]`) to an empty object (`{}`) so that it is valid +JSON for Elasticsearch request bodies. -如果数据不是数组且(或)没有转换为 JSON 数据,那么这个方法会返回 `null` 给客户端。 +If the data was not an array and/or not convertible to JSON, the method returns +`null`. ==== Deserialize() +When decoding the response body, the body is decoded from JSON. If +the data is not valid JSON, `null` will be returned. -当 decode 响应体数据时,所有数据都会 encode 由 JSON 数据 decode 为 JSON 数据。如果数据不是有效的 JSON 数据,那么会返回 `null` 给客户端。 +==== Selecting the EverythingToJSONSerializer -==== 选择 EverythingToJSONSerializer - -你可以通过使用 ClientBuilder 对象的 `setSerializer()` 方法来选择 `EverythingToJSONSerializer` : +You can select `EverythingToJSONSerializer` by using the `setSerializer()` method on the ClientBuilder object: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create() ->setSerializer('\Elasticsearch\Serializers\EverythingToJSONSerializer'); ->build(); --------------------------------------------------- +---- -注意:要通过命名空间加类名的方法来配置序列化器。 +Note that the serializer is configured by specifying a namespace path to the serializer. -=== 实现自定义序列化器 +=== Implementing your own Serializer +If you want to use your own custom serializer, you need to implement the `SerializerInterface` interface. Please +keep in mind that the client uses a single Serializer object for all endpoints and all connections. 
-如果你想使用自定义序列器,你需要实现 `SerializerInterface` 接口。请记住,对于所有的 endpoint 和连接来说,客户端只使用一个序列器对象。 [source,php] --------------------------------------------------- +---- class MyCustomSerializer implements SerializerInterface { @@ -120,25 +138,30 @@ class MyCustomSerializer implements SerializerInterface // code here } } --------------------------------------------------- +---- +{zwsp} + -然后为了使用你自定义的序列化器,你可以通过使用 ClientBuilder 对象的 `setSerializer()` 方法来配置序列化器(命名空间加类名格式): +To then use your custom serializer, you can specify the namespace path in the `setSerializer()` method of the ClientBuilder +object: [source,php] --------------------------------------------------- +---- $client = ClientBuilder::create() ->setSerializer('\MyProject\Serializers\MyCustomSerializer'); ->build(); --------------------------------------------------- +---- -如果你的序列化器在注入到客户端前已经实例化,或者序列化器对象需要进一步初始化,你可以通过以下方式来实例化序列化器对象并注入到客户端: +Alternatively, if your serializer has a constructor or further initialization that should occur before given to the +client, you can instantiate an object and provide that instead: [source,php] --------------------------------------------------- +---- $mySerializer = new MyCustomSerializer($a, $b, $c); $mySerializer->setFoo("bar"); $client = ClientBuilder::create() ->setSerializer($mySerializer); ->build(); --------------------------------------------------- +---- + +