Merge pull request #706 from Gschiavon/feature/in-outputs-update
Update documentation of inputs and outputs
dcarroza-stratio committed Aug 27, 2015
2 parents 346f438 + d1516ac commit 80c0817
Showing 4 changed files with 175 additions and 24 deletions.
48 changes: 41 additions & 7 deletions doc/src/site/sphinx/inputs.rst
@@ -33,11 +33,10 @@ Read events from apache-flume
"type": "Flume",
"configuration": {
"type": "pull",
"addresses": "localhost:10999"
"addresses": "localhost:10999",
"maxBatchSize": 500
}
}
]

+-----------------+------------------------------------------------------------------+------------+
| Property | Description | Optional |
@@ -74,7 +73,7 @@ Read events from apache-kafka
"kafkaParams.group.id": "kafka-pruebas"
}
}
]

+--------------------------------+----------------------------------------------------------+------------+
| Property | Description | Optional |
+================================+==========================================================+============+
@@ -93,6 +92,42 @@ Read events from apache-kafka
| kafkaParams.group.id | within the same consumer group | |
+--------------------------------+----------------------------------------------------------+------------+


.. _kafkaDirect-label:

Input-KafkaDirect
=================

Read events from KafkaDirect
* Sample:
::

"input":
{
"name": "in",
"type": "KafkaDirect",
"configuration": {
"topics": "test",
"kafkaParams.metadata.broker.list": "localhost:9092",
"kafkaParams.group.id": "my-spt-grp"
}
}

+----------------------------------+----------------------------------------------------------+------------+
| Property | Description | Optional |
+==================================+==========================================================+============+
| name | Name of the input | No |
+----------------------------------+----------------------------------------------------------+------------+
| type | The type of the input; it's used to instantiate | No |
| | specific classes | |
+----------------------------------+----------------------------------------------------------+------------+
| topics | Kafka topic to connect to in order to read the data | No |
| | it generates | |
+----------------------------------+----------------------------------------------------------+------------+
| kafkaParams.metadata.broker.list | Defines where the producer can find one or more | No |
| | brokers to determine the leader for each topic | |
+----------------------------------+----------------------------------------------------------+------------+
| kafkaParams.group.id | It's a string that uniquely identifies a set of consumers| No |
| | within the same consumer group | |
+----------------------------------+----------------------------------------------------------+------------+

.. _rabbitMQ-label:

Input-rabbitMQ
@@ -110,13 +145,13 @@ Reads events from rabbitMQ
"queue": "test",
"host": "localhost",
"port": 5672,
"exchangeName": "twitterExchange"
"exchangeName": "twitterExchange",
"routingKeys": [
"routingKey3"
]
}
}
]

+------------------+-----------------------------------------------------------------+-----------------------------------+
| Property | Description | Optional |
+==================+=================================================================+===================================+
@@ -161,7 +196,7 @@ Reads events from a socket
"port": "10666"
}
}
]

+------------------+---------------------------------------------------------+-----------+
| Property | Description | Optional |
+==================+=========================================================+===========+
@@ -197,7 +232,6 @@ Reads events from Twitter API
"termsOfSearch": "#Your,search,#terms,could be,#whatever"
}
}
]

+-------------------+-----------------------------------------------------------+------------+
| Property | Description | Optional |
147 changes: 132 additions & 15 deletions doc/src/site/sphinx/outputs.rst
@@ -16,6 +16,8 @@ Outputs Configurations

- :ref:`parquet-label`

- :ref:`csv-label`


.. image:: images/outputs.png
:height: 400 px
@@ -26,15 +28,14 @@ Outputs Configurations




.. _generic-label:

Generic Configuration
=======================

In the SDK you can find the model that must follow an output to be implemented.It has several settings that can modify system operation.
In the SDK you can find the model that an output has to follow to be implemented. It has several settings that can modify system operation.

These parameters can be completed in the policy file:
These parameters can be set in the policy file:


+-----------------------+----------------------------------------------------------+----------+-----------------------+
@@ -68,8 +69,21 @@ These parameters can be completed in the policy file:

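A minimal sketch of how these generic settings fit into an output block; the
property names isAutoCalculateId and multiplexer are taken from the samples
below, and the values are purely illustrative:
::

    "outputs": [
    {
      "name": "out-generic",
      "type": "Print",
      "configuration": {
        "isAutoCalculateId": "false",
        "multiplexer": "false"
      }
    }
    ]
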
MongoDB Configuration
==========================

The output of MongoDB use the generic implementation with DataFrames, it has multiple configuration
* Sample:
::

"outputs": [
{
"name": "out-mongo",
"type": "MongoDb",
"configuration": {
"hosts": "localhost",
"dbName": "sparkta",
"identitiesSaved": "true",
"idAsField": "true"
}
}
]

The output of MongoDB uses the generic implementation with DataFrames; it has multiple configuration
parameters to connect to the DB and to create indexes automatically.


@@ -89,7 +103,7 @@ parameters to connect to the DB and self-creation of indexes.
| | number of threads that may be waiting for a | | |
| | connection to become available from the pool. | | |
+-----------------------+----------------------------------------------------+----------+---------------------------+
| idAsField | Is possible to save all fields that compound the | Yes | false |
| idAsField | It's possible to save all fields that make up the | Yes | false |
| | unique key as an independent field. | | |
+-----------------------+----------------------------------------------------+----------+---------------------------+
| textIndexFields | The system is capable of inserting data in a full-text| Yes | |
@@ -100,7 +114,7 @@ parameters to connect to the DB and self-creation of indexes.
| | inserted must have this key-value. | | |
+-----------------------+----------------------------------------------------+----------+---------------------------+
| retrySleep | The number of milliseconds to wait for reconnect | Yes | 1000 |
| | with MongoDb nodes when the last client fails. It | | |
| | with MongoDB nodes when the last client fails. It | | |
| | is advisable to set this lower than the slide | | |
| | interval of the streaming window. | | |
+-----------------------+----------------------------------------------------+----------+---------------------------+
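
For instance, the full-text options above could be combined as in this sketch
(textIndexFields, language and retrySleep are the parameter names from the
table; the field names and values are illustrative):
::

    "outputs": [
    {
      "name": "out-mongo-text",
      "type": "MongoDb",
      "configuration": {
        "hosts": "localhost",
        "dbName": "sparkta",
        "textIndexFields": "userName,message",
        "language": "english",
        "retrySleep": "500"
      }
    }
    ]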
@@ -111,8 +125,22 @@ parameters to connect to the DB and self-creation of indexes.

Cassandra Configuration
==============================

The output of Cassandra use the generic implementation with DataFrames, this implementation transform each
* Sample:
::

"outputs": [
{
"name": "out-cassandra",
"type": "Cassandra",
"configuration": {
"connectionHost": "127.0.0.1",
"connectionPort": "9142",
"cluster": "Test Cluster",
"keyspace": "sparkta"
}
}
]

The output of Cassandra uses the generic implementation with DataFrames; this implementation transforms each
UpdateMetricOperation to Spark's Row type and identifies each row with its schema.


@@ -159,7 +187,7 @@ UpdateMetricOperation to Row type of Spark and identify each row with his schema
| | or clustering column field. | | |
+-----------------------+----------------------------------------------------------+----------+-----------------------+
| textIndexFields | The text index fields, this feature is for the Stratio's | Yes | |
| | Cassandra Lucene Index | | |
| | Cassandra Lucene Index | | |
+-----------------------+----------------------------------------------------------+----------+-----------------------+
| analyzer | The analyzer for text index fields, this feature is for | Yes | None |
| | the Stratio's Cassandra Lucene Index | | |
@@ -176,8 +204,24 @@ UpdateMetricOperation to Row type of Spark and identify each row with his schema

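As an illustration, the Stratio Cassandra Lucene Index parameters above could
be added to the Cassandra sample like this (a sketch; the field names and the
analyzer value are illustrative):
::

    "outputs": [
    {
      "name": "out-cassandra-lucene",
      "type": "Cassandra",
      "configuration": {
        "connectionHost": "127.0.0.1",
        "connectionPort": "9142",
        "keyspace": "sparkta",
        "textIndexFields": "userName,message",
        "analyzer": "english"
      }
    }
    ]
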
ElasticSearch Configuration
==============================

The output of ElasticSearch use the generic implementation with DataFrames, this implementation transform each
* Sample:
::


"outputs": [
{
"name": "out-elasticsearch",
"type": "ElasticSearch",
"configuration": {
"nodes": "localhost",
"defaultPort": "9200",

"isAutoCalculateId": "true",
"indexMapping": "day"
}
}
]

The output of ElasticSearch uses the generic implementation with DataFrames; this implementation transforms each
UpdateMetricOperation to Spark's Row type and identifies each row with its schema.


@@ -191,7 +235,7 @@ UpdateMetricOperation to Row type of Spark and identify each row with his schema
+--------------------------+-----------------------------------------------+----------+-----------------------+
| idField | Field used as unique id for the row. | Yes | "id" |
+--------------------------+-----------------------------------------------+----------+-----------------------+
| indexMapping | Field used as mapping for the index. | Yes | "sparkta" |
| indexMapping | Field used as mapping for the index. | Yes | "sparkta" |
+--------------------------+-----------------------------------------------+----------+-----------------------+
| dateType | The type of the date fields. | Yes | None |
+--------------------------+-----------------------------------------------+----------+-----------------------+
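
A sketch combining the optional parameters documented above (idField,
indexMapping and dateType are the names from the table; the values are
illustrative):
::

    "outputs": [
    {
      "name": "out-elasticsearch-custom",
      "type": "ElasticSearch",
      "configuration": {
        "nodes": "localhost",
        "defaultPort": "9200",
        "idField": "eventId",
        "indexMapping": "day",
        "dateType": "timestamp"
      }
    }
    ]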
@@ -201,7 +245,19 @@

Redis Configuration
====================

* Sample:
::

"outputs": [
{
"name": "out-redis",
"type": "Redis",
"configuration": {
"hostname": "localhost",
"port": 63790
}
}
]

The output of Redis doesn't use the generic implementation with DataFrames.


@@ -219,14 +275,40 @@ The output of Redis doesn't use the generic implementation with DataFrames.

Print Configuration
====================

* Sample:
::

"outputs": [
{
"name": "out-print",
"type": "Print",
"configuration": {
"isAutoCalculateId": "false"
}
}
]

The Print output uses the generic implementation with DataFrames; this implementation prints each DataFrame with its
schema.

.. _parquet-label:

Parquet Configuration
=====================

* Sample:
::

"outputs": [
{
"name": "out-parquet",
"type": "Parquet",
"jarFile" : "output-parquet-plugin.jar",
"configuration": {
"path": "/tmp/sparkta/operators/parquet",
"datePattern": "yyyy/MM/dd",
"multiplexer": "false"
}
}
]

The Parquet output uses the generic implementation with DataFrames.

@@ -235,3 +317,38 @@ The parquet output uses generic implementation of DataFrames.
+==========================+===============================================+==========+=======================+
| path | Destination path to store info. | No | |
+--------------------------+-----------------------------------------------+----------+-----------------------+


.. _csv-label:

Csv Configuration
=================

* Sample:
::

"outputs": [
{
"name": "out-csv",
"type": "Csv",
"configuration": {
"isAutoCalculateId": "false",
"path": "/tmp/sparkta/operators/csv/",
"header": "true",
"delimiter": ","
}
}
]

+--------------------------+-----------------------------------------------+----------+-----------------------+
| Property | Description | Optional | Default |
+==========================+===============================================+==========+=======================+
| path | Destination path to store info. | Yes | None |
+--------------------------+-----------------------------------------------+----------+-----------------------+
| header | Indicates whether the file has a header. | Yes | false |
+--------------------------+-----------------------------------------------+----------+-----------------------+
| delimiter | Fields are separated by the delimiter. | Yes | "," |
+--------------------------+-----------------------------------------------+----------+-----------------------+
| datePattern | Indicates the date pattern of the file. | Yes | None |
+--------------------------+-----------------------------------------------+----------+-----------------------+
| dateGranularity | Specifies the granularity, from second to year | Yes | Day |
+--------------------------+-----------------------------------------------+----------+-----------------------+
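
A sketch adding the date-related options from the table above (datePattern and
dateGranularity; the values are illustrative):
::

    "outputs": [
    {
      "name": "out-csv-dated",
      "type": "Csv",
      "configuration": {
        "path": "/tmp/sparkta/operators/csv/",
        "header": "true",
        "delimiter": ",",
        "datePattern": "yyyy/MM/dd",
        "dateGranularity": "day"
      }
    }
    ]
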
2 changes: 1 addition & 1 deletion serving-api/src/main/resources/templates/input/kafkaDirect.json
@@ -4,7 +4,7 @@
"description": {
"short": "Reads events from apache-kafka",
"long": "Reads events from apache-kafka",
"learnMore": "http://docs.stratio.com"
"learnMore": "http://docs.stratio.com/modules/sparkta/0.6/inputs.html#kafkadirect-label"
},
"icon": {
"url": "logo_kafka.png"
2 changes: 1 addition & 1 deletion serving-api/src/main/resources/templates/output/csv.json
@@ -4,7 +4,7 @@
"description": {
"short": "",
"long": "",
"learnMore": "docs.stratio.com/modules/sparkta/0.6/outputs.html#csv-label"
"learnMore": "http://docs.stratio.com/modules/sparkta/0.6/outputs.html#csv-label"
},
"icon": {
"url": "logo_csv.png"
