Commit

chore: regen docs

rockwotj committed Aug 28, 2024
1 parent 3c6854a commit 09491f8
Showing 2 changed files with 364 additions and 32 deletions.
206 changes: 190 additions & 16 deletions docs/modules/components/pages/processors/ollama_chat.adoc
@@ -38,9 +38,15 @@ Common::

```yml
# Common config fields, showing default values
label: ""
ollama_chat:
model: llama3.1 # No default (required)
prompt: "" # No default (optional)
response_format: text
max_tokens: 0 # No default (optional)
temperature: 0 # No default (optional)
runner:
context_size: 0 # No default (optional)
batch_size: 0 # No default (optional)
server_address: http://127.0.0.1:11434 # No default (optional)
```
--
@@ -52,10 +58,28 @@ Advanced::

```yml
# All config fields, showing default values
label: ""
ollama_chat:
model: llama3.1 # No default (required)
prompt: "" # No default (optional)
system_prompt: "" # No default (optional)
response_format: text
max_tokens: 0 # No default (optional)
temperature: 0 # No default (optional)
num_keep: 0 # No default (optional)
seed: 0 # No default (optional)
top_k: 0 # No default (optional)
top_p: 0 # No default (optional)
repeat_penalty: 0 # No default (optional)
presence_penalty: 0 # No default (optional)
frequency_penalty: 0 # No default (optional)
stop: [] # No default (optional)
runner:
context_size: 0 # No default (optional)
batch_size: 0 # No default (optional)
gpu_layers: 0 # No default (optional)
threads: 0 # No default (optional)
use_mmap: false # No default (optional)
use_mlock: false # No default (optional)
server_address: http://127.0.0.1:11434 # No default (optional)
cache_directory: /opt/cache/connect/ollama # No default (optional)
download_url: "" # No default (optional)
```
@@ -71,20 +95,6 @@

For more information, see the https://github.com/ollama/ollama/tree/main/docs[Ollama documentation].
== Fields
=== `model`
The name of the Ollama LLM to use. For a full list of models, see the https://ollama.com/models[Ollama website].
@@ -123,6 +133,170 @@

This field supports xref:configuration:interpolation.adoc#bloblang-queries[interpolation functions].
*Type*: `string`
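For example, an interpolation function can inject each message's content into the request at runtime. A minimal sketch, assuming the `prompt` field (the prompt wording is illustrative):

```yml
ollama_chat:
  model: llama3.1
  # The interpolation below is resolved per message before the request is sent
  prompt: "Summarize the following document in one sentence: ${! content() }"
```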
=== `response_format`
The format of the generated response. Note that the model must also be prompted to output in the appropriate format.
*Type*: `string`
*Default*: `"text"`
Options: `text`, `json`.
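When requesting structured output, combine `response_format: json` with a prompt that explicitly asks for JSON; as noted above, the format setting alone is not enough. A minimal sketch (prompt wording is illustrative):

```yml
ollama_chat:
  model: llama3.1
  response_format: json
  # Prompting for JSON explicitly improves the chance of well-formed output
  prompt: "Return a JSON object with keys \"name\" and \"email\" extracted from: ${! content() }"
```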
=== `max_tokens`
The maximum number of tokens to predict and output.
*Type*: `int`
=== `temperature`
The temperature of the model. Increasing the temperature will make the model answer more creatively.
*Type*: `int`
=== `num_keep`
Specify the number of tokens from the initial prompt to retain when the model resets its internal context. By default, this value is set to 4. Use -1 to retain all tokens from the initial prompt.
*Type*: `int`
=== `seed`
Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt.
*Type*: `int`
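For reproducible outputs across runs, pin the seed. A minimal sketch (the value 42 is arbitrary):

```yml
ollama_chat:
  model: llama3.1
  seed: 42          # same prompt + same seed => same text
  temperature: 0    # optional; also reduces sampling randomness (assumption, not required)
```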
=== `top_k`
Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative.
*Type*: `int`
=== `top_p`
Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text.
*Type*: `float`
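These sampling parameters are usually tuned together with `temperature`. A sketch of a comparatively conservative configuration (the specific values are illustrative, not recommendations):

```yml
ollama_chat:
  model: llama3.1
  temperature: 0.7   # lower => less creative answers
  top_k: 40          # smaller => more conservative token pool
  top_p: 0.9         # lower => more focused text
```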
=== `repeat_penalty`
Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient.
*Type*: `float`
=== `presence_penalty`
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
*Type*: `float`
=== `frequency_penalty`
Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
*Type*: `float`
=== `stop`
Sets the stop sequences to use. When one of these patterns is encountered, the LLM stops generating text and returns.
*Type*: `array`
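For example, to cut generation off at a blank line or an end-of-text marker (the sequences shown are illustrative and model-dependent):

```yml
ollama_chat:
  model: llama3.1
  stop:
    - "\n\n"            # stop at the first blank line
    - "<|endoftext|>"   # model-specific end marker (assumption)
```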
=== `runner`
Options for the model runner that are used when the model is first loaded into memory.
*Type*: `object`
=== `runner.context_size`
Sets the size of the context window used to generate the next token.
*Type*: `int`
=== `runner.batch_size`
The maximum number of requests to process in parallel.
*Type*: `int`
=== `runner.gpu_layers`
This option allows offloading some layers to the GPU for computation, which generally increases performance. By default, the runtime decides the number of layers dynamically.
*Type*: `int`
=== `runner.threads`
Sets the number of threads to use during generation. For optimal performance, set this value to the number of physical CPU cores in your system. By default, the runtime decides the optimal number of threads.
*Type*: `int`
=== `runner.use_mmap`
If supported, map the model into memory. This allows the system to load only the necessary parts of the model as needed.
*Type*: `bool`
=== `runner.use_mlock`
Lock the model in memory, preventing it from being swapped out when memory-mapped. This can improve performance but trades away some of the advantages of memory-mapping by requiring more RAM to run and potentially slowing down load times as the model loads into RAM.
*Type*: `bool`
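Putting the runner options together, a sketch for a machine with a GPU (every value here is illustrative and hardware-dependent; the defaults are usually a reasonable starting point):

```yml
ollama_chat:
  model: llama3.1
  runner:
    context_size: 8192  # larger context window
    batch_size: 512     # parallel request limit
    gpu_layers: 32      # offload some layers to the GPU
    threads: 8          # match your physical core count
    use_mmap: true
    use_mlock: false
```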
=== `server_address`
The address of the Ollama server to use. If this field is left blank, the processor starts and runs a local Ollama server. Otherwise, specify the address of your own local or remote server.
*Type*: `string`
```yml
# Examples
server_address: http://127.0.0.1:11434
```
=== `cache_directory`
If `server_address` is not set, the directory in which to download the Ollama binary and use as a model cache.
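When the processor manages its own local Ollama server, the cache location can be pinned so binaries and models persist across restarts. A minimal sketch (the path is illustrative):

```yml
ollama_chat:
  model: llama3.1
  cache_directory: /opt/cache/connect/ollama  # used only when server_address is unset
```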