Merge branch 'stanfordnlp:main' into main

bakebrain · Mar 13, 2024 · 0d4e094 · 0d4e094
2 parents 9ec79e6 + 65551ef
commit 0d4e094
Show file tree

Hide file tree

Showing 48 changed files with 6,398 additions and 981 deletions.
diff --git a/.github/workflows/pr_comment.yml b/.github/workflows/pr_comment.yml
diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
@@ -1,6 +1,9 @@
 name: Lint, Test, and Build
 
 on:
+  push:
+    branches:
+      - main
   pull_request:
     types: [opened, synchronize, reopened]
 
@@ -24,21 +27,6 @@ jobs:
           args: --fix-only --exit-non-zero-on-fix
         continue-on-error: true
 
-      - name: Determine Ruff Fix Outcome
-        run: |
-          if [ ${{ steps.ruff_fix.outcome }} == 'failure' ]; then
-            echo "RUFF_FAILED=true" >> $GITHUB_ENV
-            echo ${{ steps.ruff_fix.outcome }} > ruff_fix_outcome.txt
-          else
-            echo "RUFF_FAILED=false" >> $GITHUB_ENV
-            echo ${{ steps.ruff_fix.outcome }} > ruff_fix_outcome.txt
-          fi
-
-      - uses: actions/upload-artifact@v2
-        with:
-          name: ruff-fix-outcome
-          path: ruff_fix_outcome.txt
-
       - name: Fail Workflow if Ruff Fix Failed
         if: steps.ruff_fix.outcome == 'failure'
         run: |
@@ -52,7 +40,6 @@ jobs:
     strategy:
       matrix:
         python-version: ["3.9"]
-    if: github.event_name == 'pull_request'
     steps:
       - uses: actions/checkout@v4
       - name: Load cached Poetry installation
@@ -84,7 +71,6 @@ jobs:
     strategy:
       matrix:
         python-version: ["3.9"]
-    if: github.event_name == 'pull_request'
     steps:
       - uses: actions/checkout@v4
       - name: Load cached Poetry installation
@@ -116,7 +102,6 @@ jobs:
     strategy:
       matrix:
         python-version: ["3.9"]
-    if: github.event_name == 'pull_request'
     steps:
       - uses: actions/checkout@v4
       - name: Load cached Poetry installation

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -5,15 +5,15 @@ default_stages: [commit]
 default_install_hook_types: [pre-commit, commit-msg]
 
 repos:
-  - repo: https://github.com/astral-sh/ruff-pre-commit
-    # Ruff version.
-    rev: v0.1.11
-    hooks:
-      # Run the linter.
-      - id: ruff
-        args: [--fix]
-      # Run the formatter.
-      - id: ruff-format
+  #  - repo: https://github.com/astral-sh/ruff-pre-commit
+  #    # Ruff version.
+  #    rev: v0.1.11
+  #    hooks:
+  #      # Run the linter.
+  #      - id: ruff
+  #        args: [--fix]
+  #      # Run the formatter.
+  #      - id: ruff-format
 
   - repo: https://github.com/timothycrosley/isort
     rev: 5.12.0
@@ -50,14 +50,14 @@ repos:
         args:
           - "--autofix"
           - "--indent=2"
-  - repo: local
-    hooks:
-      - id: validate-commit-msg
-        name: Commit Message is Valid
-        language: pygrep
-        entry: ^(break|build|ci|docs|feat|fix|perf|refactor|style|test|ops|hotfix|release|maint|init|enh|revert)\([\w,\.,\-,\(,\),\/]+\)(!?)(:)\s{1}([\w,\W,:]+)
-        stages: [commit-msg]
-        args: [--negate]
+  #  - repo: local
+  #    hooks:
+  #      - id: validate-commit-msg
+  #        name: Commit Message is Valid
+  #        language: pygrep
+  #        entry: ^(break|build|ci|docs|feat|fix|perf|refactor|style|test|ops|hotfix|release|maint|init|enh|revert)\([\w,\.,\-,\(,\),\/]+\)(!?)(:)\s{1}([\w,\W,:]+)
+  #        stages: [commit-msg]
+  #        args: [--negate]
 
   - repo: https://github.com/pre-commit/mirrors-prettier
     rev: v3.0.3

diff --git a/README.md b/README.md
@@ -33,7 +33,8 @@ If you need help thinking about your task, we recently created a [Discord server
 1. **[Tutorials & Documentation](#2-documentation)**
 1. **[Framework Syntax](#3-syntax-youre-in-charge-of-the-workflowits-free-form-python-code)**
 1. **[Compiling: Two Powerful Concepts](#4-two-powerful-concepts-signatures--teleprompters)**
-1. **[FAQ: Is DSPy right for me?](#5-faq-is-dspy-right-for-me)**
+1. **[Pydantic Types](#5-pydantic-types)** 
+1. **[FAQ: Is DSPy right for me?](#6-faq-is-dspy-right-for-me)**
 
 
 

diff --git a/docs/api/functional/_category_.json b/docs/api/functional/_category_.json
@@ -0,0 +1,8 @@
+{
+    "label": "Functional",
+    "position": 2,
+    "link": {
+      "type": "generated-index",
+      "description": "This documentation provides an overview of the Typed Predictors."
+    }
+}
diff --git a/docs/api/functional/dspy_TypedCoT.md b/docs/api/functional/dspy_TypedCoT.md
@@ -0,0 +1,41 @@
+---
+sidebar_position: 2
+---
+
+# dspy.TypedChainOfThought
+
+### Overview
+
+#### `def TypedChainOfThought(signature, max_retries=3) -> dspy.Module`
+
+Adds a Chain of Thoughts `dspy.OutputField` to the `dspy.TypedPredictor` module by prepending it to the Signature. Similar to `dspy.TypedPredictor` but automatically adds a "reasoning" output field.
+
+* **Inputs**:
+    * `signature`: The `dspy.Signature` specifying the input/output fields
+    * `max_retries`: Maximum number of retries if outputs fail validation
+* **Output**: A dspy.Module instance capable of making predictions.
+
+### Example
+
+```python
+from dspy import InputField, OutputField, Signature
+from dspy.functional import TypedChainOfThought
+from pydantic import BaseModel
+
+# We define a pydantic type that automatically checks if its argument is valid python code.
+class CodeOutput(BaseModel):
+    code: str
+    api_reference: str
+
+class CodeSignature(Signature):
+    function_description: str = InputField()
+    solution: CodeOutput = OutputField()
+
+cot_predictor = TypedChainOfThought(CodeSignature)
+prediction = cot_predictor(
+    function_description="Write a function that adds two numbers."
+)
+
+print(prediction["code"])
+print(prediction["api_reference"])
+```
diff --git a/docs/api/functional/dspy_TypedPredictor.md b/docs/api/functional/dspy_TypedPredictor.md
@@ -0,0 +1,78 @@
+---
+sidebar_position: 1
+---
+
+# dspy.TypedPredictor
+
+The `TypedPredictor` class is a sophisticated module designed for making predictions with strict type validations. It leverages a signature to enforce type constraints on inputs and outputs, ensuring that the data conforms to the expected schema.
+
+### Constructor
+
+```python
+TypedPredictor(
+    CodeSignature,
+    max_retries=3
+)
+```
+
+Parameters:
+* `signature` (dspy.Signature): The signature that defines the input and output fields along with their types.
+* `max_retries` (int, optional): The maximum number of retries for generating a valid prediction output. Defaults to 3.
+
+### Methods
+
+#### `copy() -> "TypedPredictor"`
+
+Creates and returns a deep copy of the current TypedPredictor instance.
+
+**Returns:** A new instance of TypedPredictor that is a deep copy of the original instance.
+
+#### `_make_example(type_: Type) -> str`
+
+A static method that generates a JSON object example based on the schema of the specified Pydantic model type. This JSON object serves as an example for the expected input or output format.
+
+**Parameters:**
+* `type_`: A Pydantic model class for which an example JSON object is to be generated.
+
+**Returns:** A string that represents a JSON object example, which validates against the provided Pydantic model's JSON schema. If the method is unable to generate a valid example, it returns an empty string.
+
+#### `_prepare_signature() -> dspy.Signature`
+
+Prepares and returns a modified version of the signature associated with the TypedPredictor instance. This method iterates over the signature's fields to add format and parser functions based on their type annotations.
+
+**Returns:** A dspy.Signature object that has been enhanced with formatting and parsing specifications for its fields.
+
+#### `forward(**kwargs) -> dspy.Prediction`
+
+Executes the prediction logic, making use of the `dspy.Predict` component to generate predictions based on the input arguments. This method handles type validation, parsing of output data, and implements retry logic in case the output does not initially conform to the specified output schema.
+
+**Parameters:**
+
+* `**kwargs`: Keyword arguments corresponding to the input fields defined in the signature.
+
+**Returns:** A dspy.Prediction object containing the prediction results. Each key in this object corresponds to an output field defined in the signature, and its value is the parsed result of the prediction.
+
+### Example
+
+```python
+from dspy import InputField, OutputField, Signature
+from dspy.functional import TypedPredictor
+from pydantic import BaseModel
+
+# We define a pydantic type that automatically checks if its argument is valid python code.
+class CodeOutput(BaseModel):
+    code: str
+    api_reference: str
+
+class CodeSignature(Signature):
+    function_description: str = InputField()
+    solution: CodeOutput = OutputField()
+
+cot_predictor = TypedPredictor(CodeSignature)
+prediction = cot_predictor(
+    function_description="Write a function that adds two numbers."
+)
+
+print(prediction["code"])
+print(prediction["api_reference"])
+```
diff --git a/docs/api/functional/dspy_cot.md b/docs/api/functional/dspy_cot.md
@@ -0,0 +1,30 @@
+---
+sidebar_position: 4
+---
+
+# dspy.cot
+
+### Overview
+
+#### `def cot(func) -> dspy.Module`
+
+The `@cot` decorator is used to create a Chain of Thoughts module based on the provided function. It automatically generates a `dspy.TypedPredictor` from the function's type annotations and docstring. Similar to `@predictor`, but adds a "Reasoning" output field to capture the model's step-by-step thinking.
+
+* **Input**: Function with input parameters and return type annotation.
+* **Output**: A dspy.Module instance capable of making predictions.
+
+### Example
+
+```python
+import dspy
+
+context = ["Roses are red.", "Violets are blue"]
+question = "What color are roses?"
+
+@dspy.cot
+def generate_answer(self, context: list[str], question) -> str:
+    """Answer questions with short factoid answers."""
+    pass
+
+generate_answer(context=context, question=question)
+```
diff --git a/docs/api/functional/dspy_predictor.md b/docs/api/functional/dspy_predictor.md
@@ -0,0 +1,30 @@
+---
+sidebar_position: 3
+---
+
+# dspy.predictor
+
+### Overview
+
+#### `def predictor(func) -> dspy.Module`
+
+The `@predictor` decorator is used to create a predictor module based on the provided function. It automatically generates a `dspy.TypedPredictor` from the function's type annotations and docstring.
+
+* **Input**: Function with input parameters and return type annotation.
+* **Output**: A dspy.Module instance capable of making predictions.
+
+### Example
+
+```python
+import dspy
+
+context = ["Roses are red.", "Violets are blue"]
+question = "What color are roses?"
+
+@dspy.predictor
+def generate_answer(self, context: list[str], question) -> str:
+    """Answer questions with short factoid answers."""
+    pass
+
+generate_answer(context=context, question=question)
+```
diff --git a/docs/api/language_model_clients/_category_.json b/docs/api/language_model_clients/_category_.json
@@ -1,6 +1,6 @@
 {
     "label": "Language Model API Clients",
-    "position": 4,
+    "position": 5,
     "link": {
       "type": "generated-index",
       "description": "This documentation provides an overview of the DSPy Language Model Clients."

diff --git a/docs/api/modules/ProgramOfThought.md b/docs/api/modules/ProgramOfThought.md
@@ -82,16 +82,18 @@ Main method to execute the code generation and refinement process.
 
 ```python
 #Define a simple signature for basic question answering
-generate_answer_signature = dspy.Signature("question -> answer")
-generate_answer_signature.attach(question=("Question:", "")).attach(answer=("Answer:", "often between 1 and 5 words"))
+class GenerateAnswer(dspy.Signature):
+    """Answer questions with short factoid answers."""
+    question = dspy.InputField()
+    answer = dspy.OutputField(desc="often between 1 and 5 words")
 
 # Pass signature to ProgramOfThought Module
-pot = dspy.ProgramOfThought(generate_answer_signature)
+pot = dspy.ProgramOfThought(GenerateAnswer)
 
 #Call the ProgramOfThought module on a particular input
 question = 'Sarah has 5 apples. She buys 7 more apples from the store. How many apples does Sarah have now?'
 result = pot(question=question)
 
 print(f"Question: {question}")
 print(f"Final Predicted Answer (after ProgramOfThought process): {result.answer}")
-```
+```
diff --git a/docs/api/optimizers/_category_.json b/docs/api/optimizers/_category_.json
@@ -1,6 +1,6 @@
 {
     "label": "Optimizers",
-    "position": 2,
+    "position": 3,
     "link": {
         "type": "generated-index",
         "description": "Teleprompters are powerful optimizers (included in DSPy) that can learn to bootstrap and select effective prompts for the modules of any program. (The \"tele-\" in the name means \"at a distance\", i.e., automatic prompting at a distance.)\n\nThis documentation provides an overview of the DSPy Teleprompters."

diff --git a/docs/api/retrieval_model_clients/ChromadbRM.md b/docs/api/retrieval_model_clients/ChromadbRM.md
@@ -34,7 +34,7 @@ Search the chromadb collection for the top `k` passages matching the given query
 - `k` (_Optional[int]_, _optional_): The number of results to retrieve. If not specified, defaults to the value set during initialization.
 
 **Returns:**
-- `dspy.Prediction`: Contains the retrieved passages, each represented as a `dotdict` with a `long_text` attribute.
+- `dspy.Prediction`: Contains the retrieved passages, each represented as a `dotdict` with schema `[{"id": str, "score": float, "long_text": str, "metadatas": dict }]`
 
 ### Quickstart with OpenAI Embeddings