Merge branch 'stanfordnlp:main' into main

thomasahle authored Mar 7, 2024
2 parents 189f858 + 91c9ddf commit 9ec79e6
Showing 14 changed files with 511 additions and 168 deletions.
37 changes: 37 additions & 0 deletions .github/workflows/pr_comment.yml
@@ -0,0 +1,37 @@
name: Comment for PR

on:
  workflow_run:
    workflows: ["Check for Ruff Fix, Test, and Build"]
    types:
      - completed

jobs:
  comment:
    runs-on: ubuntu-latest
    steps:
      - name: "Download Ruff Fix Outcome Artifact"
        uses: actions/download-artifact@v2
        with:
          name: ruff-fix-outcome
          path: artifacts

      - name: "Read Ruff Fix Outcome"
        id: ruff_outcome
        run: |
          outcome=$(cat artifacts/ruff_fix_outcome.txt)
          echo "RUFF_FIX_OUTCOME=$outcome" >> $GITHUB_ENV
      - name: "Comment on PR if Ruff Fix Failed"
        if: env.RUFF_FIX_OUTCOME == 'true'
        uses: actions/github-script@v5
        with:
          script: |
            const pr_number = ${{ github.event.workflow_run.pull_requests[0].number }};
            const message = 'It seems like there are issues with the formatting. Please run `ruff check . --fix-only` and commit to address these issues.';
            github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: pr_number,
              body: message
            });
50 changes: 34 additions & 16 deletions .github/workflows/run_tests.yml
@@ -1,40 +1,60 @@
name: Fix, Test, and Build
name: Lint, Test, and Build

on:
push:
branches:
- main
pull_request:
types: [opened, synchronize, reopened]

env:
POETRY_VERSION: "1.6.1"

jobs:
fix:
name: Apply Ruff Fix
name: Check Ruff Fix
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: chartboost/ruff-action@v1
- name: Ruff Fix Attempt
id: ruff_fix
uses: chartboost/ruff-action@v1
with:
args: --fix-only
- uses: stefanzweifel/git-auto-commit-action@v5
args: --fix-only --exit-non-zero-on-fix
continue-on-error: true

- name: Determine Ruff Fix Outcome
run: |
if [ ${{ steps.ruff_fix.outcome }} == 'failure' ]; then
echo "RUFF_FAILED=true" >> $GITHUB_ENV
echo ${{ steps.ruff_fix.outcome }} > ruff_fix_outcome.txt
else
echo "RUFF_FAILED=false" >> $GITHUB_ENV
echo ${{ steps.ruff_fix.outcome }} > ruff_fix_outcome.txt
fi
- uses: actions/upload-artifact@v2
with:
commit_message: "Automatic Style fixes"
name: ruff-fix-outcome
path: ruff_fix_outcome.txt

- name: Fail Workflow if Ruff Fix Failed
if: steps.ruff_fix.outcome == 'failure'
run: |
echo "Ruff fix failed, failing the workflow."
echo "Please run 'ruff check . --fix-only' locally and push the changes."
exit 1
test:
name: Run Tests
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9"]
if: github.event_name == 'pull_request'
steps:
- uses: actions/checkout@v4
with:
ref: ${{ github.head_ref }}
- name: Load cached Poetry installation
id: cached-poetry
uses: actions/cache@v3
@@ -64,10 +84,9 @@ jobs:
strategy:
matrix:
python-version: ["3.9"]
if: github.event_name == 'pull_request'
steps:
- uses: actions/checkout@v4
with:
ref: ${{ github.head_ref }}
- name: Load cached Poetry installation
id: cached-poetry
uses: actions/cache@v3
@@ -97,10 +116,9 @@ jobs:
strategy:
matrix:
python-version: ["3.9"]
if: github.event_name == 'pull_request'
steps:
- uses: actions/checkout@v4
with:
ref: ${{ github.head_ref }}
- name: Load cached Poetry installation
id: cached-poetry
uses: actions/cache@v3
7 changes: 7 additions & 0 deletions docs/api/local_language_model_clients/HFModel.md
@@ -0,0 +1,7 @@
# dspy.HFModel

Initialize `HFModel` within your program with the desired model to load in. Here's an example call:

```python
llama = dspy.HFModel(model = 'meta-llama/Llama-2-7b-hf')
```
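
Once loaded, the client can be registered as DSPy's default LM and used through any DSPy module. The snippet below is a minimal sketch; the signature string and the example question are illustrative placeholders:

```python
# Minimal usage sketch: route DSPy calls through the HFModel client above.
dspy.settings.configure(lm=llama)

qa = dspy.Predict("question -> answer")
print(qa(question="What is the capital of France?").answer)
```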
41 changes: 41 additions & 0 deletions docs/api/local_language_model_clients/MLC.md
@@ -0,0 +1,41 @@
# dspy.ChatModuleClient

## Prerequisites

1. Install the required packages using the following commands:

```shell
pip install --no-deps --pre --force-reinstall mlc-ai-nightly-cu118 mlc-chat-nightly-cu118 -f https://mlc.ai/wheels
pip install transformers
git lfs install
```

Adjust the pip wheels according to your OS/platform by referring to the provided commands in [MLC packages](https://mlc.ai/package/).

## Running MLC Llama-2 models

1. Create a directory for prebuilt models:

```shell
mkdir -p dist/prebuilt
```

2. Clone the necessary libraries from the repository:

```shell
git clone https://github.com/mlc-ai/binary-mlc-llm-libs.git dist/prebuilt/lib
cd dist/prebuilt
```

3. Choose a Llama-2 model from [MLC LLMs](https://huggingface.co/mlc-ai) and clone the model repository:

```shell
git clone https://huggingface.co/mlc-ai/mlc-chat-Llama-2-7b-chat-hf-q4f16_1
```

4. Initialize the `ChatModuleClient` within your program with the desired parameters. Here's an example call:

```python
llama = dspy.ChatModuleClient(model='dist/prebuilt/mlc-chat-Llama-2-7b-chat-hf-q4f16_1', model_path='dist/prebuilt/lib/Llama-2-7b-chat-hf-q4f16_1-cuda.so')
```
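
As a hedged sketch (reusing the `llama` client created above), the client can then be set as DSPy's default LM so that modules send their calls to the local MLC runtime:

```python
# Use the MLC-backed client as the default LM for DSPy modules.
dspy.settings.configure(lm=llama)

generate = dspy.Predict("question -> answer")
print(generate(question="What is MLC LLM used for?").answer)
```
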
Please refer to the [official MLC repository](https://github.com/mlc-ai/mlc-llm) and its [documentation](https://mlc.ai/mlc-llm/docs/get_started/try_out.html) for more detailed information.
45 changes: 45 additions & 0 deletions docs/api/local_language_model_clients/Ollama.md
@@ -0,0 +1,45 @@
# dspy.OllamaLocal

:::note
Adapted from documentation provided by https://github.com/insop
:::

Ollama is a tool that lets you run LLMs locally, such as Mistral, Llama 2, and Phi.
The following instructions cover installing Ollama and running a model.

### Prerequisites

Install Ollama by following the instructions from this page:

- https://ollama.ai

Download model: `ollama pull`

Download a model with the `ollama pull` command, for example Mistral, Llama 2, or Phi.

```bash
# download mistral
ollama pull mistral
```

The full list of available models is here:
- https://ollama.ai/library

### Running an Ollama model

Run model: `ollama run`

You can test a model with the `ollama run` command.

```bash
# run mistral
ollama run mistral
```

### Sending requests to the server

Here is the code to load a model through Ollama:

```python
lm = dspy.OllamaLocal(model='mistral')
```
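
With the client configured as DSPy's default LM, any DSPy module will send its requests to the local Ollama server. The following is a minimal sketch; the question is just a placeholder:

```python
# Route DSPy calls through the local Ollama server.
dspy.settings.configure(lm=lm)

qa = dspy.Predict("question -> answer")
response = qa(question="Why is the sky blue?")
print(response.answer)
```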
61 changes: 61 additions & 0 deletions docs/api/local_language_model_clients/TGI.md
@@ -0,0 +1,61 @@
# dspy.HFClientTGI

## Prerequisites

- Docker must be installed on your system. If you don't have Docker installed, you can get it from [here](https://docs.docker.com/get-docker/).

## Setting up the Text-Generation-Inference Server

1. Clone the Text-Generation-Inference repository from GitHub by executing the following command:

```
git clone https://github.com/huggingface/text-generation-inference.git
```

2. Change into the cloned repository directory:

```
cd text-generation-inference
```

3. Execute the Docker command under the "Get Started" section to run the server:


```
model=meta-llama/Llama-2-7b-hf # set to the specific Hugging Face model ID you wish to use.
num_shard=2 # set to the number of shards you wish to use.
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:0.9 --model-id $model --num-shard $num_shard
```

This command will start the server and make it accessible at `http://localhost:8080`.

If you want to connect to [Meta Llama 2 models](https://huggingface.co/meta-llama), make sure to use version 0.9.3 (or higher) of the Docker image (ghcr.io/huggingface/text-generation-inference:0.9.3) and pass in your Hugging Face token as an environment variable.

```
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data -e HUGGING_FACE_HUB_TOKEN={your_token} ghcr.io/huggingface/text-generation-inference:0.9.3 --model-id $model --num-shard $num_shard
```

## Sending requests to the server

After setting up the text-generation-inference server and ensuring that it displays "Connected" when it's running, you can interact with it using the `HFClientTGI`.

Initialize the `HFClientTGI` within your program with the desired parameters. Here is an example call:

```python
lm = dspy.HFClientTGI(model="meta-llama/Llama-2-7b-hf", port=8080, url="http://localhost")
```

Customize the `model`, `port`, and `url` according to your requirements. The `model` parameter should be set to the specific Hugging Face model ID you wish to use.
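
To send requests through the client, set it as the default LM and call a DSPy module. This is a hedged sketch that assumes the server from the previous section is running on port 8080:

```python
# Use the TGI-backed client for a simple chain-of-thought query.
dspy.settings.configure(lm=lm)

cot = dspy.ChainOfThought("question -> answer")
print(cot(question="What is 7 multiplied by 8?").answer)
```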


### FAQs

1. If your model doesn't require any shards, you still need to set a value for `num_shard`, but you don't need to include the parameter `--num-shard` on the command line.

2. If your model runs into any "token exceeded" issues, you can set the following parameters on the command line to adjust the input length and token limit:
- `--max-input-length`: Set the maximum allowed input length for the text.
- `--max-total-tokens`: Set the maximum total tokens allowed for text generation.

Please refer to the [official Text-Generation-Inference repository](https://github.com/huggingface/text-generation-inference) for more detailed information and documentation.
8 changes: 8 additions & 0 deletions docs/api/local_language_model_clients/_category_.json
@@ -0,0 +1,8 @@
{
"label": "Local Language Model Clients",
"position": 6,
"link": {
"type": "generated-index",
"description": "DSPy supports various methods including `built-in wrappers`, `server integration`, and `external package integration` for model loading. This documentation provides a concise introduction on how to load in models within DSPy extending these capabilities for your specific needs."
}
}
31 changes: 31 additions & 0 deletions docs/api/local_language_model_clients/vLLM.md
@@ -0,0 +1,31 @@
# dspy.HFClientVLLM

### Setting up the vLLM Server

Follow these steps to set up the vLLM Server:

1. Build the server from source by following the instructions provided in the [Build from Source guide](https://vllm.readthedocs.io/en/latest/getting_started/installation.html#build-from-source).

2. Start the server by running the following command, and specify your desired model, host, and port using the appropriate arguments. The default server address is http://localhost:8000.

Example command:

```bash
python -m vllm.entrypoints.openai.api_server --model mosaicml/mpt-7b --port 8000
```

This will launch the vLLM server.

### Sending requests to the server

After setting up the vLLM server and ensuring that it displays "Connected" when it's running, you can interact with it using the `HFClientVLLM`.

Initialize the `HFClientVLLM` within your program with the desired parameters. Here is an example call:

```python
lm = dspy.HFClientVLLM(model="mosaicml/mpt-7b", port=8000, url="http://localhost")
```

Customize the `model`, `port`, `url`, and `max_tokens` according to your requirements. The `model` parameter should be set to the specific Hugging Face model ID you wish to use.
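
You can also call the client directly to sanity-check the connection before wiring it into a program; a raw call returns a list of completion strings. This is a sketch under the assumption that the server above is reachable:

```python
# Direct call: returns a list of completions from the vLLM server.
completions = lm("The capital of France is")
print(completions[0])

# Or set it as DSPy's default LM for use inside modules.
dspy.settings.configure(lm=lm)
```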

Please refer to the [official vLLM repository](https://github.com/vllm-project/vllm) for more detailed information and documentation.
23 changes: 21 additions & 2 deletions docs/docs/cheatsheet.md
@@ -282,8 +282,13 @@ your_dspy_program_compiled = fewshot_optimizer.compile(student = your_dspy_progr

#### Compiling a compiled program - bootstrapping a bootstraped program

your_dspy_program_compiledx2 = teleprompter.compile(your_dspy_program, teacher=your_dspy_program_compiled, trainset=trainset)

```python
your_dspy_program_compiledx2 = teleprompter.compile(
    your_dspy_program,
    teacher=your_dspy_program_compiled,
    trainset=trainset,
)
```

### dspy.BootstrapFewShotWithRandomSearch

@@ -364,6 +369,20 @@ kwargs = dict(num_threads=NUM_THREADS, display_progress=True, display_table=0)
compiled_program_optimized_bayesian_signature = teleprompter.compile(your_dspy_program, devset=devset[:DEV_NUM], optuna_trials_num=100, max_bootstrapped_demos=3, max_labeled_demos=5, eval_kwargs=kwargs)
```

### Signature Optimizer with Types

```python
from dspy.teleprompt.signature_opt_typed import optimize_signature
from dspy.evaluate import Evaluate
from dspy.evaluate.metrics import answer_exact_match
from dspy.functional import TypedChainOfThought

compiled_program = optimize_signature(
    student=TypedChainOfThought("question -> answer"),
    evaluator=Evaluate(devset=devset, metric=answer_exact_match, num_threads=10, display_progress=True),
    n_iterations=50,
).program
```
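
The optimized program can then be called like any other typed module; the example question below is only an illustrative placeholder:

```python
prediction = compiled_program(question="What is the tallest mountain in the world?")
print(prediction.answer)
```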

### dspy.KNNFewShot

```python
3 changes: 0 additions & 3 deletions dsp/modules/azure_openai.py
@@ -107,9 +107,6 @@ def __init__(
kwargs["model"] = model

self.kwargs = {
"api_base": api_base,
"api_version": api_version,
"api_key": api_key,
"temperature": 0.0,
"max_tokens": 150,
"top_p": 1,