From 3b654d7bf5a7d43908dd51318ebfb43a104e8ad3 Mon Sep 17 00:00:00 2001 From: Omar Khattab Date: Thu, 4 Jan 2024 21:56:03 +0000 Subject: [PATCH] Initial new docs --- README.md | 117 +++--- docs/guides/README.md | 0 docs/guides/assertions.ipynb | 77 ++++ .../language_model_details/launching_mlc.md | 48 +++ .../language_model_details/launching_tgi.md | 60 ++++ .../language_model_details/launching_vllm.md | 31 ++ docs/guides/language_models.ipynb | 254 +++++++++++++ docs/guides/metrics.ipynb | 25 ++ docs/guides/modules.ipynb | 287 +++++++++++++++ docs/guides/optimizers.ipynb | 168 +++++++++ docs/guides/retrieval_models.ipynb | 40 +++ docs/guides/signatures.ipynb | 334 ++++++++++++++++++ dspy/__init__.py | 3 + 13 files changed, 1379 insertions(+), 65 deletions(-) create mode 100644 docs/guides/README.md create mode 100644 docs/guides/assertions.ipynb create mode 100644 docs/guides/language_model_details/launching_mlc.md create mode 100644 docs/guides/language_model_details/launching_tgi.md create mode 100644 docs/guides/language_model_details/launching_vllm.md create mode 100644 docs/guides/language_models.ipynb create mode 100644 docs/guides/metrics.ipynb create mode 100644 docs/guides/modules.ipynb create mode 100644 docs/guides/optimizers.ipynb create mode 100644 docs/guides/retrieval_models.ipynb create mode 100644 docs/guides/signatures.ipynb diff --git a/README.md b/README.md index fe5adcdc2..3f4f3b074 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,12 @@ ## DSPy: _Programming_—not prompting—Foundation Models -Paper —— **[DSPy: Compiling Declarative Language Model Calls into Self-Improving Pipelines](https://arxiv.org/abs/2310.03714)** +Main Paper (Oct 2023) —— **[DSPy: Compiling Declarative Language Model Calls into Self-Improving Pipelines](https://arxiv.org/abs/2310.03714)** +New Features (Dec 2023) —— **[DSPy Assertions: Computational Constraints for Self-Refining Language Model Pipelines](https://arxiv.org/abs/2312.13382)** +Old Abstractions (Dec 2022) —— **[Demonstrate-Search-Predict: Composing Retrieval and Language Models for Knowledge-Intensive NLP](https://arxiv.org/abs/2212.14024.pdf)** -[](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/intro.ipynb) + +**Getting Started:**   [](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/intro.ipynb) **DSPy** is the framework for solving advanced tasks with language models (LMs) and retrieval models (RMs). **DSPy** unifies techniques for **prompting** and **fine-tuning** LMs — and approaches for **reasoning**, **self-improvement**, and **augmentation with retrieval and tools**. All of these are expressed through modules that compose and learn. @@ -31,9 +34,9 @@ If you want to see **DSPy** in action, **[open our intro tutorial notebook](intr 1. **[Installation](#1-installation)** -1. **[Framework Syntax](#2-syntax-youre-in-charge-of-the-workflowits-free-form-python-code)** -1. **[Compiling: Two Powerful Concepts](#3-two-powerful-concepts-signatures--teleprompters)** -1. **[Tutorials & Documentation](#4-documentation--tutorials)** +1. **[Tutorials & Documentation](#2-documentation)** +1. **[Framework Syntax](#3-syntax-youre-in-charge-of-the-workflowits-free-form-python-code)** +1. **[Compiling: Two Powerful Concepts](#4-two-powerful-concepts-signatures--teleprompters)** 1. 
**[FAQ: Is DSPy right for me?](#5-faq-is-dspy-right-for-me)** @@ -66,7 +69,44 @@ For the optional Pinecone, Qdrant, [chromadb](https://github.com/chroma-core/chr pip install dspy-ai[pinecone] # or [qdrant] or [chromadb] or [marqo] ``` -## 2) Syntax: You're in charge of the workflow—it's free-form Python code! +## 2) Documentation + +The DSPy documentation is divided into **tutorials**, **guides**, and **examples**. + +### A) Tutorials: + +DSPy Tutorials illustate how to start from scratch and build a powerful DSPy program on a specific task. + +If you're new here, start with **[our intro tutorial](intro.ipynb)**. We recommend you open it directly in free Google Colab: [](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/intro.ipynb) + + +### B) Guides: + +Each DSPy guide presents how to use specific parts of the library. + +You will probably refer to these guides frequently, first to understand each concept in DSPy in isolation and then to copy/paste snippets that you can edit for your work. If you're new to DSPy, it's _usually_ best to go in this order. It's not necessary, though. + +1. **[DSPy Signatures](docs/guides/signatures.ipynb)** + +2. **[Language Models](docs/guides/language_models.ipynb)** and **[Retrieval Models](docs/guides/retrieval_models.ipynb)** + +3. **[DSPy Modules](docs/guides/modules.ipynb)** + +4. **[DSPy Optimizers](docs/guides/optimizers.ipynb)** + +5. **[DSPy Metrics](docs/guides/metrics.ipynb)** + +6. **[DSPy Assertions](docs/guides/assertions.ipynb)** + + +### C) Examples: + +These are self-contained programs that illustrate how to build programs with DSPy. + +You can find many individual **examples** inside _and outside!_ the `examples/` folder and a bunch others tweeted by [@lateinteraction](https://twitter.com/lateinteraction) on Twitter/X. + + +## 3) Syntax: You're in charge of the workflow—it's free-form Python code! **DSPy** hides tedious prompt engineering, but it cleanly exposes the important decisions you need to make: **[1]** what's your system design going to look like? **[2]** what are the important constraints on the behavior of your program? @@ -108,12 +148,12 @@ The next section will discuss how to compile our simple `RAG` program. When we c If you later decide you need another step in your pipeline, just add another module and compile again. Maybe add a module that takes the chat history into account during search? -## 3) Two Powerful Concepts: Signatures & Teleprompters +## 4) Two Powerful Concepts: Signatures & Teleprompters To make it possible to compile any program you write, **DSPy** introduces two simple concepts: Signatures and Teleprompters. -#### 3.a) Declaring the input/output behavior of LMs with `dspy.Signature` +#### 4.a) Declaring the input/output behavior of LMs with `dspy.Signature` When we assign tasks to LMs in **DSPy**, we specify the behavior we need as a **Signature**. A signature is a declarative specification of input/output behavior of a **DSPy module**. @@ -151,7 +191,7 @@ self.generate_answer = dspy.ChainOfThought(GenerateSearchQuery) You can optionally provide a `prefix` and/or `desc` key for each input or output field to refine or constraint the behavior of modules using your signature. The description of the sub-task itself is specified as the docstring (i.e., `"""Write a simple..."""`). 
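For illustration, here is a minimal sketch of such a signature with `desc` hints on two of its fields (the exact field descriptions are just examples; tune them to your task):

```python
class GenerateSearchQuery(dspy.Signature):
    """Write a simple search query that will help answer a complex question."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    query = dspy.OutputField(desc="a short search query")
```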
-#### 3.b) Asking **DSPy** to automatically optimize your program with `dspy.teleprompt.*` +#### 4.b) Asking **DSPy** to automatically optimize your program with `dspy.teleprompt.*` After defining the `RAG` program, we can **compile** it. Compiling a program will update the parameters stored in each module. For large LMs, this is primarily in the form of creating and validating good demonstrations for inclusion in your prompt(s). @@ -196,61 +236,6 @@ compiled_rag = teleprompter.compile(RAG(), trainset=my_rag_trainset) If we now use `compiled_rag`, it will invoke our LM with rich prompts with few-shot demonstrations of chain-of-thought retrieval-augmented question answering on our data. -## 4) Documentation & Tutorials - -While we work on new tutorials, please check out **[our intro notebook](intro.ipynb)**. - -Or open it directly in free Google Colab: [](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/intro.ipynb) - -For module documentation, please refer to our [documentation folder](https://github.com/stanfordnlp/dspy/tree/main/docs). - - -#### Language Model Clients - -- [`dspy.OpenAI`](docs/language_models_client.md#openai) -- [`dspy.Cohere`](docs/language_models_client.md#cohere) -- [`dspy.TGI`](docs/language_models_client.md#tgi) -- [`dspy.VLLM`](docs/language_models_client.md#vllm) - -#### Retrieval Model Clients - -- [`dspy.ColBERTv2`](docs/retrieval_models_client.md#colbertv2) -- [`dspy.AzureCognitiveSearch`](docs/retrieval_models_client.md#azurecognitivesearch) -- `dspy.Pyserini` -- `dspy.Pinecone` -- `dspy.Qdrant` -- `dspy.Chromadb` -- `dspy.Marqo` - -#### Signatures - -- `dspy.Signature` -- `dspy.InputField` -- `dspy.OutputField` - -#### Modules - -- [`dspy.Predict`](docs/modules.md#dspypredict) -- [`dspy.Retrieve`](docs/modules.md#dspyretrieve) -- [`dspy.ChainOfThought`](docs/modules.md#dspychainofthought) -- `dspy.ProgramOfThought` -- [`dspy.ReAct`](docs/modules.md#dspyreact) -- [`dspy.MultiChainComparison`](docs/modules.md#dspymultichaincomparison) -- `dspy.SelfCritique` [coming soon] -- `dspy.SelfRevision` [coming soon] -- `dspy.majority` (self-consistency) - - -#### Teleprompters - -- [`dspy.teleprompt.LabeledFewShot`](docs/teleprompters.md#telepromptlabeledfewshot) -- [`dspy.teleprompt.BootstrapFewShot`](docs/teleprompters.md#telepromptbootstrapfewshot) -- [`dspy.teleprompt.BootstrapFewShotWithRandomSearch`](docs/teleprompters.md#telepromptbootstrapfewshotwithrandomsearch) -- `dspy.teleprompt.LabeledFinetune` [coming soon] -- [`dspy.teleprompt.BootstrapFinetune`](docs/teleprompters.md#telepromptbootstrapfinetune) -- [`dspy.teleprompt.Ensemble`](docs/teleprompters.md#telepromptensemble) -- `dspy.teleprompt.kNN` - ## 5) FAQ: Is DSPy right for me? 
@@ -341,7 +326,9 @@ If you use DSPy or DSP in a research paper, please cite our work as follows: ``` You can also read more about the evolution of the framework from Demonstrate-Search-Predict to DSPy: -* [**DSPy: Compiling Declarative Language Model Calls into Self-Improving Pipelines**](https://arxiv.org/abs/2310.03714) (Academic Paper, Oct 2023) + +* [**DSPy Assertions: Computational Constraints for Self-Refining Language Model Pipelines**](https://arxiv.org/abs/2312.13382) (Academic Paper, Dec 2023) +* [**DSPy: Compiling Declarative Language Model Calls into Self-Improving Pipelines**](https://arxiv.org/abs/2310.03714) (Academic Paper, Oct 2023) * [**Releasing DSPy, the latest iteration of the framework**](https://twitter.com/lateinteraction/status/1694748401374490946) (Twitter Thread, Aug 2023) * [**Releasing the DSP Compiler (v0.1)**](https://twitter.com/lateinteraction/status/1625231662849073160) (Twitter Thread, Feb 2023) * [**Introducing DSP**](https://twitter.com/lateinteraction/status/1617953413576425472) (Twitter Thread, Jan 2023) diff --git a/docs/guides/README.md b/docs/guides/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/docs/guides/assertions.ipynb b/docs/guides/assertions.ipynb new file mode 100644 index 000000000..711e6e3d6 --- /dev/null +++ b/docs/guides/assertions.ipynb @@ -0,0 +1,77 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import sys; sys.path.append('/future/u/okhattab/repos/public/tmp/dspy')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"DSPy7\n", + "\n", + "## Guide: **DSPy Assertions**\n", + "\n", + "[](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/docs/guides/signatures.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Quick Recap\n", + "\n", + "This guide assumes you followed the [intro tutorial]() to build your first few DSPy programs.\n", + "\n", + "Remember that a **DSPy program** is just Python code that calls one or more DSPy modules, like `dspy.Predict` or `dspy.ChainOfThought`, to use LMs." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1) What is a DSPy Assertion?\n", + "\n", + "While we prepare this guide, please [read the DSPy assertions paper](https://arxiv.org/abs/2312.13382) and follow the examples in it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install `dspy-ai` if needed. 
Then set up a default language model.\n", + "# TODO: Add a graceful line for OPENAI_API_KEY.\n", + "\n", + "try: import dspy\n", + "except ImportError:\n", + " %pip install dspy-ai\n", + " import dspy\n", + "\n", + "dspy.configure(lm=dspy.OpenAI(model='gpt-3.5-turbo-1106'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/guides/language_model_details/launching_mlc.md b/docs/guides/language_model_details/launching_mlc.md new file mode 100644 index 000000000..87bf65d0d --- /dev/null +++ b/docs/guides/language_model_details/launching_mlc.md @@ -0,0 +1,48 @@ +## Setting up an MLC language model + +### Prerequisites + +Install the required packages using the following commands: + +```shell +pip install --no-deps --pre --force-reinstall mlc-ai-nightly-cu118 mlc-chat-nightly-cu118 -f https://mlc.ai/wheels +pip install transformers +git lfs install +``` + +Adjust the pip wheels according to your OS/platform by referring to the provided commands in [MLC packages](https://mlc.ai/package/). + + +### Running MLC Llama-2 models + +1. Create a directory for prebuilt models: + +```shell +mkdir -p dist/prebuilt +``` + +2. Clone the necessary libraries from the repository: + +```shell +git clone https://github.com/mlc-ai/binary-mlc-llm-libs.git dist/prebuilt/lib +cd dist/prebuilt +``` + +3. Choose a Llama-2 model from [MLC LLMs](https://huggingface.co/mlc-ai) and clone the model repository: + +```shell +git clone https://huggingface.co/mlc-ai/mlc-chat-Llama-2-7b-chat-hf-q4f16_1 +``` + +### Sending requests to the server + +Initialize the `ChatModuleClient` within your program with the desired parameters. Here's an example call: + +```python +model = 'dist/prebuilt/mlc-chat-Llama-2-7b-chat-hf-q4f16_1' +model_path = 'dist/prebuilt/lib/Llama-2-7b-chat-hf-q4f16_1-cuda.so' + +llama = dspy.ChatModuleClient(model=model, model_path=model_path) +``` + +Please refer to the [official MLC repository](https://github.com/mlc-ai/mlc-llm) for more detailed [docs](https://mlc.ai/mlc-llm/docs/get_started/try_out.html). diff --git a/docs/guides/language_model_details/launching_tgi.md b/docs/guides/language_model_details/launching_tgi.md new file mode 100644 index 000000000..d45c2dea5 --- /dev/null +++ b/docs/guides/language_model_details/launching_tgi.md @@ -0,0 +1,60 @@ +## Launching a Text Generation Inference (TGI) Server + +### Prerequisites + +- Docker must be installed on your system. If you don't have Docker installed, you can get it from [here](https://docs.docker.com/get-docker/). + +### Setting up the Text-Generation-Inference Server + +1. Clone the Text-Generation-Inference repository from GitHub by executing the following command: + +```bash +git clone https://github.com/huggingface/text-generation-inference.git +``` + +2. Change into the cloned repository directory: + +```bash +cd text-generation-inference +``` + +3. Execute the Docker command under the "Get Started" section to run the server: + +```bash +model=meta-llama/Llama-2-7b-hf # set to the specific Hugging Face model ID you wish to use. +num_shard=1 # set to the number of shards you wish to use. 
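# Tip: num_shard typically equals the number of GPUs you want to shard the model across.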
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run

docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id $model --num-shard $num_shard
```

This command will start the server and make it accessible at `http://localhost:8080`.

If you want to connect to [Meta Llama 2 models](https://huggingface.co/meta-llama), make sure to use version 0.9.3 (or higher) of the Docker image (ghcr.io/huggingface/text-generation-inference:0.9.3) and pass in your Hugging Face token as an environment variable.

```bash
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data -e HUGGING_FACE_HUB_TOKEN={your_token} ghcr.io/huggingface/text-generation-inference:latest --model-id $model --num-shard $num_shard
```

### Sending requests to the server

After setting up the text-generation-inference server and confirming that it displays "Connected" once running, you can interact with it using the `HFClientTGI`.

Initialize the `HFClientTGI` within your program with the desired parameters. Here is an example call:

```python
lm = dspy.HFClientTGI(model="meta-llama/Llama-2-7b-hf", port=8080, url="http://localhost")
```

Customize the `model`, `port`, and `url` according to your requirements. The `model` parameter should be set to the specific Hugging Face model ID you wish to use.

### FAQs

1. If your model doesn't require any shards, you still need to set a value for `num_shard`, but you don't need to include the parameter `--num-shard` on the command line.

2. If your model runs into any "token exceeded" issues, you can set the following parameters on the command line to adjust the input length and token limit:
   - `--max-input-length`: Set the maximum allowed input length for the text.
   - `--max-total-tokens`: Set the maximum total tokens allowed for text generation.

Please refer to the [official TGI repository](https://github.com/huggingface/text-generation-inference) for detailed docs.
diff --git a/docs/guides/language_model_details/launching_vllm.md b/docs/guides/language_model_details/launching_vllm.md
new file mode 100644
index 000000000..4ac911f85
--- /dev/null
+++ b/docs/guides/language_model_details/launching_vllm.md
@@ -0,0 +1,31 @@
## Launching a vLLM Server

### Setting up the vLLM Server

Follow these steps to set up the vLLM server:

1. Build the server from source by following the instructions provided in the [Build from Source guide](https://vllm.readthedocs.io/en/latest/getting_started/installation.html#build-from-source).

2. Start the server by running the following command, specifying your desired model, host, and port using the appropriate arguments. The default server address is http://localhost:8000.

Example command:

```bash
python -m vllm.entrypoints.api_server --model mosaicml/mpt-7b --port 8000
```

This will launch the vLLM server.

### Sending requests to the server

After setting up the vLLM server and confirming that it is running, you can interact with it using the `HFClientVLLM`.

Initialize the `HFClientVLLM` within your program with the desired parameters. Here is an example call:

```python
lm = dspy.HFClientVLLM(model="mosaicml/mpt-7b", port=8000, url="http://localhost")
```

Customize the `model`, `port`, `url`, and `max_tokens` according to your requirements. The `model` parameter should be set to the specific Hugging Face model ID you wish to use.
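As a quick end-to-end check, you can set this client as your default LM in DSPy and run a simple module against it. This is a minimal sketch (the question is arbitrary, and generation settings are left at their defaults):

```python
import dspy

# Connect to the vLLM server launched above and make it the default LM.
vllm_lm = dspy.HFClientVLLM(model="mosaicml/mpt-7b", port=8000, url="http://localhost")
dspy.configure(lm=vllm_lm)

# Any DSPy module will now send its calls to the vLLM server.
qa = dspy.Predict('question -> answer')
print(qa(question="What is the capital of France?").answer)
```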
+ +Please refer to the [official vLLM repository](https://github.com/vllm-project/vllm) for more detailed information and documentation. diff --git a/docs/guides/language_models.ipynb b/docs/guides/language_models.ipynb new file mode 100644 index 000000000..eaf9f2003 --- /dev/null +++ b/docs/guides/language_models.ipynb @@ -0,0 +1,254 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import sys; sys.path.append('/future/u/okhattab/repos/public/tmp/dspy')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"DSPy7\n", + "\n", + "## Guide: **Language Models**\n", + "\n", + "[](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/docs/guides/signatures.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Quick Recap\n", + "\n", + "This guide assumes you followed the [intro tutorial]() to build your first few DSPy programs.\n", + "\n", + "Remember that a **DSPy program** is just Python code that calls one or more DSPy modules, like `dspy.Predict` or `dspy.ChainOfThought`, to use LMs." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1) Short Intro to LMs in DSPy\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Install `dspy-ai` if needed.\n", + "\n", + "try: import dspy\n", + "except ImportError:\n", + " %pip install dspy-ai\n", + " import dspy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2) Supported LM clients.\n", + "\n", + "#### Remote LMs.\n", + "\n", + "These models are managed services. You just need to sign up and obtain an API key.\n", + "\n", + "1. `dspy.OpenAI` for GPT-3.5 and GPT-4.\n", + "\n", + "2. `dspy.Cohere`\n", + "\n", + "3. `dspy.Anyscale` for hosted Llama2 models.\n", + "\n", + "\n", + "\n", + "#### Local LMs.\n", + "\n", + "You need to host these models on your own GPU(s). Below, we include pointers for how to do that.\n", + "\n", + "4. `dspy.HFClientTGI`: for HuggingFace models through the Text Generation Inference (TGI) system. [Tutorial: How do I install and launch the TGI server?](language_model_details/launching_tgi.md)\n", + "\n", + "5. `dspy.HFClientVLLM`: for HuggingFace models through vLLM. [Tutorial: How do I install and launch the vLLM server?](language_model_details/launching_vllm.md)\n", + "\n", + "6. `dspy.HFModel` (experimental)\n", + "\n", + "7. `dspy.Ollama` (experimental)\n", + "\n", + "8. `dspy.ChatModuleClient` (experimental): [How do I install and use MLC?](language_model_details/launching_mlc.md)\n", + "\n", + "\n", + "\n", + "If there are other clients you want added, let us know!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3) Setting up the LM client.\n", + "\n", + "You can just call the constructor that connects to the LM. Then, use `dspy.configure` to declare this as the default LM.\n", + "\n", + "For example, for OpenAI, you can do it as follows." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: Add a graceful line for OPENAI_API_KEY.\n", + "\n", + "gpt3_turbo = dspy.OpenAI(model='gpt-3.5-turbo-1106', max_tokens=300)\n", + "gpt4_turbo = dspy.OpenAI(model='gpt-4-1106-preview', max_tokens=300)\n", + "\n", + "# cohere = dspy.Cohere(...)\n", + "# anyscale = dspy.Anyscale(...)\n", + "# tgi_llama2 = dspy.HFClientTGI(model=\"meta-llama/Llama-2-7b-hf\", port=8080, url=\"http://localhost\")\n", + "\n", + "dspy.configure(lm=gpt3_turbo)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4) Using a different LM within a code block.\n", + "\n", + "The default LM above is GPT-3.5, `gpt3_turbo`. What if I want to run a piece of code with, say, GPT-4 or LLama-2?\n", + "\n", + "Instead of changing the default LM, you can just change it inside a block of code.\n", + "\n", + "**Tip:** Using `dspy.configure` and `dspy.context` is thread-safe!" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The castle David Gregory inherited has 7 floors.\n", + "The number of floors in the castle David Gregory inherited cannot be determined with the information provided.\n" + ] + } + ], + "source": [ + "qa = dspy.ChainOfThought('question -> answer')\n", + "\n", + "response = qa(question=\"How many floors are in the castle David Gregory inherited?\")\n", + "print(response.answer)\n", + "\n", + "with dspy.context(lm=gpt4_turbo):\n", + " response = qa(question=\"How many floors are in the castle David Gregory inherited?\")\n", + " print(response.answer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5) Tips and Tricks.\n", + "\n", + "In DSPy, all LM calls are cached. If you repeat the same call, you will get the same outputs. (If you change the inputs or configurations, you will get new outputs.)\n", + "\n", + "To generate 5 outputs, you can use `n=5` in the module constructor, or pass `config=dict(n=5)` when invoking the module." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[\"The specific number of floors in David Gregory's inherited castle is not provided here, so further research would be needed to determine the answer.\",\n", + " 'The castle David Gregory inherited has 4 floors.',\n", + " 'The castle David Gregory inherited has 5 floors.',\n", + " 'David Gregory inherited 10 floors in the castle.',\n", + " 'The castle David Gregory inherited has 5 floors.']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qa = dspy.ChainOfThought('question -> answer', n=5)\n", + "\n", + "response = qa(question=\"How many floors are in the castle David Gregory inherited?\")\n", + "response.completions.answer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you just call `qa(...)` in a loop with the same input, it will always return the same value! That's by design.\n", + "\n", + "To loop and generate one output at a time with the same input, bypass the cache by making sure each request is (slightly) unique, as below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The specific number of floors in David Gregory's inherited castle is not provided here, so further research would be needed to determine the answer.\n", + "It is not possible to determine the exact number of floors in the castle David Gregory inherited without specific information about the castle's layout and history.\n", + "The castle David Gregory inherited has 5 floors.\n", + "We need more information to determine the number of floors in the castle David Gregory inherited.\n", + "The castle David Gregory inherited has a total of 6 floors.\n" + ] + } + ], + "source": [ + "for idx in range(5):\n", + " response = qa(question=\"How many floors are in the castle David Gregory inherited?\", config=dict(temperature=0.7+0.0001*idx))\n", + " print(response.answer)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py39_nov2023", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/guides/metrics.ipynb b/docs/guides/metrics.ipynb new file mode 100644 index 000000000..70b5bf290 --- /dev/null +++ b/docs/guides/metrics.ipynb @@ -0,0 +1,25 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Coming soon." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/guides/modules.ipynb b/docs/guides/modules.ipynb new file mode 100644 index 000000000..e75531241 --- /dev/null +++ b/docs/guides/modules.ipynb @@ -0,0 +1,287 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import sys; sys.path.append('/future/u/okhattab/repos/public/tmp/dspy')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"DSPy7\n", + "\n", + "## Guide: **DSPy Modules**\n", + "\n", + "[](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/docs/guides/signatures.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Quick Recap\n", + "\n", + "This guide assumes you followed the [intro tutorial]() to build your first few DSPy programs.\n", + "\n", + "Remember that **DSPy program** is just Python code that calls one or more **DSPy modules**, like `dspy.Predict` or `dspy.ChainOfThought`, to use LMs." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1) What is a DSPy Module?\n", + "\n", + "A **DSPy module** is a building block for programs that use LMs.\n", + "\n", + "- Each built-in module abstracts a **prompting technique** (like chain of thought or ReAct). Crucially, they are generalized to handle any [DSPy Signature]().\n", + "\n", + "- A DSPy module has **learnable parameters** (i.e., the little pieces comprising the prompt and the LM weights) and can be invoked (called) to process inputs and return outputs.\n", + "\n", + "- Multiple modules can be composed into bigger modules (programs). 
DSPy modules are inspired directly by NN modules in PyTorch, but applied to LM programs." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2) Why should I use a DSPy Module?\n", + "\n", + "TODO. I typically take this as self-evident, but I'll spell it out here." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Install `dspy-ai` if needed. Then set up a default language model.\n", + "# TODO: Add a graceful line for OPENAI_API_KEY.\n", + "\n", + "try: import dspy\n", + "except ImportError:\n", + " %pip install dspy-ai\n", + " import dspy\n", + "\n", + "dspy.configure(lm=dspy.OpenAI(model='gpt-3.5-turbo-1106'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3) What DSPy Modules are currently built-in?\n", + "\n", + "1. **`dspy.Predict`**:\n", + "\n", + "2. **`dspy.ChainOfThought`**: \n", + "\n", + "3. **`dspy.ProgramOfThought`**:\n", + "\n", + "4. **`dspy.ReAct`**:\n", + "\n", + "5. **`dspy.MultiChainComparison`**:\n", + "\n", + "\n", + "We also have some function-style modules:\n", + "\n", + "6. **`dspy.majority`**:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4) How do I use a built-in module, like `dspy.Predict` or `dspy.ChainOfThought`?\n", + "\n", + "Let's start with the most fundamental one, `dspy.Predict`. Internally, all of the others are just built using it!\n", + "\n", + "We'll assume you are already at least a little familiar with [DSPy signatures](), which are declarative specs for defining the behavior of any module we use in DSPy.\n", + "\n", + "To use a module, we first **declare** it by giving it a signature. Then we **call** the module with the input arguments, and extract the output fields!" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Positive\n" + ] + } + ], + "source": [ + "sentence = \"it's a charming and often affecting journey.\" # example from the SST-2 dataset.\n", + "\n", + "# 1) Declare with a signature.\n", + "classify = dspy.Predict('sentence -> sentiment')\n", + "\n", + "# 2) Call with input argument(s). \n", + "response = classify(sentence=sentence)\n", + "\n", + "# 3) Access the output.\n", + "print(response.sentiment)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When we declare a module, we can pass configuration keys to it.\n", + "\n", + "Below, we'll pass `n=5` to request five completions. We can also pass `temperature` or `max_len`, etc.\n", + "\n", + "Let's use `dspy.ChainOfThought`. In many cases, simply swapping `dspy.ChainOfThought` in place of `dspy.Predict` improves quality." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['One great thing about the ColBERT retrieval model is its superior efficiency and effectiveness compared to other models.',\n", + " 'Its ability to efficiently retrieve relevant information from large document collections.',\n", + " 'One great thing about the ColBERT retrieval model is its superior performance compared to other models and its efficient use of pre-trained language models.',\n", + " 'One great thing about the ColBERT retrieval model is its superior efficiency and accuracy compared to other models.',\n", + " 'One great thing about the ColBERT retrieval model is its ability to incorporate user feedback and support complex queries.']" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "question = \"What's something great about the ColBERT retrieval model?\"\n", + "\n", + "# 1) Declare with a signature, and pass some config.\n", + "classify = dspy.ChainOfThought('question -> answer', n=5)\n", + "\n", + "# 2) Call with input argument.\n", + "response = classify(question=question)\n", + "\n", + "# 3) Access the outputs.\n", + "response.completions.answer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's dicuss the output object here.\n", + "\n", + "The `dspy.ChainOfThought` module will generally inject a `rationale` before the output field(s) of your signature.\n", + "\n", + "Let's inspect the (first) rationale and answer!" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Rationale: produce the answer. We can consider the fact that ColBERT has shown to outperform other state-of-the-art retrieval models in terms of efficiency and effectiveness. It uses contextualized embeddings and performs document retrieval in a way that is both accurate and scalable.\n", + "Answer: One great thing about the ColBERT retrieval model is its superior efficiency and effectiveness compared to other models.\n" + ] + } + ], + "source": [ + "print(f\"Rationale: {response.rationale}\")\n", + "print(f\"Answer: {response.answer}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is accessible whether we request one or many completions.\n", + "\n", + "We can also access the different completions as a list of `Prediction`s or as several lists, one for each field." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "response.completions[3].rationale == response.completions.rationale[3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5) How do I use more complex built-in modules?\n", + "\n", + "The others are very similar, `dspy.ReAct` and `dspy.ProgramOfThough` etc. They mainly change the internal behavior with which your signature is implemented!\n", + "\n", + "More example soon!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6) How do I compose multiple modules into a bigger program?\n", + "\n", + "DSPy is just Python code that uses modules in any control flow you like. 
(There's some magic internally at `compile` time to trace your LM calls.)\n", + "\n", + "What this means is that, you can just call the modules freely. No weird abstractions for chaining calls.\n", + "\n", + "This is basically PyTorch's design approach for define-by-run / dynamic computation graphs. Refer to the intro tutorials for examples." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py39_nov2023", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/guides/optimizers.ipynb b/docs/guides/optimizers.ipynb new file mode 100644 index 000000000..dcdb8c3d0 --- /dev/null +++ b/docs/guides/optimizers.ipynb @@ -0,0 +1,168 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "UsageError: unrecognized arguments: import sys; sys.path.append('/future/u/okhattab/repos/public/tmp/dspy')\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import sys; sys.path.append('/future/u/okhattab/repos/public/tmp/dspy')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"DSPy7\n", + "\n", + "## Guide: **DSPy Optimizers**\n", + "\n", + "Formerly called **DSPy Teleprompters**. We will be making an official name update.\n", + "\n", + "[](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/docs/guides/signatures.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Quick Recap\n", + "\n", + "This guide assumes you followed the [intro tutorial]() to build your first few DSPy programs.\n", + "\n", + "Remember that a **DSPy program** is just Python code that calls one or more DSPy modules, like `dspy.Predict` or `dspy.ChainOfThought`, to use LMs." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1) What is a DSPy Optimizer?\n", + "\n", + "A **DSPy optimizer** is an algorithm that can tune the parameters of a DSPy program (i.e., the prompts and the LM weights) to maximize the metrics you specify, like accuracy.\n", + "\n", + "There are many built-in optimizers in DSPy. They apply different strategies to tune your programs. A typical DSPy optimizer takes three things:\n", + "\n", + "- Your **DSPy program**. This may be a single module (e.g., `dspy.Predict`) or a complex multi-module program.\n", + "\n", + "- Your **metric**. This is a function that evaluates the output of your program, and assigns it a score (higher is better).\n", + "\n", + "- A few **training inputs**. This may be very small (i.e., only 5 or 10 examples) or incomplete (only inputs to your program, without any labels).\n", + "\n", + "Your training data could also be large or complete. DSPy can leverage having a lot of data, but you can start small." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2) **What** does a DSPy Optimizer tune? 
**How** does it tune them?\n",
    "\n",
    "Traditional deep neural networks (DNNs) can be optimized with gradient descent, given a loss function and some training data.\n",
    "\n",
    "DSPy programs consist of multiple calls to LMs, stacked together as [DSPy modules](). Each DSPy module has internal parameters of three kinds: (1) the LM weights, (2) the instructions, and (3) demonstrations of the input/output behavior.\n",
    "\n",
    "Given a metric, DSPy can optimize all three of these with multi-stage optimization algorithms. These can combine gradient descent (for LM weights) and LM-driven optimization (for the instructions), but they primarily rely on discrete optimization for creating and validating demonstrations. DSPy demonstrations are like few-shot examples, but they're far more powerful. They can be created from scratch, given your program, and their creation and selection can be optimized in many effective ways.\n",
    "\n",
    "In many cases, we found that compiling leads to better prompts than humans write. Not because DSPy optimizers are more creative than humans, but simply because they can try more things and tune the metrics directly.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install `dspy-ai` if needed. Then set up a default language model.\n",
    "# TODO: Add a graceful line for OPENAI_API_KEY.\n",
    "\n",
    "try: import dspy\n",
    "except ImportError:\n",
    "    %pip install dspy-ai\n",
    "    import dspy\n",
    "\n",
    "dspy.configure(lm=dspy.OpenAI(model='gpt-3.5-turbo-1106'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3) What DSPy Optimizers are currently available?\n",
    "\n",
    "All of these can be accessed via `from dspy.teleprompt import *`.\n",
    "\n",
    "#### Automatic Few-Shot Learning\n",
    "\n",
    "1. **`LabeledFewShot`**:\n",
    "\n",
    "2. **`BootstrapFewShot`**: \n",
    "\n",
    "3. **`BootstrapFewShotWithRandomSearch`**:\n",
    "\n",
    "4. **`BootstrapFewShotWithOptuna`**:\n",
    "\n",
    "\n",
    "#### Automatic Instruction Optimization\n",
    "\n",
    "5. **`SignatureOptimizer`**:\n",
    "\n",
    "\n",
    "#### Automatic Finetuning\n",
    "\n",
    "6. **`BootstrapFinetune`**:\n",
    "\n",
    "\n",
    "#### Program Transformations\n",
    "\n",
    "7. **`KNNFewShot`**:\n",
    "\n",
    "8. 
**`Ensemble`**:\n", + "\n", + "\n", + "#### Which one should I use?\n", + "\n", + "As a rule of thumb, if you don't know where to start, use `BootstrapFewShotWithRandomSearch`.\n", + "\n", + "There are some old docs for:\n", + "\n", + "- [`dspy.teleprompt.LabeledFewShot`](docs/teleprompters.md#telepromptlabeledfewshot)\n", + "- [`dspy.teleprompt.BootstrapFewShot`](docs/teleprompters.md#telepromptbootstrapfewshot)\n", + "- [`dspy.teleprompt.BootstrapFewShotWithRandomSearch`](docs/teleprompters.md#telepromptbootstrapfewshotwithrandomsearch)\n", + "- [`dspy.teleprompt.BootstrapFinetune`](docs/teleprompters.md#telepromptbootstrapfinetune)\n", + "- [`dspy.teleprompt.Ensemble`](docs/teleprompters.md#telepromptensemble)\n", + "- `dspy.teleprompt.kNN`\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/guides/retrieval_models.ipynb b/docs/guides/retrieval_models.ipynb new file mode 100644 index 000000000..3b2c8368f --- /dev/null +++ b/docs/guides/retrieval_models.ipynb @@ -0,0 +1,40 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Coming soon.\n", + "\n", + "There are some old docs for:\n", + "\n", + "#### Modules\n", + "\n", + "- [`dspy.Retrieve`](docs/modules.md#dspyretrieve)\n", + "\n", + "\n", + "#### Retrieval Model Clients\n", + "\n", + "- [`dspy.ColBERTv2`](docs/retrieval_models_client.md#colbertv2)\n", + "- [`dspy.AzureCognitiveSearch`](docs/retrieval_models_client.md#azurecognitivesearch)\n", + "- `dspy.Pyserini`\n", + "- `dspy.Pinecone`\n", + "- `dspy.Qdrant`\n", + "- `dspy.Chromadb`\n", + "- `dspy.Marqo`\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/guides/signatures.ipynb b/docs/guides/signatures.ipynb new file mode 100644 index 000000000..7b3d1a82f --- /dev/null +++ b/docs/guides/signatures.ipynb @@ -0,0 +1,334 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import sys; sys.path.append('/future/u/okhattab/repos/public/tmp/dspy')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"DSPy7\n", + "\n", + "## Guide: **DSPy Signatures**\n", + "\n", + "[](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/docs/guides/signatures.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Quick Recap\n", + "\n", + "This guide assumes you followed the [intro tutorial]() to build your first few DSPy programs.\n", + "\n", + "Remember that a **DSPy program** is just Python code that calls one or more **DSPy modules**, like `dspy.Predict` or `dspy.ChainOfThought`, to use LMs." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1) What is a DSPy Signature?\n", + "\n", + "When we assign tasks to LMs in DSPy, we specify the behavior we need as a Signature.\n", + "\n", + "**A signature is a declarative specification of input/output behavior of a DSPy module.**\n", + "\n", + "You're probably familiar with function signatures. 
The differences are that:\n", + "\n", + "- While typical function signatures just _describe_ things, DSPy Signatures _define and control the behavior_ of modules.\n", + "\n", + "- The field names matter in DSPy Signatures. You express semantic roles in plain English: a `question` is different from an `answer`, a `sql_query` is different from `python_code`." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "# Install `dspy-ai` if needed. Then set up a default language model.\n", + "# TODO: Add a graceful line for OPENAI_API_KEY.\n", + "\n", + "try: import dspy\n", + "except ImportError:\n", + " %pip install dspy-ai\n", + " import dspy\n", + "\n", + "dspy.configure(lm=dspy.OpenAI(model='gpt-3.5-turbo-1106', max_tokens=300))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2) Why should I use a DSPy Signature?\n", + "\n", + "**tl;dr** For modular and clean code, in which LM calls can be optimized into high-quality prompts (or automatic finetunes).\n", + "\n", + "**Long Answer:** Most people coerce LMs to do tasks by hacking long, brittle prompts. Or by collecting/generating data for fine-tuning.\n", + "\n", + "Writing signatures is far more modular, adaptive, and reproducible than hacking at prompts or finetunes. The DSPy compiler will figure out how to build a highly-optimized prompt for your LM (or finetune your small LM) for your signature, on your data, and within your pipeline. In many cases, we found that compiling leads to better prompts than humans write. Not because DSPy optimizers are more creative than humans, but simply because they can try more things and tune the metrics directly." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3) **Short** DSPy Signatures\n", + "\n", + "Signatures can be defined as a short string, with argument names that define semantic roles for inputs/outputs.\n", + "\n", + "1. Question Answering: `\"question -> answer\"`\n", + "\n", + "2. Sentiment Classification: `\"sentence -> sentiment\"`\n", + "\n", + "3. Summarization: `\"document -> summary\"`\n", + "\n", + "Your signatures can also have multiple input/output fields.\n", + "\n", + "4. Retrieval-Augmented Question Answering: `\"context, question -> answer\"`\n", + "\n", + "5. Multiple-Choice Question Answering with Reasoning: `\"question, choices -> reasoning, selection\"`\n", + "\n", + "\n", + "**Tip:** For fields, any valid variable names work! Field names should be semantically meaningful, but start simple and don't prematurely optimize keywords! Leave that kind of hacking to the DSPy compiler. For example, for summarization, it's probably fine to say `\"document -> summary\"`, `\"text -> gist\"`, or `\"long_context -> tldr\"`." 
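,
    "\n",
    "As a quick illustration of a multi-field signature, here is a minimal sketch (the `context` string below is made up for the example):\n",
    "\n",
    "```python\n",
    "qa = dspy.ChainOfThought('context, question -> answer')\n",
    "response = qa(context='The castle has five floors.', question='How many floors does the castle have?')\n",
    "print(response.answer)\n",
    "```"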
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4) Example 1: Sentiment Classification" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Positive'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sentence = \"it's a charming and often affecting journey.\" # example from the SST-2 dataset.\n", + "\n", + "classify = dspy.Predict('sentence -> sentiment')\n", + "classify(sentence=sentence).sentiment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Above, we covered a simple example with `dspy.Predict`.\n", + "\n", + "Below, let's use `dspy.ChainOfThought`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5) Example 2: Summarization" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The 21-year-old Lee made seven appearances and scored one goal for West Ham last season. He had loan spells in League One with Blackpool and Colchester United, scoring twice for the latter. He has now signed a contract with Barnsley, but the length of the contract has not been revealed.\n" + ] + } + ], + "source": [ + "# Example from the XSum dataset.\n", + "document = \"\"\"The 21-year-old made seven appearances for the Hammers and netted his only goal for them in a Europa League qualification round match against Andorran side FC Lustrains last season. Lee had two loan spells in League One last term, with Blackpool and then Colchester United. He scored twice for the U's but was unable to save them from relegation. The length of Lee's contract with the promoted Tykes has not been revealed. Find all the latest football transfers on our dedicated page.\"\"\"\n", + "\n", + "summarize = dspy.ChainOfThought('document -> summary')\n", + "response = summarize(document=document)\n", + "\n", + "print(response.summary)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Many DSPy modules (except `dspy.Predict`) return auxiliary information by expanding your signature under the hood.\n", + "\n", + "For example, `dspy.ChainOfThought` also adds a `rationale` field that includes the LM's reasoning before it generates the output `summary`." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Rationale: produce the summary. We need to highlight the key points about Lee's performance for West Ham, his loan spells in League One, and his new contract with Barnsley. We also need to mention that his contract length has not been disclosed.\n" + ] + } + ], + "source": [ + "print(\"Rationale:\", response.rationale)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6) Examples of _long_ DSPy Signatures\n", + "\n", + "For some advanced tasks, you need more verbose signatures. This is typically to:\n", + "\n", + "1. Clarify something about the nature of the task (expressed below as a `docstring`).\n", + "\n", + "2. Supply hints on the nature of an input field, expressed as a `desc` keyword argument for `dspy.InputField`.\n", + "\n", + "2. Supply constraints on an output field, expressed as a `desc` keyword argument for `dspy.OutputField." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7) Example C: Classification\n", + "\n", + "Notice how the docstring contains (minimal) instructions, which in this case are necessary to have a fully-defined task.\n", + "\n", + "Some optimizers in DSPy, like `SignatureOptimizer`, can take this simple docstring and then generate more effective variants if needed." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Prediction(\n", + " sentiment='Fear'\n", + ")" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "class Emotion(dspy.Signature):\n", + " \"\"\"Classify emotion among sadness, joy, love, anger, fear, surprise.\"\"\"\n", + " \n", + " sentence = dspy.InputField()\n", + " sentiment = dspy.OutputField()\n", + "\n", + "sentence = \"i started feeling a little vulnerable when the giant spotlight started blinding me\" # from dair-ai/emotion\n", + "\n", + "classify = dspy.Predict(Emotion)\n", + "classify(sentence=sentence)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 8) Example D: A metric that evaluates faithfulness to citations" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Prediction(\n", + " rationale=\"produce the faithfulness. We know that Lee had two loan spells in League One last term, with Blackpool and then Colchester United. He scored twice for the U's but was unable to save them from relegation. However, there is no mention of him scoring three goals for Colchester United.\",\n", + " faithfulness='False'\n", + ")" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "class CheckCitationFaithfulness(dspy.Signature):\n", + " \"\"\"Verify that the text is based on the provided context.\"\"\"\n", + "\n", + " context = dspy.InputField(desc=\"facts here are assumed to be true\")\n", + " text = dspy.InputField()\n", + " faithfulness = dspy.OutputField(desc=\"True/False indicating if text is faithful to context\")\n", + "\n", + "context = \"The 21-year-old made seven appearances for the Hammers and netted his only goal for them in a Europa League qualification round match against Andorran side FC Lustrains last season. Lee had two loan spells in League One last term, with Blackpool and then Colchester United. He scored twice for the U's but was unable to save them from relegation. The length of Lee's contract with the promoted Tykes has not been revealed. Find all the latest football transfers on our dedicated page.\"\n", + "\n", + "text = \"Lee scored 3 goals for Colchester United.\"\n", + "\n", + "faithfulness = dspy.ChainOfThought(CheckCitationFaithfulness)\n", + "faithfulness(context=context, text=text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 9) Building modules & compiling them\n", + "\n", + "While signatures are covenient for prototyping with structured inputs/outputs, that's not the main reason to use them!\n", + "\n", + "You should compose multiple signatures into bigger [DSPy modules]() and [compile]() these modules into optimized prompts and finetunes." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py39_nov2023", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/dspy/__init__.py b/dspy/__init__.py index 0cbfe86b1..f91aac594 100644 --- a/dspy/__init__.py +++ b/dspy/__init__.py @@ -24,3 +24,6 @@ Anyscale = dsp.Anyscale HFModel = dsp.HFModel + +configure = settings.configure +context = settings.context \ No newline at end of file