Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
habedi committed Jun 30, 2024
0 parents commit 387633b
Show file tree
Hide file tree
Showing 1,133 changed files with 4,828 additions and 0 deletions.
29 changes: 29 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# EditorConfig is awesome: https://EditorConfig.org

# Top-most EditorConfig file
root = true

# Python specific settings, complying with PEP 8 style guide
[*.py]
indent_size = 4
max_line_length = 80

# Markdown files
[*.md]
trim_trailing_whitespace = false

# Bash scripts
[*.sh]
indent_size = 2

# SQL files
[*.sql]
indent_size = 2

# YAML files
[*.yml]
indent_size = 2

# JSON files
[*.json]
indent_size = 2
8 changes: 8 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# The following lines tell Git to track these file types with Git LFS (they are stored as binary, with no text diffs or merges).
*.text filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.jpg filter=lfs diff=lfs merge=lfs -text
*.jpeg filter=lfs diff=lfs merge=lfs -text
*.gif filter=lfs diff=lfs merge=lfs -text
*.csv filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
11 changes: 11 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file

version: 2
updates:
- package-ecosystem: "pip" # See documentation for possible values
directory: "/" # Location of package manifests
schedule:
interval: "weekly"
40 changes: 40 additions & 0 deletions .github/workflows/publish_to_pypi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
name: Publish to PyPI

on:
workflow_dispatch: # Enable manual runs

jobs:

# Run tests before publishing
call_tests:
uses: ./.github/workflows/tests.yml

publish_to_pypi:
runs-on: ubuntu-latest
needs: call_tests

steps:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Set Up Python
uses: actions/setup-python@v2
with:
python-version: "3.10"

- name: Install Poetry
run: |
pip install poetry
- name: Install Dependencies
run: |
poetry install
- name: Update Version
run: |
poetry version patch # Use 'minor' or 'major' for minor or major version bumps
- name: Build and Publish Package
run: |
poetry config pypi-token.pypi ${{ secrets.PYPI_API_TOKEN }}
poetry publish --build
47 changes: 47 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
name: Tests

on:
# push:
# branches:
# - main
workflow_dispatch: # Enable manual runs
workflow_call: # Make this workflow available to be called by other workflows

jobs:
run_tests:
runs-on: ubuntu-latest

strategy:
matrix:
python-version: [ "3.10", "3.11", "3.12" ]

steps:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Set Up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}

- name: Install Poetry
run: |
pip install poetry
- name: Install Dependencies
run: |
poetry install --with dev
- name: Run Tests
shell: bash
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
poetry run pytest tests/ --cov --doctest-modules --junitxml=junit/test-results-${{ matrix.python-version }}.xml
continue-on-error: false

- name: Upload Test Results
uses: actions/upload-artifact@v2
with:
name: pytest-results-${{ matrix.python-version }}
path: junit/test-results-${{ matrix.python-version }}.xml
75 changes: 75 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Python specific
__pycache__/
*.py[cod]
*$py.class

# Virtual environments
.env/
env/
.venv/
venv/

# Packaging and distribution files
.Python
build/
dist/
*.egg-info/
*.egg
MANIFEST

# Dependency directories
develop-eggs/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
.installed.cfg

# Test and coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# IDE specific files and directories
.idea/
*.iml
.vscode/

# Jupyter Notebook files
.ipynb_checkpoints

# Temporary files created by editors and the system and folders to ignore
*.swp
*~
*.bak
*.tmp
temp/
tmp/

# Database files (SQLite, DuckDB, etc.)
*.duckdb
*.db
*.wal
*.sqlite

# Dependency lock files (uncomment to ignore)
poetry.lock

# Miscellaneous files
# Add any additional file and directory patterns that you wish to ignore
openai_api_key.json
.env
output/
notebooks/output/
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Hassan Abedi

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
65 changes: 65 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Easy Letters

[![Tests](https://github.com/habedi/easy-letters/actions/workflows/tests.yml/badge.svg)](https://github.com/habedi/easy-letters/actions/workflows/tests.yml)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![PyPI version](https://badge.fury.io/py/easy-letters.svg)](https://badge.fury.io/py/easy-letters)
[![Downloads](https://pepy.tech/badge/easy-letters)](https://pepy.tech/project/easy-letters)

Easy Letters is a Python package that helps job seekers write application letters. A simple retrieval
augmented generation (RAG) pipeline is used to generate the letters. The user can then edit the draft letter to suit
their needs.

See the `notebooks/README.md` file for an explanation of how Easy Letters works.

## Installation

You can install Easy Letters using pip:

```bash
pip install easy-letters
```

## Getting Started

### API Key Setup

At the moment, Easy Letters reads the API keys for supported services from environment variables.
You therefore need to set the following environment variable before using Easy Letters:

- `OPENAI_API_KEY`: The OpenAI API key (required)

### Sample Notebooks

You can find Jupyter notebooks with example code in the `notebooks` directory.
The notebooks demonstrate how to use Easy Letters to generate application letter drafts.

### Supported Models

Easy Letters currently supports the following models:

| Model | Type |
|----------------------------------|-----------------|
| GPT-3.5 Turbo | Text Generation |
| GPT-4o | Text Generation |
| Text Embedding 3 (Small Variant) | Text Embedding |
| Text Embedding 3 (Large Variant) | Text Embedding |

### Installing from Source

You can also install Easy Letters from the source code in this repository. The main benefit of this approach is that
it makes it easier to run the sample notebooks and to modify the code as you wish.

After cloning this repository, you can navigate to the `easy-letters` directory and install the
dependencies using [Poetry](https://python-poetry.org/):

```bash
git clone https://github.com/habedi/easy-letters.git && cd easy-letters

# Assuming you have Poetry installed on your system
poetry install --with dev
```

## TODO

- [ ] Add support for Anthropic models and API
- [ ] Add support for locally served models via Ollama
3 changes: 3 additions & 0 deletions easy_letters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from easy_letters.connectors import EmbeddingModels, LanguageModels
from easy_letters.connectors import OpenAIConnector
from easy_letters.similarity_search import Ranker
80 changes: 80 additions & 0 deletions easy_letters/connectors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from typing import Any, List

import numpy as np
import openai
from numpy import ndarray, dtype


class LanguageModels:
    """Identifiers of the supported OpenAI text-generation (chat) models."""

    # Model names exactly as accepted by the OpenAI chat-completions API.
    OPENAI_GPT35TURBO = "gpt-3.5-turbo"
    OPENAI_GPT4O = "gpt-4o"


class EmbeddingModels:
    """Identifiers of the supported OpenAI text-embedding models.

    The original ``OPENAPI_*`` names contain a typo ("OPENAPI" instead of
    "OPENAI"); they are kept as aliases for backward compatibility with
    existing callers. New code should prefer the ``OPENAI_*`` names.
    """

    # Correctly spelled names (preferred).
    OPENAI_EMS = 'text-embedding-3-small'
    OPENAI_EML = 'text-embedding-3-large'

    # Backward-compatible aliases preserving the historical typo.
    OPENAPI_EMS = OPENAI_EMS
    OPENAPI_EML = OPENAI_EML


class OpenAIConnector:
    """Thin wrapper around the OpenAI client for embeddings and chat
    completions.

    Attributes:
        client (openai.Client): The underlying OpenAI API client used for
            all requests.
    """

    def __init__(self, api_key: str, **kwargs):
        """
        Create a connector backed by an ``openai.Client``.

        Args:
            api_key (str): The API key used to authenticate with OpenAI.
            **kwargs: Extra keyword arguments forwarded verbatim to the
                ``openai.Client`` constructor.
        """
        self.client = openai.Client(api_key=api_key, **kwargs)

    def embed(self, documents: List[str], model: str) -> List[
        ndarray[Any, dtype[Any]]]:
        """
        Embed each document with the given embedding model.

        Args:
            documents (List[str]): The texts to embed.
            model (str): Name of the embedding model to use.

        Returns:
            List[ndarray[Any, dtype[Any]]]: One numpy vector per input
            document, in the same order as ``documents``.
        """
        response = self.client.embeddings.create(input=documents, model=model)
        # Convert each returned embedding (a list of floats) to a numpy array.
        vectors = []
        for item in response.data:
            vectors.append(np.array(item.embedding))
        return vectors

    def chat(self, prompt: str, model: str, temperature: float = 0.0,
             max_tokens: int = 512) -> str:
        """
        Run a single-turn chat completion for the given prompt.

        Args:
            prompt (str): The user message sent to the model.
            model (str): Name of the chat model to use.
            temperature (float, optional): Sampling temperature; 0.0 makes
                the output as deterministic as the API allows. Defaults to 0.0.
            max_tokens (int, optional): Upper bound on generated tokens.
                Defaults to 512.

        Returns:
            str: The text of the model's reply.
        """
        # Single-turn conversation: the prompt is the only message.
        messages = [{"role": "user", "content": prompt}]
        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return response.choices[0].message.content
Loading

0 comments on commit 387633b

Please sign in to comment.