Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
habedi committed Jun 30, 2024
0 parents commit 387633b
Show file tree
Hide file tree
Showing 1,133 changed files with 4,828 additions and 0 deletions.
29 changes: 29 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# EditorConfig is awesome: https://EditorConfig.org

# Top-most EditorConfig file
root = true

# Python specific settings, complying with PEP 8 style guide
[*.py]
indent_size = 4
max_line_length = 80

# Markdown files
[*.md]
trim_trailing_whitespace = false

# Bash scripts
[*.sh]
indent_size = 2

# SQL files
[*.sql]
indent_size = 2

# YAML files
[*.yml]
indent_size = 2

# JSON files
[*.json]
indent_size = 2
8 changes: 8 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# The following lines tell Git to track these file types with Git LFS (they are stored as binary, with no text diffs or merges).
*.text filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.jpg filter=lfs diff=lfs merge=lfs -text
*.jpeg filter=lfs diff=lfs merge=lfs -text
*.gif filter=lfs diff=lfs merge=lfs -text
*.csv filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
11 changes: 11 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file

version: 2
updates:
- package-ecosystem: "pip" # See documentation for possible values
directory: "/" # Location of package manifests
schedule:
interval: "weekly"
40 changes: 40 additions & 0 deletions .github/workflows/publish_to_pypi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
name: Publish to PyPI

on:
workflow_dispatch: # Enable manual runs

jobs:

# Run tests before publishing
call_tests:
uses: ./.github/workflows/tests.yml

publish_to_pypi:
runs-on: ubuntu-latest
needs: call_tests

steps:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Set Up Python
uses: actions/setup-python@v2
with:
python-version: "3.10"

- name: Install Poetry
run: |
pip install poetry
- name: Install Dependencies
run: |
poetry install
- name: Update Version
run: |
poetry version patch # Use 'minor' or 'major' for minor or major version bumps
- name: Build and Publish Package
run: |
poetry config pypi-token.pypi ${{ secrets.PYPI_API_TOKEN }}
poetry publish --build
47 changes: 47 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
name: Tests

on:
# push:
# branches:
# - main
workflow_dispatch: # Enable manual runs
workflow_call: # Make this workflow available to be called by other workflows

jobs:
run_tests:
runs-on: ubuntu-latest

strategy:
matrix:
python-version: [ "3.10", "3.11", "3.12" ]

steps:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Set Up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}

- name: Install Poetry
run: |
pip install poetry
- name: Install Dependencies
run: |
poetry install --with dev
- name: Run Tests
shell: bash
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
poetry run pytest tests/ --cov --doctest-modules --junitxml=junit/test-results-${{ matrix.python-version }}.xml
continue-on-error: false

- name: Upload Test Results
uses: actions/upload-artifact@v2
with:
name: pytest-results-${{ matrix.python-version }}
path: junit/test-results-${{ matrix.python-version }}.xml
75 changes: 75 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Python specific
__pycache__/
*.py[cod]
*$py.class

# Virtual environments
.env/
env/
.venv/
venv/

# Packaging and distribution files
.Python
build/
dist/
*.egg-info/
*.egg
MANIFEST

# Dependency directories
develop-eggs/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
.installed.cfg

# Test and coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# IDE specific files and directories
.idea/
*.iml
.vscode/

# Jupyter Notebook files
.ipynb_checkpoints

# Temporary files created by editors and the system and folders to ignore
*.swp
*~
*.bak
*.tmp
temp/
tmp/

# Database files (SQLite, DuckDB, etc.)
*.duckdb
*.db
*.wal
*.sqlite

# Dependency lock files (uncomment to ignore)
poetry.lock

# Miscellaneous files
# Add any additional file and directory patterns that you wish to ignore
openai_api_key.json
.env
output/
notebooks/output/
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Hassan Abedi

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
65 changes: 65 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Easy Letters

[![Tests](https://github.com/habedi/easy-letters/actions/workflows/tests.yml/badge.svg)](https://github.com/habedi/easy-letters/actions/workflows/tests.yml)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![PyPI version](https://badge.fury.io/py/easy-letters.svg)](https://badge.fury.io/py/easy-letters)
[![Downloads](https://pepy.tech/badge/easy-letters)](https://pepy.tech/project/easy-letters)

Easy Letters is a Python package that helps job seekers write application letters. A simple retrieval
augmented generation (RAG) pipeline is used to generate the letters. The user can then edit the draft letter to suit
their needs.

See the `notebooks/README.md` file for an explanation of how Easy Letters works.

## Installation

You can install Easy Letters using pip:

```bash
pip install easy-letters
```

## Getting Started

### API Key Setup

At the moment, Easy Letters reads the API keys for supported services from environment variables.
You therefore need to set the following environment variable before using Easy Letters:

- `OPENAI_API_KEY`: The OpenAI API key (required)

### Sample Notebooks

You can find Jupyter notebooks with example code in the `notebooks` directory.
The notebooks demonstrate how to use Easy Letters to generate application letter drafts.

### Supported Models

Easy Letters currently supports the following models:

| Model | Type |
|----------------------------------|-----------------|
| GPT-3.5 Turbo | Text Generation |
| GPT-4o | Text Generation |
| Text Embedding 3 (Small Variant) | Text Embedding |
| Text Embedding 3 (Large Variant) | Text Embedding |

### Installing from Source

You can also install Easy Letters from the source code in this repository. The main benefit of this approach is that
it makes it easier to run the sample notebooks and to modify the code as you wish.

After cloning this repository, you can navigate to the `easy-letters` directory and install the
dependencies using [Poetry](https://python-poetry.org/):

```bash
git clone https://github.com/habedi/easy-letters.git && cd easy-letters

# Assuming you have Poetry installed on your system
poetry install --with dev
```

## TODO

- [ ] Add support for Anthropic models and API
- [ ] Add support for locally served models via Ollama
3 changes: 3 additions & 0 deletions easy_letters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from easy_letters.connectors import EmbeddingModels, LanguageModels
from easy_letters.connectors import OpenAIConnector
from easy_letters.similarity_search import Ranker
80 changes: 80 additions & 0 deletions easy_letters/connectors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from typing import Any, List

import numpy as np
import openai
from numpy import ndarray, dtype


class LanguageModels:
    """Identifiers of the supported OpenAI text-generation (chat) models."""

    # Model names exactly as accepted by the OpenAI chat-completions API.
    OPENAI_GPT35TURBO = "gpt-3.5-turbo"
    OPENAI_GPT4O = "gpt-4o"


class EmbeddingModels:
    """Identifiers of the supported OpenAI text-embedding models.

    The original ``OPENAPI_*`` names contain a typo ("OPENAPI" instead of
    "OPENAI"); they are kept as aliases for backward compatibility with
    existing callers. New code should prefer the ``OPENAI_*`` names.
    """

    # Correctly spelled names (preferred).
    OPENAI_EMS = 'text-embedding-3-small'
    OPENAI_EML = 'text-embedding-3-large'

    # Backward-compatible aliases preserving the historical typo.
    OPENAPI_EMS = OPENAI_EMS
    OPENAPI_EML = OPENAI_EML


class OpenAIConnector:
    """Thin wrapper around the OpenAI client for embeddings and chat
    completions.

    Attributes:
        client (openai.Client): The underlying OpenAI API client used for
            all requests.
    """

    def __init__(self, api_key: str, **kwargs):
        """
        Create a connector backed by an ``openai.Client``.

        Args:
            api_key (str): The API key used to authenticate with OpenAI.
            **kwargs: Extra keyword arguments forwarded verbatim to the
                ``openai.Client`` constructor.
        """
        self.client = openai.Client(api_key=api_key, **kwargs)

    def embed(self, documents: List[str], model: str) -> List[
        ndarray[Any, dtype[Any]]]:
        """
        Embed each document with the given embedding model.

        Args:
            documents (List[str]): The texts to embed.
            model (str): Name of the embedding model to use.

        Returns:
            List[ndarray[Any, dtype[Any]]]: One numpy vector per input
            document, in the same order as ``documents``.
        """
        response = self.client.embeddings.create(input=documents, model=model)
        # Convert each returned embedding (a list of floats) to a numpy array.
        vectors = []
        for item in response.data:
            vectors.append(np.array(item.embedding))
        return vectors

    def chat(self, prompt: str, model: str, temperature: float = 0.0,
             max_tokens: int = 512) -> str:
        """
        Run a single-turn chat completion for the given prompt.

        Args:
            prompt (str): The user message sent to the model.
            model (str): Name of the chat model to use.
            temperature (float, optional): Sampling temperature; 0.0 makes
                the output as deterministic as the API allows. Defaults to 0.0.
            max_tokens (int, optional): Upper bound on generated tokens.
                Defaults to 512.

        Returns:
            str: The text of the model's reply.
        """
        # Single-turn conversation: the prompt is the only message.
        messages = [{"role": "user", "content": prompt}]
        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return response.choices[0].message.content
Loading

0 comments on commit 387633b

Please sign in to comment.