Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
neonwatty committed Nov 8, 2024
1 parent dc19d7e commit 31f471b
Show file tree
Hide file tree
Showing 21 changed files with 243 additions and 35 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pro-app-test.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: rails tests for meme search app
name: rails tests for meme search pro app

on:
workflow_dispatch:
Expand Down
37 changes: 24 additions & 13 deletions .github/workflows/standard-app-test.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python application
name: python tests for meme search standard app

on:
workflow_dispatch:
Expand All @@ -16,31 +13,45 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 3
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v2
- uses: chartboost/ruff-action@v1
- name: Checkout repository
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
args: "format --check"
config: .ruff.toml
python-version: "3.12"

- name: Install Ruff and dependencies
run: |
python -m pip install --upgrade pip
pip install ruff
- name: Run Ruff linting
run: |
ruff format --check --config ./meme_search/.ruff.toml ./meme_search/meme_search_app
test:
name: run pytest
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- name: Checkout code
uses: actions/checkout@v2

- name: Set up Python
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: "3.10"

- name: Install dependencies
run: |
cd meme_search
python -m pip install --upgrade pip
pip install -r requirements.test
pip install -r requirements.txt
- name: Run pytest
run: |
PYTHONPATH=. python3.10 -m pytest tests/test_app.py &&
PYTHONPATH=. python3.10 -m pytest tests/utilities/test_imgs.py &&
PYTHONPATH=. python3.10 -m pytest tests/utilities/test_query.py
cd meme_search &&
PYTHONPATH=. python3.10 -m pytest tests/test_app.py
# PYTHONPATH=. python3.10 -m pytest tests/utilities/test_add_remove.py ## requires large runner
6 changes: 3 additions & 3 deletions meme_search/meme_search_app/app.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import time
from meme_search import base_dir
from meme_search.utilities.query import complete_query
from meme_search.utilities.create import process
from meme_search_app import base_dir
from meme_search_app.utilities.query import complete_query
from meme_search_app.utilities.create import process
import streamlit as st

st.set_page_config(page_title="Meme Search")
Expand Down
6 changes: 3 additions & 3 deletions meme_search/meme_search_app/utilities/add.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import os
import sqlite3
import faiss
from meme_search.utilities import model
from meme_search.utilities.text_extraction import extract_text_from_imgs
from meme_search.utilities.chunks import create_all_img_chunks
from meme_search_app.utilities import model
from meme_search_app.utilities.text_extraction import extract_text_from_imgs
from meme_search_app.utilities.chunks import create_all_img_chunks


def add_to_chunk_db(img_chunks: list, sqlite_db_path: str) -> None:
Expand Down
8 changes: 4 additions & 4 deletions meme_search/meme_search_app/utilities/create.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from meme_search.utilities.status import get_input_directory_status
from meme_search.utilities.remove import remove
from meme_search.utilities.add import add
from meme_search.utilities import img_dir, sqlite_db_path, vector_db_path
from meme_search_app.utilities.status import get_input_directory_status
from meme_search_app.utilities.remove import remove
from meme_search_app.utilities.add import add
from meme_search_app.utilities import img_dir, sqlite_db_path, vector_db_path


def process() -> bool:
Expand Down
4 changes: 2 additions & 2 deletions meme_search/meme_search_app/utilities/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import numpy as np
from typing import Tuple, Union
import argparse
from meme_search.utilities import model
from meme_search.utilities import vector_db_path, sqlite_db_path
from meme_search_app.utilities import model
from meme_search_app.utilities import vector_db_path, sqlite_db_path


def query_vector_db(query: str, db_file_path: str, k: int = 10) -> Tuple[list, list]:
Expand Down
2 changes: 1 addition & 1 deletion meme_search/meme_search_app/utilities/status.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import sqlite3
from meme_search.utilities.imgs import collect_img_paths
from meme_search_app.utilities.imgs import collect_img_paths


def get_current_indexed_img_names(sqlite_db_path: str):
Expand Down
16 changes: 8 additions & 8 deletions meme_search/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
numpy
matplotlib
accelerate==0.25.0
huggingface-hub==0.20.1
Pillow==10.1.0
torch==2.1.2
torchvision==0.16.2
transformers==4.36.2
einops==0.7.0
sentence-transformers==2.2.2
accelerate
huggingface-hub
Pillow
torch
torchvision
transformers
einops
sentence-transformers
faiss-cpu
streamlit
7 changes: 7 additions & 0 deletions meme_search/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import os

# Name searched for in the compose status output, and the Streamlit entry
# script that the app test launches.
CONTAINER_NAME = "meme_search"
STREAMLIT_APP_FILE = "meme_search_app/app.py"

# Absolute path of the directory that holds this file.
base_test_dir = os.path.abspath(os.path.dirname(__file__))
# Working directory captured once, when the package is first imported.
cwd = os.getcwd()
27 changes: 27 additions & 0 deletions meme_search/tests/test_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import subprocess
import pytest
import time
from tests import STREAMLIT_APP_FILE


@pytest.fixture(scope="module")
def start_streamlit_app():
    """Launch the Streamlit app headlessly once per module and stop it afterwards.

    Yields:
        subprocess.Popen: handle of the running ``streamlit run`` process.
    """
    import sys  # local import: only needed to locate the current interpreter

    # An argument list (no shell) makes the Popen handle refer to the Streamlit
    # process itself, so poll()/terminate() act on the app rather than on an
    # intermediate shell. sys.executable keeps the app on the same interpreter
    # (and installed packages) that is running pytest.
    cmd = [
        sys.executable,
        "-m",
        "streamlit",
        "run",
        STREAMLIT_APP_FILE,
        "--server.headless",
        "true",
    ]
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    time.sleep(5)  # give the server time to boot before tests poll it
    yield process

    # Teardown: terminate() is harmless if a test already stopped the process.
    process.terminate()
    try:
        # communicate() drains stdout/stderr while waiting; a bare wait() can
        # block when the child has filled an unread pipe.
        process.communicate(timeout=10)
    except subprocess.TimeoutExpired:
        process.kill()
        process.communicate()


def test_streamlit(subtests, start_streamlit_app):
    """Check that the Streamlit process is running, then that it can be stopped.

    Args:
        subtests: pytest-subtests fixture used to report each phase separately.
        start_streamlit_app: Popen handle yielded by the module fixture.
    """
    with subtests.test(msg="streamlit up"):
        # poll() returns None while the process is still alive.
        assert start_streamlit_app.poll() is None, "Streamlit app failed to start"

    with subtests.test(msg="streamlit down"):
        start_streamlit_app.terminate()
        # Wait only as long as shutdown actually takes (bounded), instead of
        # sleeping a fixed interval and hoping the process is gone by then.
        try:
            start_streamlit_app.wait(timeout=10)
        except subprocess.TimeoutExpired:
            pass  # fall through so the assertion below reports the failure
        assert start_streamlit_app.poll() is not None, "Streamlit app failed to stop"
38 changes: 38 additions & 0 deletions meme_search/tests/test_compose.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import subprocess
from tests import cwd, CONTAINER_NAME


def terminal_process(command: list) -> int:
    """Run *command* from the recorded working directory and return its exit code.

    Args:
        command: program and arguments as a list (executed without a shell).

    Returns:
        The exit status of the finished process.
    """
    # subprocess.run() reads stdout/stderr while it waits. The previous
    # Popen(...PIPE...) + wait() combination never read the pipes, which can
    # deadlock once the child writes more than the OS pipe buffer holds.
    completed = subprocess.run(
        command,
        cwd=cwd,
        stdin=None,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return completed.returncode


def test_compose(subtests):
    """Cycle the compose stack (down, up, ps, down) and check each step.

    Args:
        subtests: pytest-subtests fixture used to report each phase separately.
    """
    # All three commands use the same `docker compose` CLI as argument lists,
    # so no shell is involved and no separate `docker-compose` binary is needed.
    up_cmd = ["docker", "compose", "up", "-d"]
    ps_cmd = ["docker", "compose", "ps"]
    down_cmd = ["docker", "compose", "down"]

    # Start from a clean state in case a previous run left the stack up.
    with subtests.test(msg="compose down"):
        code = terminal_process(down_cmd)
        assert code == 0, "compose down failed"

    with subtests.test(msg="compose up"):
        code = terminal_process(up_cmd)
        assert code == 0, "compose up failed"

    with subtests.test(msg="docker ps"):
        # No check=True: a non-zero exit must reach the assertion below so the
        # failure carries its message instead of raising CalledProcessError.
        result = subprocess.run(ps_cmd, cwd=cwd, capture_output=True)
        assert result.returncode == 0, "Failed to run docker compose ps"
        assert (
            bytes(CONTAINER_NAME, "utf-8") in result.stdout
        ), f"{CONTAINER_NAME} container not running"

    with subtests.test(msg="compose down"):
        code = terminal_process(down_cmd)
        assert code == 0, "compose down failed"
16 changes: 16 additions & 0 deletions meme_search/tests/utilities/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Relative paths of the sample memes test_meme_1.jpg through test_meme_9.jpg.
test_input_img_names = [f"data/input/test_meme_{idx}.jpg" for idx in range(1, 10)]

# Relative paths of the two database files under data/dbs.
test_db_names = ["data/dbs/" + db_file for db_file in ("memes.db", "memes.faiss")]
75 changes: 75 additions & 0 deletions meme_search/tests/utilities/test_add_remove.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import os
import shutil
import time
# The application package is `meme_search_app`; the former `meme_search`
# import path no longer matches the modules these helpers live in.
from meme_search_app.utilities.status import get_input_directory_status
from meme_search_app.utilities.add import add
from meme_search_app.utilities.remove import remove
from tests import base_test_dir

# Fixture locations, all rooted at <tests>/utilities.
utilities_dir = base_test_dir + "/utilities"
default_test_img_dir = utilities_dir + "/test_images/"
alt_test_img_dir = utilities_dir + "/test_images_alternate/"
db_test_dir = utilities_dir + "/test_dbs/"
sqlite_db_path = db_test_dir + "memes.db"
vector_db_path = db_test_dir + "memes.faiss"
# The single image that the tests shuttle between the two image directories.
test_img_default_location = default_test_img_dir + "test_meme_2.jpg"
test_img_alt_location = alt_test_img_dir + "test_meme_2.jpg"


def test_normalize(subtests):
    """Bring the default image directory and the test databases back in sync.

    Moves the shuttled image back to the default directory if it is currently
    in the alternate one, applies whatever removals/additions
    ``get_input_directory_status`` reports, then checks nothing is pending.

    Args:
        subtests: pytest-subtests fixture used to report each phase separately.
    """
    with subtests.test(msg="reset image directories"):
        try:
            shutil.move(test_img_alt_location, default_test_img_dir)
        except OSError:
            # Best-effort reset: the image may already be in the default
            # directory (source missing / destination exists). Only filesystem
            # errors are ignored, so unrelated bugs are no longer hidden as
            # they were by a bare `except`.
            pass

        time.sleep(2)

        old_imgs_to_be_removed, new_imgs_to_be_indexed = get_input_directory_status(
            default_test_img_dir, sqlite_db_path
        )
        if len(old_imgs_to_be_removed) > 0:
            assert remove(old_imgs_to_be_removed, sqlite_db_path, vector_db_path) is None

        if len(new_imgs_to_be_indexed) > 0:
            assert add(new_imgs_to_be_indexed, sqlite_db_path, vector_db_path) is None

    with subtests.test(msg="normalize final check"):
        old_imgs_to_be_removed, new_imgs_to_be_indexed = get_input_directory_status(
            default_test_img_dir, sqlite_db_path
        )
        assert len(old_imgs_to_be_removed) == 0
        assert len(new_imgs_to_be_indexed) == 0


def test_remove(subtests):
    """Move the shuttled image to the alternate directory and remove it from the databases.

    After the removal, ``get_input_directory_status`` must report nothing left
    to remove or to index.
    """
    with subtests.test(msg="move from default to alt"):
        shutil.move(test_img_default_location, alt_test_img_dir)
        time.sleep(5)
        arrived = os.path.exists(test_img_alt_location)
        assert arrived, "FAILURE: image could not be moved from default to alt location"

    with subtests.test(msg="remove old imgs"):
        status = get_input_directory_status(default_test_img_dir, sqlite_db_path)
        stale_imgs, fresh_imgs = status
        assert len(stale_imgs) > 0
        assert len(fresh_imgs) == 0
        outcome = remove(stale_imgs, sqlite_db_path, vector_db_path)
        assert outcome is None, "FAILURE: removing moving image"

    with subtests.test(msg="remove final check"):
        status = get_input_directory_status(default_test_img_dir, sqlite_db_path)
        stale_imgs, fresh_imgs = status
        assert len(stale_imgs) == 0
        assert len(fresh_imgs) == 0


def test_add(subtests):
    """Move the shuttled image back to the default directory and add it to the databases.

    After the addition, ``get_input_directory_status`` must report nothing left
    to remove or to index.
    """
    with subtests.test(msg="move from alt to default"):
        shutil.move(test_img_alt_location, default_test_img_dir)
        time.sleep(5)
        arrived = os.path.exists(test_img_default_location)
        assert arrived, "FAILURE: image could not be moved from alt to default location"

    with subtests.test(msg="add new img"):
        status = get_input_directory_status(default_test_img_dir, sqlite_db_path)
        stale_imgs, fresh_imgs = status
        assert len(fresh_imgs) > 0
        assert len(stale_imgs) == 0
        outcome = add(fresh_imgs, sqlite_db_path, vector_db_path)
        assert outcome is None, "FAILURE: adding image"

    with subtests.test(msg="add final check"):
        status = get_input_directory_status(default_test_img_dir, sqlite_db_path)
        stale_imgs, fresh_imgs = status
        assert len(stale_imgs) == 0
        assert len(fresh_imgs) == 0
Binary file added meme_search/tests/utilities/test_dbs/memes.faiss
Binary file not shown.
Empty file.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file.
20 changes: 20 additions & 0 deletions meme_search/tests/utilities/test_imgs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import os
from meme_search import meme_search_root_dir
from meme_search.utilities.imgs import collect_img_paths, allowable_extensions


def list_files_in_directory(directory_path):
    """Return the names of regular files in *directory_path* with an allowed extension.

    The extension is the text after the last "." in the file name and must be
    a member of ``allowable_extensions``. If the directory cannot be read, the
    error is printed and an empty list is returned.
    """
    try:
        matches = []
        for entry in os.listdir(directory_path):
            # Skip sub-directories and anything else that is not a regular file.
            if not os.path.isfile(os.path.join(directory_path, entry)):
                continue
            if entry.rsplit(".", 1)[-1] in allowable_extensions:
                matches.append(entry)
        return matches
    except OSError as error:
        print(f"Error accessing directory '{directory_path}': {error}")
        return []


def test_collect_img_paths():
    """Every allowed image file found on disk must also be returned by collect_img_paths."""
    img_data_path = meme_search_root_dir + "/data/input"
    names_on_disk = set(list_files_in_directory(img_data_path))
    # collect_img_paths yields paths; compare on the final path component only.
    collected_names = {path.split("/")[-1] for path in collect_img_paths(img_data_path)}
    missing = names_on_disk - collected_names
    assert len(missing) == 0
14 changes: 14 additions & 0 deletions meme_search/tests/utilities/test_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# The application package is `meme_search_app`; the former `meme_search`
# import path no longer matches where the query helpers live.
from meme_search_app.utilities.query import complete_query
import pytest


# (query text, file name expected as the first result) pairs for the
# parametrized query test.
test_queries = [
    ("two capsules", "test_meme_5.jpg"),
    ("no", "test_meme_9.jpg"),
]


@pytest.mark.parametrize("query, top_result", test_queries)
def test_complete_query(query, top_result):
    """The first entry returned for *query* must point at the file named *top_result*."""
    first_entry = complete_query(query)[0]
    first_entry_name = first_entry["img_path"].split("/")[-1]
    assert first_entry_name == top_result

0 comments on commit 31f471b

Please sign in to comment.