Skip to content

Commit

Permalink
Merge pull request VikParuchuri#259 from VikParuchuri/dev
Browse files (browse the repository at this point in the history)
Bugfixes and `pdftext` improvements
  • Loading branch information
iammosespaulr authored Dec 12, 2024
2 parents 0ce57a2 + a3fde2f commit b46d5ce
Show file tree
Hide file tree
Showing 6 changed files with 590 additions and 757 deletions.
12 changes: 7 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -230,13 +230,15 @@ Setting the `LAYOUT_BATCH_SIZE` env var properly will make a big difference when
from PIL import Image
from surya.detection import batch_text_detection
from surya.layout import batch_layout_detection
- from surya.model.layout.model import load_model, load_processor
+ from surya.model.detection.model import load_model as load_det_model, load_processor as load_det_processor
+ from surya.model.layout.model import load_model as load_layout_model
+ from surya.model.layout.processor import load_processor as load_layout_processor

image = Image.open(IMAGE_PATH)
- model = load_model()
- processor = load_processor()
- det_model = load_model()
- det_processor = load_processor()
+ model = load_layout_model()
+ processor = load_layout_processor()
+ det_model = load_det_model()
+ det_processor = load_det_processor()

# layout_predictions is a list of dicts, one per image
line_predictions = batch_text_detection([image], det_model, det_processor)
Expand Down
10 changes: 5 additions & 5 deletions ocr_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,22 +204,22 @@ def page_count(pdf_file):
if text_det:
det_img, pred = text_detection(pil_image)
with col1:
- st.image(det_img, caption="Detected Text", use_column_width=True)
+ st.image(det_img, caption="Detected Text", use_container_width=True)
st.json(pred.model_dump(exclude=["heatmap", "affinity_map"]), expanded=True)


# Run layout
if layout_det:
layout_img, pred = layout_detection(pil_image)
with col1:
- st.image(layout_img, caption="Detected Layout", use_column_width=True)
+ st.image(layout_img, caption="Detected Layout", use_container_width=True)
st.json(pred.model_dump(exclude=["segmentation_map"]), expanded=True)

# Run OCR
if text_rec:
rec_img, pred = ocr(pil_image, pil_image_highres, languages)
with col1:
- st.image(rec_img, caption="OCR Result", use_column_width=True)
+ st.image(rec_img, caption="OCR Result", use_container_width=True)
json_tab, text_tab = st.tabs(["JSON", "Text Lines (for debugging)"])
with json_tab:
st.json(pred.model_dump(), expanded=True)
Expand All @@ -230,8 +230,8 @@ def page_count(pdf_file):
if table_rec:
table_img, pred = table_recognition(pil_image, pil_image_highres, in_file, page_number - 1 if page_number else None, use_pdf_boxes, skip_table_detection)
with col1:
- st.image(table_img, caption="Table Recognition", use_column_width=True)
+ st.image(table_img, caption="Table Recognition", use_container_width=True)
st.json([p.model_dump() for p in pred], expanded=True)

with col2:
- st.image(pil_image, caption="Uploaded Image", use_column_width=True)
+ st.image(pil_image, caption="Uploaded Image", use_container_width=True)
Loading

0 comments on commit b46d5ce

Please sign in to comment.