Skip to content

Commit

Permalink
clarify public disclosures to prevent 'sorry I can't do that' response
Browse files Browse the repository at this point in the history
  • Loading branch information
chance-on-brink committed Nov 16, 2024
1 parent 1f05fe4 commit 6c72ce2
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 21 deletions.
15 changes: 8 additions & 7 deletions automated_updates/modules/process/parse_house_clean_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,18 @@
import re

def assets_from_house_clean_image_to_csv(input_image_path):

base64_image = encode_image(input_image_path)
response = send_to_api("is there an assets table in the image with a column called 'Asset'? answer Y or N only.", base64_image)
if False:
# The idea was to skip pages that don't list assets, but the extra Y/N check wastes tokens and sending every page to the model doesn't seem to hurt accuracy, so the check is disabled.
response = send_to_api("is there an assets table in the image with a column called 'Asset'? answer Y or N only.", base64_image)

if response.lower() == 'n':
# not present, skip the page
print('skipped')
return []
if response.lower() == 'n':
# not present, skip the page
print('skipped')
return []

# use '|' separator to avoid characters in asset names
response = send_to_api("get the asset names in the 'Asset' column of the Assets table. return only a | separated list. no other commentary.", base64_image)
response = send_to_api("get the asset names in the 'Asset' column of the Assets table in the public disclosure form. return only a | separated list. no other commentary.", base64_image)
asset_list = [response.strip() for response in response.split("|")]

return asset_list
Expand Down
16 changes: 9 additions & 7 deletions automated_updates/modules/process/parse_house_messy_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,18 @@

def assets_from_house_messy_image_to_csv(input_image_path):
base64_image = encode_image(input_image_path)
response = send_to_api(message="is this the schedule A part of the form that lists assets? answer Y or N only.",
base64_image=base64_image)
if False:
# The idea was to skip pages that don't list assets, but the extra Y/N check wastes tokens and sending every page to the model doesn't seem to hurt accuracy, so the check is disabled.
response = send_to_api(message="is this the schedule A part of the form that lists assets? answer Y or N only.",
base64_image=base64_image)

if response.lower() == 'n':
# not schedule A, skip the page
print('skipped')
return []
if response.lower() == 'n':
# not schedule A, skip the page
print('skipped')
return []

# use '|' separator to avoid characters in asset names
response = send_to_api(message="get the asset names in the image, return them in a | separated list only, no other commentary.",
response = send_to_api(message="get the asset names in the public disclosure form, return them in a | separated list only, no other commentary.",
base64_image=base64_image,
model="gpt-4o")
asset_list = [response.strip() for response in response.split("|")]
Expand Down
16 changes: 9 additions & 7 deletions automated_updates/modules/process/parse_senate_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,18 @@

def assets_from_senate_image_to_csv(input_image_path):
base64_image = encode_image(input_image_path)
response = send_to_api(message="does this part of the form list asset disclosures? answer Y or N only.",
base64_image=base64_image)
if False:
# The idea was to skip pages that don't list assets, but the extra Y/N check wastes tokens and sending every page to the model doesn't seem to hurt accuracy, so the check is disabled.
response = send_to_api(message="does this part of the form list asset disclosures? answer Y or N only.",
base64_image=base64_image)

if response.lower() == 'n':
print('skipped')
# no asset disclosures on this page, skip it
return []
if response.lower() == 'n':
print('skipped')
# no asset disclosures on this page, skip it
return []

# use '|' separator to avoid characters in asset names
response = send_to_api(message="get the asset names in the image. return them in a | separated list only. no other commentary.",
response = send_to_api(message="get the asset names in the public disclosure form. return them in a | separated list only. no other commentary.",
base64_image=base64_image,
model='gpt-4o')
asset_list = [response.strip() for response in response.split("|")]
Expand Down

0 comments on commit 6c72ce2

Please sign in to comment.