Configure stopping criteria
VikParuchuri committed Oct 9, 2023
1 parent 9934ee9 commit bb3c043
Showing 10 changed files with 60 additions and 24 deletions.
17 changes: 16 additions & 1 deletion app/lesson/tasks.py
@@ -22,6 +22,7 @@ async def generate_lesson(
outline: List[str],
revision: int,
research_notes: List[ResearchNote] | None = None,
sections_per_generation: int = settings.SECTIONS_PER_GENERATION,
) -> List[AllLessonComponentData] | None:
# Add numbers to the outline - needed for generating the lesson
numbered_outline = outline
@@ -62,9 +63,14 @@ async def generate_lesson(
current_section = f"{last_section.strip()}\n\n{current_section_header.strip()}"
current_section = f"{current_section}\n"

# When to stop generation
stop_section = None
if generated_sections + sections_per_generation < len(numbered_outline):
stop_section = numbered_outline[generated_sections + sections_per_generation]

# Filter research notes to save tokens, only keep notes relevant to the next 5 sections
# Find the indices of the next sections
future_sections = set(list(range(generated_sections, len(numbered_outline)))[:5])
future_sections = set(list(range(generated_sections, len(numbered_outline)))[:sections_per_generation])
selected_research_notes = None
if research_notes is not None:
selected_research_notes = []
@@ -84,6 +90,7 @@
research_notes=selected_research_notes,
include_examples=settings.INCLUDE_EXAMPLES,
cache=use_cache,
stop_section=stop_section,
)
new_components = []
new_component_keys = []
@@ -137,6 +144,7 @@ async def generate_single_lesson_chunk(
research_notes: List[ResearchNote] | None,
include_examples: bool,
cache: bool,
stop_section: str | None = None,
) -> AsyncGenerator[List[AllLessonComponentData], None]:
response = generate_lessons(
numbered_outline,
@@ -148,8 +156,15 @@
research_notes=research_notes,
include_examples=include_examples,
cache=cache,
stop_section=stop_section,
)

section_start = f"---{ComponentNames.section}"

async for chunk in response:
# Remove the final section header from the chunk
# This happens when we hit the stop token
if chunk.strip().endswith(section_start):
chunk = chunk.strip()[:-len(section_start)]
new_components = parse_lesson_markdown(chunk)
yield new_components
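
Taken together, the tasks.py changes choose the outline heading that lies sections_per_generation entries past the last generated section, hand it to the LLM as a stop sequence, and trim any partial section marker the stop leaves at the end of the final chunk. A minimal, runnable sketch of that selection and trimming follows; the outline contents and the literal "---section" marker (standing in for f"---{ComponentNames.section}") are illustrative assumptions, not values from the repository.

# Sketch of the stop-section selection and trailing-marker trimming added above.
numbered_outline = [f"{i}. Heading {i}" for i in range(1, 11)]  # illustrative outline
generated_sections = 2
sections_per_generation = 5

stop_section = None
if generated_sections + sections_per_generation < len(numbered_outline):
    stop_section = numbered_outline[generated_sections + sections_per_generation]
# stop_section == "8. Heading 8": generation should halt before this section.

section_start = "---section"  # assumed value of f"---{ComponentNames.section}"
chunk = "Lesson text for the current section...\n\n---section"
if chunk.strip().endswith(section_start):
    chunk = chunk.strip()[:-len(section_start)]  # drop the dangling header fragment

print(stop_section)
print(repr(chunk))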
16 changes: 8 additions & 8 deletions app/llm/adaptors/oai.py
@@ -30,7 +30,7 @@ async def oai_chat_wrapped(
history: List,
temperature: float,
max_tokens: int,
stop_tokens: Optional[List] = None,
stop_sequences: Optional[List] = None,
model: str = settings.LLM_TYPE,
) -> AsyncGenerator[str, None]:
response = await openai.ChatCompletion.acreate(
@@ -39,7 +39,7 @@
temperature=temperature,
max_tokens=max_tokens,
n=1,
stop=stop_tokens,
stop=stop_sequences,
stream=True,
)
async for chunk in response:
@@ -54,7 +54,7 @@ async def oai_prompt_wrapped(
prompt: str,
temperature: float,
max_tokens: int,
stop_tokens: Optional[List] = None,
stop_sequences: Optional[List] = None,
model: str = settings.LLM_TYPE,
) -> AsyncGenerator[str, None]:
response = await openai.Completion.acreate(
@@ -63,7 +63,7 @@
temperature=temperature,
max_tokens=max_tokens,
n=1,
stop=stop_tokens,
stop=stop_sequences,
stream=True,
)
async for chunk in response:
@@ -78,7 +78,7 @@ async def oai_prompt_response(
temperature: float = settings.LLM_TEMPERATURE,
timeout: int = settings.LLM_TIMEOUT,
max_tokens: int = settings.LLM_MAX_RESPONSE_TOKENS,
stop_tokens=None,
stop_sequences=None,
model: str = settings.LLM_TYPE,
) -> Optional[AsyncGenerator[LLMResponse, None]]:
response_tokens = 0
@@ -88,7 +88,7 @@
temperature,
max_tokens,
timeout=timeout,
stop_tokens=stop_tokens,
stop_sequences=stop_sequences,
model=model,
)
async for chunk in response:
@@ -113,7 +113,7 @@ async def oai_chat_response(
timeout: int = settings.LLM_TIMEOUT,
max_tokens: int = settings.LLM_MAX_RESPONSE_TOKENS,
history=None,
stop_tokens=None,
stop_sequences=None,
model: str = settings.LLM_TYPE,
) -> Optional[AsyncGenerator[LLMResponse, None]]:
current_message = {"role": "user", "content": prompt}
@@ -130,7 +130,7 @@
temperature,
max_tokens,
timeout=timeout,
stop_tokens=stop_tokens,
stop_sequences=stop_sequences,
model=model,
)
async for chunk in response:
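
In the OpenAI adaptor the change is a rename only: the list is still forwarded unchanged as the stop argument, which the OpenAI API caps at four sequences and excludes from the returned text. A hedged, non-streaming sketch against the pre-1.0 openai client this module targets; the model name and stop value are illustrative.

import openai

async def chat_once(prompt: str, stop_sequences=None):
    # Sketch only: stop_sequences is passed straight through as `stop`.
    response = await openai.ChatCompletion.acreate(
        model="gpt-3.5-turbo",  # illustrative model name
        messages=[{"role": "user", "content": prompt}],
        temperature=0.4,
        max_tokens=512,
        n=1,
        stop=stop_sequences,  # e.g. ["8. Next section heading"]
        stream=False,
    )
    return response["choices"][0]["message"]["content"]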
1 change: 0 additions & 1 deletion app/llm/generators/concepts.py
@@ -26,7 +26,6 @@ class CourseGeneratedConcepts(BaseModel):
temperature=0.7,
max_tokens=256,
timeout=40,
stop_tokens=None,
prompt_type="concept",
model=settings.LLM_INSTRUCT_TYPE,
)
10 changes: 7 additions & 3 deletions app/llm/generators/lesson.py
@@ -12,8 +12,7 @@
lesson_settings = GenerationSettings(
temperature=0.4,
max_tokens=6000,
timeout=480,
stop_tokens=None,
timeout=1200,
prompt_type="lesson",
)

@@ -121,6 +120,7 @@ async def generate_lessons(
include_examples: bool = True,
update_after_chars: int = 500,
cache: bool = True,
stop_section: str | None = None,
) -> AsyncGenerator[str, None]:
prompt = lesson_prompt(
outline,
@@ -133,7 +133,11 @@
)

text = ""
response = generate_response(prompt, lesson_settings, cache=cache, revision=revision)
stop_sequences = None
if stop_section is not None:
stop_sequences = [stop_section]

response = generate_response(prompt, lesson_settings, cache=cache, revision=revision, stop_sequences=stop_sequences)
chunk_len = 0

# Yield text in batches, to avoid creating too many DB models
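
On the lesson generator side the wiring is minimal: when a stop_section heading is supplied it is wrapped into a one-element stop_sequences list before calling generate_response; otherwise None is passed and the call runs to its token limit. A tiny sketch of that wrapping (the heading value is illustrative):

def build_stop_sequences(stop_section):
    # Mirrors the wrapping added to generate_lessons above.
    return [stop_section] if stop_section is not None else None

print(build_stop_sequences("8. Evaluation and testing"))  # ['8. Evaluation and testing']
print(build_stop_sequences(None))                         # None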
1 change: 0 additions & 1 deletion app/llm/generators/outline.py
@@ -20,7 +20,6 @@
temperature=0.6,
max_tokens=2048,
timeout=60,
stop_tokens=None,
prompt_type="outline",
model=settings.LLM_INSTRUCT_TYPE,
)
1 change: 0 additions & 1 deletion app/llm/generators/topic.py
@@ -14,7 +14,6 @@
temperature=0.9,
max_tokens=512,
timeout=40,
stop_tokens=None,
prompt_type="topic",
model=settings.LLM_INSTRUCT_TYPE,
)
22 changes: 18 additions & 4 deletions app/llm/llm.py
@@ -26,16 +26,30 @@ async def generate_response(
max_tries: int = 2,
cache: bool = True,
revision: int = 1,
stop_sequences: Optional[List[str]] = None,
) -> AsyncGenerator[str, None]:
temperature = prompt_settings.temperature
max_tokens = prompt_settings.max_tokens
timeout = prompt_settings.timeout
stop_tokens = prompt_settings.stop_tokens
prompt_stops = prompt_settings.stop_sequences
prompt_type = prompt_settings.prompt_type
model = (
prompt_settings.model or settings.LLM_TYPE
) # Use default model if not specified

# Stop sequences for the llm
stops = []
if prompt_stops is not None:
stops.extend(prompt_stops)
if stop_sequences is not None:
stops.extend(stop_sequences)

# Only support up to 4 stop sequences
if len(stops) == 0:
stops = None
else:
stops = stops[:4]

# Remove utf-8 surrogate characters
prompt = fix_unicode_text(prompt)

@@ -84,7 +98,7 @@ async def generate_response(
timeout,
max_tokens,
history,
stop_tokens,
stops,
model=model,
)
case "gpt-3.5-turbo-instruct":
Expand All @@ -102,7 +116,7 @@ async def generate_response(
temperature,
timeout,
max_tokens,
stop_tokens,
stops,
model=model,
)
case _:
Expand All @@ -127,7 +141,7 @@ async def generate_response(
temperature,
timeout,
max_tokens,
stop_tokens,
stops,
model=model,
)
break
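
generate_response now merges two sources of stop strings, the optional stop_sequences on the prompt's GenerationSettings and the per-call stop_sequences argument, then truncates the combined list to four entries (the OpenAI maximum) and collapses an empty list to None. A self-contained sketch of that merge, with illustrative inputs:

from typing import List, Optional

def merge_stops(prompt_stops: Optional[List[str]],
                call_stops: Optional[List[str]]) -> Optional[List[str]]:
    # Combine prompt-level and per-call stop sequences, keeping at most four.
    stops: List[str] = []
    if prompt_stops is not None:
        stops.extend(prompt_stops)
    if call_stops is not None:
        stops.extend(call_stops)
    return stops[:4] if stops else None

print(merge_stops(None, ["8. Next section"]))        # ['8. Next section']
print(merge_stops(["---end"], ["8. Next section"]))  # ['---end', '8. Next section']
print(merge_stops(None, None))                       # None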
2 changes: 1 addition & 1 deletion app/llm/schemas.py
@@ -12,7 +12,7 @@ class GenerationSettings(BaseModel):
temperature: float
max_tokens: int
timeout: int
stop_tokens: Optional[List[str]]
stop_sequences: Optional[List[str]]
prompt_type: str
component_name: Optional[str]
model: Optional[str]
4 changes: 3 additions & 1 deletion app/settings.py
@@ -19,6 +19,7 @@ class Settings(BaseSettings):

# Content
SECTIONS_PER_LESSON: int = 30 # Lower this to make books shorter
SECTIONS_PER_GENERATION: int = 5 # How many sections to generate in one prompt
MAX_DOWNLOAD_SIZE: int = 6 * 1024 * 1024 # Max pdf size to download, 6 MB
FINETUNED: bool = False # If we're using a finetuned textbook gen model
INCLUDE_EXAMPLES: bool = (
@@ -36,7 +37,7 @@
}

LLM_TEMPERATURE: float = 0.5
LLM_TIMEOUT: int = 120
LLM_TIMEOUT: int = 480
LLM_MAX_RESPONSE_TOKENS: int = 2048
OPENAI_KEY: str = ""
OPENAI_BASE_URL: Optional[str] = None
@@ -56,6 +57,7 @@
# General
THREADS_PER_WORKER: int = 1 # How many threads to use per worker process to save RAM
RAY_CACHE_PATH: Optional[str] = None # Where to save ray cache
RAY_DASHBOARD_HOST: str = "0.0.0.0"

class Config:
env_file = find_dotenv("local.env")
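
The settings change introduces SECTIONS_PER_GENERATION, which bounds how many outline sections one prompt covers, and raises the default LLM_TIMEOUT to accommodate the longer lesson calls. A sketch of the affected subset, assuming the pydantic v1-style BaseSettings used elsewhere in the project:

from pydantic import BaseSettings  # assumed pydantic v1-style import

class Settings(BaseSettings):
    # Subset sketch of the changed values; all other fields omitted.
    SECTIONS_PER_LESSON: int = 30        # total sections per generated book
    SECTIONS_PER_GENERATION: int = 5     # sections generated in one prompt
    LLM_TIMEOUT: int = 480               # per-request timeout in seconds (was 120)
    RAY_DASHBOARD_HOST: str = "0.0.0.0"  # bind address for the Ray dashboard

settings = Settings()
print(settings.SECTIONS_PER_GENERATION, settings.LLM_TIMEOUT)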
10 changes: 7 additions & 3 deletions book_generator.py
@@ -51,7 +51,7 @@ def get_json_data_from_course(course: Course, extended_fields=False):
return json.dumps(json_data)


async def generate_single_course(model, course_data: Dict | str, revision=1, outline_items=12):
async def generate_single_course(model, course_data: Dict | str, revision=1, outline_items=12, cache_only=False):
components = ["exercise", "example"]

outline = None
@@ -69,6 +69,9 @@ async def generate_single_course(model, course_data: Dict | str, revision=1, out
await asyncio.sleep(.001) # Sleep to avoid high CPU usage with many workers
return course

if cache_only:
return None

if not outline:
# Only generate outline if one was not passed in
concepts = await create_course_concepts(course_name, revision)
@@ -123,7 +126,7 @@ async def generate_single_course(model, course_data: Dict | str, revision=1, out

async def _process_course(model, topic, args):
try:
return await generate_single_course(model, topic, revision=args.revision)
return await generate_single_course(model, topic, revision=args.revision, cache_only=args.cache_only)
except Exception as e:
debug_print_trace()
print(f"Unhandled error generating course: {e}")
@@ -176,6 +179,7 @@ def to_iterator(obj_ids):
parser.add_argument("--extended-fields", action="store_true", default=False, help="Include extended fields in output")
parser.add_argument("--no_cache", action="store_true", default=False, help="Don't use the cache")
parser.add_argument("--revision", type=int, default=1, help="Revision number for the course. Change this to avoid hitting cache if you want to regenerate a course.")
parser.add_argument("--cache-only", action="store_true", default=False, help="Only use the cache, don't generate any new courses")

args = parser.parse_args()

@@ -202,7 +206,7 @@ def to_iterator(obj_ids):
total_processes = math.ceil(args.workers / settings.THREADS_PER_WORKER)
func = process_courses

ray.init(num_cpus=total_processes, storage=settings.RAY_CACHE_PATH, _temp_dir=settings.RAY_CACHE_PATH)
ray.init(num_cpus=total_processes, storage=settings.RAY_CACHE_PATH, _temp_dir=settings.RAY_CACHE_PATH, dashboard_host=settings.RAY_DASHBOARD_HOST)

model = SentenceTransformer("thenlper/gte-small")
model_ref = ray.put(model)
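
The new --cache-only flag is threaded through _process_course into generate_single_course, which now short-circuits: a cached course is returned as before, but a cache miss returns None instead of kicking off generation, so a run can be limited to material that already exists. A self-contained sketch of that control flow; the cache lookup helper below is a hypothetical stand-in, not a function from the repository.

import asyncio

async def load_cached_course(course_name, revision):
    # Hypothetical stand-in for the real cache lookup.
    return None

async def generate_single_course(course_name, revision=1, cache_only=False):
    course = await load_cached_course(course_name, revision)
    if course is not None:
        return course  # cache hit: reuse the stored course
    if cache_only:
        return None    # cache miss in cache-only mode: skip generation entirely
    return f"(generated course for {course_name})"  # placeholder for full generation

print(asyncio.run(generate_single_course("Linear Algebra", cache_only=True)))   # None
print(asyncio.run(generate_single_course("Linear Algebra", cache_only=False)))  # placeholder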
