Skip to content

Commit

Permalink
feat: update StructuredOutputParser and CombiningOutputParser prompts…
Browse files Browse the repository at this point in the history
… for increased reliability, add integration tests (langchain-ai#1109)

* feat: integration tests for StructuredOutputParser

* Update StructuredOutputParser prompt to improve reliability

* Fix test

* Fix combining output parser and add integration test for it, other tests

---------

Co-authored-by: Jacob Lee <[email protected]>
  • Loading branch information
jacobrosenthal and Jacob Lee authored May 5, 2023
1 parent caa0203 commit ca8302f
Show file tree
Hide file tree
Showing 6 changed files with 270 additions and 40 deletions.
18 changes: 14 additions & 4 deletions langchain/src/output_parsers/combining.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ export type CombinedOutput = Record<string, any>;
export class CombiningOutputParser extends BaseOutputParser {
parsers: BaseOutputParser[];

outputDelimiter = "-----";

constructor(...parsers: BaseOutputParser[]) {
super();
this.parsers = parsers;
Expand All @@ -19,15 +21,20 @@ export class CombiningOutputParser extends BaseOutputParser {
async parse(input: string, callbacks?: Callbacks): Promise<CombinedOutput> {
const inputs = input
.trim()
.split(/Output \d+:/)
.split(
new RegExp(`${this.outputDelimiter}Output \\d+${this.outputDelimiter}`)
)
.slice(1);
const ret: CombinedOutput = {};
for (const [i, p] of this.parsers.entries()) {
let parsed;
try {
const extracted = inputs[i].includes("```")
let extracted = inputs[i].includes("```")
? inputs[i].trim().split(/```/)[1]
: inputs[i].trim();
if (extracted.endsWith(this.outputDelimiter)) {
extracted = extracted.slice(0, -this.outputDelimiter.length);
}
parsed = await p.parse(extracted, callbacks);
} catch (e) {
parsed = await p.parse(input.trim(), callbacks);
Expand All @@ -39,9 +46,12 @@ export class CombiningOutputParser extends BaseOutputParser {

getFormatInstructions(): string {
return `${[
`Return the following ${this.parsers.length} outputs, each formatted as described below:`,
`Return the following ${this.parsers.length} outputs, each formatted as described below. Include the delimiter characters "${this.outputDelimiter}" in your response:`,
...this.parsers.map(
(p, i) => `Output ${i + 1}:\n${p.getFormatInstructions().trim()}`
(p, i) =>
`${this.outputDelimiter}Output ${i + 1}${this.outputDelimiter}\n${p
.getFormatInstructions()
.trim()}\n${this.outputDelimiter}`
),
].join("\n\n")}\n`;
}
Expand Down
15 changes: 10 additions & 5 deletions langchain/src/output_parsers/structured.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,18 @@ export class StructuredOutputParser<
}

getFormatInstructions(): string {
return `The output should be formatted as a JSON instance that conforms to the JSON schema below.
return `You must format your output as a JSON value that adheres to a given "JSON Schema" instance.
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
"JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.
Here is the output schema:
\`\`\`
For example, the example "JSON Schema" instance {{"properties": {{"foo": {{"description": "a list of test words", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}}}
would match an object with one required property, "foo". The "type" property specifies "foo" must be an "array", and the "description" property semantically describes it as "a list of test words". The items within "foo" must be strings.
Thus, the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of this example "JSON Schema". The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match exactly!
Here is the JSON Schema instance your output must adhere to:
\`\`\`json
${JSON.stringify(zodToJsonSchema(this.schema))}
\`\`\`
`;
Expand Down
46 changes: 46 additions & 0 deletions langchain/src/output_parsers/tests/combining.int.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import { test } from "@jest/globals";

import { OpenAI } from "../../llms/openai.js";
import { PromptTemplate } from "../../prompts/index.js";
import {
StructuredOutputParser,
RegexParser,
CombiningOutputParser,
} from "../index.js";

/**
 * Integration test for CombiningOutputParser.
 *
 * Combines a StructuredOutputParser (fields `answer` and `source`) with a
 * RegexParser (groups `confidence` and `explanation`), injects the combined
 * format instructions into a prompt, sends the formatted prompt through the
 * `OpenAI` LLM wrapper and parses the completion with the combined parser.
 *
 * The test contains no assertions: it logs the prompt, the raw completion and
 * the parsed result, and passes as long as nothing throws.
 */
test("CombiningOutputParser", async () => {
  // Sub-parser built from field names and their descriptions.
  const structuredPart = StructuredOutputParser.fromNamesAndDescriptions({
    answer: "answer to the user's question",
    source: "source used to answer the user's question, should be a website.",
  });

  // Sub-parser driven by a regex with two capture groups; the third argument
  // is passed through to RegexParser unchanged.
  const regexPart = new RegexParser(
    /Confidence: (A|B|C), Explanation: (.*)/,
    ["confidence", "explanation"],
    "noConfidence"
  );

  const combined = new CombiningOutputParser(structuredPart, regexPart);

  const promptTemplate = new PromptTemplate({
    template:
      "Answer the users question as best as possible.\n{format_instructions}\n{question}",
    inputVariables: ["question"],
    // The format instructions are fixed up front; only `question` is supplied
    // when the prompt is formatted.
    partialVariables: { format_instructions: combined.getFormatInstructions() },
  });

  const llm = new OpenAI({ temperature: 0 });

  const renderedPrompt = await promptTemplate.format({
    question: "What is the capital of France?",
  });
  console.log(renderedPrompt);

  const completion = await llm.call(renderedPrompt);
  console.log(completion);

  const parsedOutput = await combined.parse(completion);
  console.log(parsedOutput);
});
31 changes: 20 additions & 11 deletions langchain/src/output_parsers/tests/combining.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,40 @@ test("CombiningOutputParser", async () => {
);

expect(parser.getFormatInstructions()).toMatchInlineSnapshot(`
"Return the following 2 outputs, each formatted as described below:
"Return the following 2 outputs, each formatted as described below. Include the delimiter characters "-----" in your response:
Output 1:
The output should be formatted as a JSON instance that conforms to the JSON schema below.
-----Output 1-----
You must format your output as a JSON value that adheres to a given "JSON Schema" instance.
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
"JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.
Here is the output schema:
\`\`\`
For example, the example "JSON Schema" instance {{"properties": {{"foo": {{"description": "a list of test words", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}}}
would match an object with one required property, "foo". The "type" property specifies "foo" must be an "array", and the "description" property semantically describes it as "a list of test words". The items within "foo" must be strings.
Thus, the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of this example "JSON Schema". The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match exactly!
Here is the JSON Schema instance your output must adhere to:
\`\`\`json
{"type":"object","properties":{"url":{"type":"string","description":"A link to the resource"}},"required":["url"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}
\`\`\`
-----
Output 2:
-----Output 2-----
Your response should match the following regex: /Confidence: (A|B|C), Explanation: (.*)/
-----
"
`);

expect(
await parser.parse(
`Output 0:
`-----Output 0-----
{"url": "https://en.wikipedia.org/wiki/Paris"}
-----
Output 1:
Confidence: A, Explanation: Because it is the capital of France.`
-----Output 1-----
Confidence: A, Explanation: Because it is the capital of France.
-----`
)
).toMatchInlineSnapshot(`
{
Expand Down
140 changes: 140 additions & 0 deletions langchain/src/output_parsers/tests/structured.int.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import { expect, test } from "@jest/globals";

import { StructuredOutputParser } from "../structured.js";
import { OpenAI } from "../../llms/openai.js";
import { ChatOpenAI } from "../../chat_models/openai.js";
import { LLMChain } from "../../chains/index.js";
import {
ChatPromptTemplate,
PromptTemplate,
SystemMessagePromptTemplate,
} from "../../prompts/index.js";

/**
 * Integration test: StructuredOutputParser used as the output parser of an
 * LLMChain that is backed by the `OpenAI` LLM wrapper.
 *
 * The context passage contains typographic quotes and apostrophes. The chain
 * result stored under `questions` is expected to expose the three fields the
 * parser was built with.
 */
test("StructuredOutputParser deals special chars in prompt with llm model", async () => {
  const llm = new OpenAI({
    temperature: 0,
  });

  const questionParser = StructuredOutputParser.fromNamesAndDescriptions({
    question1: "a very on-topic question",
    question2: "a super weird question",
    question3: "an on-topic, but slightly creative",
  });

  // Format instructions are bound as a partial variable, so `context` is the
  // only input the chain call has to provide.
  const formatInstructions = questionParser.getFormatInstructions();
  const promptTemplate = new PromptTemplate({
    template: "context:\n{context}\n---{format_instructions}",
    inputVariables: ["context"],
    partialVariables: {
      format_instructions: formatInstructions,
    },
  });

  const questionChain = new LLMChain({
    llm,
    prompt: promptTemplate,
    outputParser: questionParser,
    outputKey: "questions",
  });

  const passage = `The U2 ur-myth begins in 1976, when drummer Larry Mullen wanted to form a band.
He picked four school friends from Mount Temple Comprehensive School in Dublin.
“Larry formed U2,” says Paul McGuinness, U2’s manager from the beginning. “He
auditioned the other three and he chose them. The first name of U2 was the Larry
Mullen band,” McGuinness laughs. “And he never lets us forget it.” `;

  const output = await questionChain.call({ context: passage });

  console.log("response", output);

  // Every field requested from the parser must be present in the parsed value.
  for (const field of ["question1", "question2", "question3"]) {
    expect(output.questions).toHaveProperty(field);
  }
});

/**
 * Integration test: StructuredOutputParser used as the output parser of an
 * LLMChain that is backed by `ChatOpenAI`.
 *
 * The prompt is made of two system messages: one carrying the context and one
 * carrying the parser's format instructions. The value stored under
 * `questions` is expected to expose `question1`, `question2` and `question3`.
 */
test("StructuredOutputParser deals special chars in prompt with chat model", async () => {
  const chatModel = new ChatOpenAI({
    temperature: 0,
  });

  const questionParser = StructuredOutputParser.fromNamesAndDescriptions({
    question1: "a very on-topic question",
    question2: "a super weird question",
    question3: "an on-topic, but slightly creative",
  });

  // First message holds the context, second one the format instructions.
  const contextMessage =
    SystemMessagePromptTemplate.fromTemplate("context:\n{context}\n---");
  const instructionsMessage = SystemMessagePromptTemplate.fromTemplate(
    `{format_instructions}`
  );

  const chatPrompt = new ChatPromptTemplate({
    promptMessages: [contextMessage, instructionsMessage],
    inputVariables: ["context"],
    partialVariables: {
      format_instructions: questionParser.getFormatInstructions(),
    },
  });

  const questionChain = new LLMChain({
    llm: chatModel,
    prompt: chatPrompt,
    outputParser: questionParser,
    outputKey: "questions",
  });

  // Passage with typographic quotes/apostrophes, passed in as `context`.
  const passage = `The U2 ur-myth begins in 1976, when drummer Larry Mullen wanted to form a band.
He picked four school friends from Mount Temple Comprehensive School in Dublin.
“Larry formed U2,” says Paul McGuinness, U2’s manager from the beginning. “He
auditioned the other three and he chose them. The first name of U2 was the Larry
Mullen band,” McGuinness laughs. “And he never lets us forget it.” `;

  const chainOutput = await questionChain.call({ context: passage });

  console.log("response", chainOutput);

  const { questions } = chainOutput;
  expect(questions).toHaveProperty("question1");
  expect(questions).toHaveProperty("question2");
  expect(questions).toHaveProperty("question3");
});

/**
 * Integration test: same prompt setup as the chat-model test, but the
 * LLMChain is created WITHOUT an output parser. The value the chain stores
 * under `questions` is passed to `StructuredOutputParser.parse` by hand, and
 * the parsed value is checked for the three expected fields.
 */
test("StructuredOutputParser deals special chars in prompt with chat model 2", async () => {
  const chatModel = new ChatOpenAI({
    temperature: 0,
  });

  const questionParser = StructuredOutputParser.fromNamesAndDescriptions({
    question1: "a very on-topic question",
    question2: "a super weird question",
    question3: "an on-topic, but slightly creative",
  });

  const systemMessages = [
    SystemMessagePromptTemplate.fromTemplate("context:\n{context}\n---"),
    SystemMessagePromptTemplate.fromTemplate(`{format_instructions}`),
  ];

  const chatPrompt = new ChatPromptTemplate({
    promptMessages: systemMessages,
    inputVariables: ["context"],
    partialVariables: {
      format_instructions: questionParser.getFormatInstructions(),
    },
  });

  // No `outputParser` here: parsing happens explicitly after the call.
  const rawChain = new LLMChain({
    llm: chatModel,
    prompt: chatPrompt,
    outputKey: "questions",
  });

  const passage = `The U2 ur-myth begins in 1976, when drummer Larry Mullen wanted to form a band.
He picked four school friends from Mount Temple Comprehensive School in Dublin.
“Larry formed U2,” says Paul McGuinness, U2’s manager from the beginning. “He
auditioned the other three and he chose them. The first name of U2 was the Larry
Mullen band,” McGuinness laughs. “And he never lets us forget it.” `;

  const chainOutput = await rawChain.call({ context: passage });

  console.log("response", chainOutput);

  const structured = await questionParser.parse(chainOutput.questions);

  for (const field of ["question1", "question2", "question3"]) {
    expect(structured).toHaveProperty(field);
  }
});
Loading

0 comments on commit ca8302f

Please sign in to comment.