Skip to content

Commit

Permalink
🚀 feat: Claude 3.7 Support + Reasoning (danny-avila#6008)
Browse files Browse the repository at this point in the history
* fix: missing console color methods for admin scripts

* feat: Anthropic Claude 3.7 Sonnet Support

* feat: update eventsource to version 3.0.2 and upgrade @modelcontextprotocol/sdk to 1.4.1

* fix: update DynamicInput to handle number type and improve initial value logic

* feat: first pass Anthropic Reasoning (Claude 3.7)

* feat: implement streaming support in AnthropicClient with reasoning UI handling

* feat: add missing xAI (grok) models
  • Loading branch information
danny-avila authored Feb 25, 2025
1 parent 0e71959 commit 50e8769
Show file tree
Hide file tree
Showing 17 changed files with 421 additions and 77 deletions.
2 changes: 1 addition & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ PROXY=
#============#

ANTHROPIC_API_KEY=user_provided
# ANTHROPIC_MODELS=claude-3-5-haiku-20241022,claude-3-5-sonnet-20241022,claude-3-5-sonnet-latest,claude-3-5-sonnet-20240620,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307,claude-2.1,claude-2,claude-1.2,claude-1,claude-1-100k,claude-instant-1,claude-instant-1-100k
# ANTHROPIC_MODELS=claude-3-7-sonnet-latest,claude-3-7-sonnet-20250219,claude-3-5-haiku-20241022,claude-3-5-sonnet-20241022,claude-3-5-sonnet-latest,claude-3-5-sonnet-20240620,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307,claude-2.1,claude-2,claude-1.2,claude-1,claude-1-100k,claude-instant-1,claude-instant-1-100k
# ANTHROPIC_REVERSE_PROXY=

#============#
Expand Down
142 changes: 127 additions & 15 deletions api/app/clients/AnthropicClient.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const {
getResponseSender,
validateVisionModel,
} = require('librechat-data-provider');
const { SplitStreamHandler, GraphEvents } = require('@librechat/agents');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const {
truncateText,
Expand All @@ -19,9 +20,9 @@ const {
const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
const Tokenizer = require('~/server/services/Tokenizer');
const { logger, sendEvent } = require('~/config');
const { sleep } = require('~/server/utils');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');

const HUMAN_PROMPT = '\n\nHuman:';
const AI_PROMPT = '\n\nAssistant:';
Expand Down Expand Up @@ -68,6 +69,8 @@ class AnthropicClient extends BaseClient {
/** The key for the usage object's output tokens
* @type {string} */
this.outputTokensKey = 'output_tokens';
/** @type {SplitStreamHandler | undefined} */
this.streamHandler;
}

setOptions(options) {
Expand Down Expand Up @@ -125,7 +128,7 @@ class AnthropicClient extends BaseClient {
this.options.endpointType ?? this.options.endpoint,
this.options.endpointTokenConfig,
) ??
1500;
anthropicSettings.maxOutputTokens.reset(this.modelOptions.model);
this.maxPromptTokens =
this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;

Expand Down Expand Up @@ -179,6 +182,14 @@ class AnthropicClient extends BaseClient {
options.defaultHeaders = {
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
};
} else if (
this.supportsCacheControl &&
requestOptions?.model &&
requestOptions.model.includes('claude-3-7')
) {
options.defaultHeaders = {
'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
};
} else if (this.supportsCacheControl) {
options.defaultHeaders = {
'anthropic-beta': 'prompt-caching-2024-07-31',
Expand Down Expand Up @@ -668,7 +679,7 @@ class AnthropicClient extends BaseClient {
* @returns {Promise<Anthropic.default.Message | Anthropic.default.Completion>} The response from the Anthropic client.
*/
async createResponse(client, options, useMessages) {
return useMessages ?? this.useMessages
return (useMessages ?? this.useMessages)
? await client.messages.create(options)
: await client.completions.create(options);
}
Expand All @@ -683,6 +694,7 @@ class AnthropicClient extends BaseClient {
return false;
}
if (
modelMatch === 'claude-3-7-sonnet' ||
modelMatch === 'claude-3-5-sonnet' ||
modelMatch === 'claude-3-5-haiku' ||
modelMatch === 'claude-3-haiku' ||
Expand All @@ -693,6 +705,35 @@ class AnthropicClient extends BaseClient {
return false;
}

getMessageMapMethod() {
/**
* @param {TMessage} msg
*/
return (msg) => {
if (msg.text != null && msg.text && msg.text.startsWith(':::thinking')) {
msg.text = msg.text.replace(/:::thinking.*?:::/gs, '').trim();
}

return msg;
};
}

/**
* @param {string[]} [intermediateReply]
* @returns {string}
*/
getStreamText(intermediateReply) {
if (!this.streamHandler) {
return intermediateReply?.join('') ?? '';
}

const reasoningText = this.streamHandler.reasoningTokens.join('');

const reasoningBlock = reasoningText.length > 0 ? `:::thinking\n${reasoningText}\n:::\n` : '';

return `${reasoningBlock}${this.streamHandler.tokens.join('')}`;
}

async sendCompletion(payload, { onProgress, abortController }) {
if (!abortController) {
abortController = new AbortController();
Expand All @@ -710,7 +751,6 @@ class AnthropicClient extends BaseClient {
user_id: this.user,
};

let text = '';
const {
stream,
model,
Expand All @@ -733,10 +773,46 @@ class AnthropicClient extends BaseClient {

if (this.useMessages) {
requestOptions.messages = payload;
requestOptions.max_tokens = maxOutputTokens || legacy.maxOutputTokens.default;
requestOptions.max_tokens =
maxOutputTokens || anthropicSettings.maxOutputTokens.reset(requestOptions.model);
} else {
requestOptions.prompt = payload;
requestOptions.max_tokens_to_sample = maxOutputTokens || 1500;
requestOptions.max_tokens_to_sample = maxOutputTokens || legacy.maxOutputTokens.default;
}

if (
this.options.thinking &&
requestOptions?.model &&
requestOptions.model.includes('claude-3-7')
) {
requestOptions.thinking = {
type: 'enabled',
};
}
if (requestOptions.thinking != null && this.options.thinkingBudget != null) {
requestOptions.thinking = {
...requestOptions.thinking,
budget_tokens: this.options.thinkingBudget,
};
}
if (
requestOptions.thinking != null &&
(requestOptions.max_tokens == null ||
requestOptions.thinking.budget_tokens > requestOptions.max_tokens)
) {
const maxTokens = anthropicSettings.maxOutputTokens.reset(requestOptions.model);
requestOptions.max_tokens = requestOptions.max_tokens ?? maxTokens;

logger.warn(
requestOptions.max_tokens === maxTokens
? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
: `[AnthropicClient] thinking budget_tokens (${requestOptions.thinking.budget_tokens}) exceeds max_tokens (${requestOptions.max_tokens}). Adjusting budget_tokens.`,
);

requestOptions.thinking.budget_tokens = Math.min(
requestOptions.thinking.budget_tokens,
Math.floor(requestOptions.max_tokens * 0.9),
);
}

if (this.systemMessage && this.supportsCacheControl === true) {
Expand All @@ -756,13 +832,17 @@ class AnthropicClient extends BaseClient {
}

logger.debug('[AnthropicClient]', { ...requestOptions });
this.streamHandler = new SplitStreamHandler({
accumulate: true,
runId: this.responseMessageId,
handlers: {
[GraphEvents.ON_RUN_STEP]: (event) => sendEvent(this.options.res, event),
[GraphEvents.ON_MESSAGE_DELTA]: (event) => sendEvent(this.options.res, event),
[GraphEvents.ON_REASONING_DELTA]: (event) => sendEvent(this.options.res, event),
},
});

const handleChunk = (currentChunk) => {
if (currentChunk) {
text += currentChunk;
onProgress(currentChunk);
}
};
let intermediateReply = this.streamHandler.tokens;

const maxRetries = 3;
const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
Expand All @@ -782,14 +862,41 @@ class AnthropicClient extends BaseClient {
}
});

/** @param {string} chunk */
const handleChunk = (chunk) => {
this.streamHandler.handle({
choices: [
{
delta: {
content: chunk,
},
},
],
});
};
/** @param {string} chunk */
const handleReasoningChunk = (chunk) => {
this.streamHandler.handle({
choices: [
{
delta: {
reasoning_content: chunk,
},
},
],
});
};

for await (const completion of response) {
// Handle each completion as before
const type = completion?.type ?? '';
if (tokenEventTypes.has(type)) {
logger.debug(`[AnthropicClient] ${type}`, completion);
this[type] = completion;
}
if (completion?.delta?.text) {
if (completion?.delta?.thinking) {
handleReasoningChunk(completion.delta.thinking);
} else if (completion?.delta?.text) {
handleChunk(completion.delta.text);
} else if (completion.completion) {
handleChunk(completion.completion);
Expand All @@ -808,6 +915,10 @@ class AnthropicClient extends BaseClient {

if (attempts < maxRetries) {
await delayBeforeRetry(attempts, 350);
} else if (this.streamHandler && this.streamHandler.reasoningTokens.length) {
return this.getStreamText();
} else if (intermediateReply.length > 0) {
return this.getStreamText(intermediateReply);
} else {
throw new Error(`Operation failed after ${maxRetries} attempts: ${error.message}`);
}
Expand All @@ -823,8 +934,7 @@ class AnthropicClient extends BaseClient {
}

await processResponse.bind(this)();

return text.trim();
return this.getStreamText(intermediateReply);
}

getSaveOptions() {
Expand All @@ -834,6 +944,8 @@ class AnthropicClient extends BaseClient {
promptPrefix: this.options.promptPrefix,
modelLabel: this.options.modelLabel,
promptCache: this.options.promptCache,
thinking: this.options.thinking,
thinkingBudget: this.options.thinkingBudget,
resendFiles: this.options.resendFiles,
iconURL: this.options.iconURL,
greeting: this.options.greeting,
Expand Down
6 changes: 6 additions & 0 deletions api/models/schema/defaults.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ const conversationPreset = {
promptCache: {
type: Boolean,
},
thinking: {
type: Boolean,
},
thinkingBudget: {
type: Number,
},
system: {
type: String,
},
Expand Down
12 changes: 12 additions & 0 deletions api/models/tx.js
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ const tokenValues = Object.assign(
'claude-3-sonnet': { prompt: 3, completion: 15 },
'claude-3-5-sonnet': { prompt: 3, completion: 15 },
'claude-3.5-sonnet': { prompt: 3, completion: 15 },
'claude-3-7-sonnet': { prompt: 3, completion: 15 },
'claude-3.7-sonnet': { prompt: 3, completion: 15 },
'claude-3-5-haiku': { prompt: 0.8, completion: 4 },
'claude-3.5-haiku': { prompt: 0.8, completion: 4 },
'claude-3-haiku': { prompt: 0.25, completion: 1.25 },
Expand All @@ -110,6 +112,14 @@ const tokenValues = Object.assign(
'gemini-1.5': { prompt: 2.5, completion: 10 },
'gemini-pro-vision': { prompt: 0.5, completion: 1.5 },
gemini: { prompt: 0.5, completion: 1.5 },
'grok-2-vision-1212': { prompt: 2.0, completion: 10.0 },
'grok-2-vision-latest': { prompt: 2.0, completion: 10.0 },
'grok-2-vision': { prompt: 2.0, completion: 10.0 },
'grok-vision-beta': { prompt: 5.0, completion: 15.0 },
'grok-2-1212': { prompt: 2.0, completion: 10.0 },
'grok-2-latest': { prompt: 2.0, completion: 10.0 },
'grok-2': { prompt: 2.0, completion: 10.0 },
'grok-beta': { prompt: 5.0, completion: 15.0 },
},
bedrockValues,
);
Expand All @@ -121,6 +131,8 @@ const tokenValues = Object.assign(
* @type {Object.<string, {write: number, read: number }>}
*/
const cacheTokenValues = {
'claude-3.7-sonnet': { write: 3.75, read: 0.3 },
'claude-3-7-sonnet': { write: 3.75, read: 0.3 },
'claude-3.5-sonnet': { write: 3.75, read: 0.3 },
'claude-3-5-sonnet': { write: 3.75, read: 0.3 },
'claude-3.5-haiku': { write: 1, read: 0.08 },
Expand Down
41 changes: 41 additions & 0 deletions api/models/tx.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,20 @@ describe('getValueKey', () => {
expect(getValueKey('chatgpt-4o-latest-0718')).toBe('gpt-4o');
});

it('should return "claude-3-7-sonnet" for model type of "claude-3-7-sonnet-"', () => {
expect(getValueKey('claude-3-7-sonnet-20240620')).toBe('claude-3-7-sonnet');
expect(getValueKey('anthropic/claude-3-7-sonnet')).toBe('claude-3-7-sonnet');
expect(getValueKey('claude-3-7-sonnet-turbo')).toBe('claude-3-7-sonnet');
expect(getValueKey('claude-3-7-sonnet-0125')).toBe('claude-3-7-sonnet');
});

it('should return "claude-3.7-sonnet" for model type of "claude-3.7-sonnet-"', () => {
expect(getValueKey('claude-3.7-sonnet-20240620')).toBe('claude-3.7-sonnet');
expect(getValueKey('anthropic/claude-3.7-sonnet')).toBe('claude-3.7-sonnet');
expect(getValueKey('claude-3.7-sonnet-turbo')).toBe('claude-3.7-sonnet');
expect(getValueKey('claude-3.7-sonnet-0125')).toBe('claude-3.7-sonnet');
});

it('should return "claude-3-5-sonnet" for model type of "claude-3-5-sonnet-"', () => {
expect(getValueKey('claude-3-5-sonnet-20240620')).toBe('claude-3-5-sonnet');
expect(getValueKey('anthropic/claude-3-5-sonnet')).toBe('claude-3-5-sonnet');
Expand Down Expand Up @@ -458,3 +472,30 @@ describe('Google Model Tests', () => {
});
});
});

describe('Grok Model Tests - Pricing', () => {
describe('getMultiplier', () => {
test('should return correct prompt and completion rates for Grok vision models', () => {
const models = ['grok-2-vision-1212', 'grok-2-vision', 'grok-2-vision-latest'];
models.forEach((model) => {
expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(2.0);
expect(getMultiplier({ model, tokenType: 'completion' })).toBe(10.0);
});
});

test('should return correct prompt and completion rates for Grok text models', () => {
const models = ['grok-2-1212', 'grok-2', 'grok-2-latest'];
models.forEach((model) => {
expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(2.0);
expect(getMultiplier({ model, tokenType: 'completion' })).toBe(10.0);
});
});

test('should return correct prompt and completion rates for Grok beta models', () => {
expect(getMultiplier({ model: 'grok-vision-beta', tokenType: 'prompt' })).toBe(5.0);
expect(getMultiplier({ model: 'grok-vision-beta', tokenType: 'completion' })).toBe(15.0);
expect(getMultiplier({ model: 'grok-beta', tokenType: 'prompt' })).toBe(5.0);
expect(getMultiplier({ model: 'grok-beta', tokenType: 'completion' })).toBe(15.0);
});
});
});
3 changes: 2 additions & 1 deletion api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
},
"homepage": "https://librechat.ai",
"dependencies": {
"@anthropic-ai/sdk": "^0.32.1",
"@anthropic-ai/sdk": "^0.37.0",
"@azure/search-documents": "^12.0.0",
"@google/generative-ai": "^0.21.0",
"@googleapis/youtube": "^20.0.0",
Expand All @@ -57,6 +57,7 @@
"cors": "^2.8.5",
"dedent": "^1.5.3",
"dotenv": "^16.0.3",
"eventsource": "^3.0.2",
"express": "^4.21.2",
"express-mongo-sanitize": "^2.2.0",
"express-rate-limit": "^7.4.1",
Expand Down
Loading

0 comments on commit 50e8769

Please sign in to comment.