🚀 feat: Claude 3.7 Support + Reasoning (danny-avila#6008)

* fix: missing console color methods for admin scripts
* feat: Anthropic Claude 3.7 Sonnet Support
* feat: update eventsource to version 3.0.2 and upgrade @modelcontextprotocol/sdk to 1.4.1
* fix: update DynamicInput to handle number type and improve initial value logic
* feat: first pass Anthropic Reasoning (Claude 3.7)
* feat: implement streaming support in AnthropicClient with reasoning UI handling
* feat: add missing xAI (grok) models
naga-ai-hub · Feb 25, 2025 · 50e8769 · 50e8769
1 parent 0e71959
commit 50e8769
Show file tree

Hide file tree

Showing 17 changed files with 421 additions and 77 deletions.
diff --git a/.env.example b/.env.example
@@ -88,7 +88,7 @@ PROXY=
 #============#
 
 ANTHROPIC_API_KEY=user_provided
-# ANTHROPIC_MODELS=claude-3-5-haiku-20241022,claude-3-5-sonnet-20241022,claude-3-5-sonnet-latest,claude-3-5-sonnet-20240620,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307,claude-2.1,claude-2,claude-1.2,claude-1,claude-1-100k,claude-instant-1,claude-instant-1-100k
+# ANTHROPIC_MODELS=claude-3-7-sonnet-latest,claude-3-7-sonnet-20250219,claude-3-5-haiku-20241022,claude-3-5-sonnet-20241022,claude-3-5-sonnet-latest,claude-3-5-sonnet-20240620,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307,claude-2.1,claude-2,claude-1.2,claude-1,claude-1-100k,claude-instant-1,claude-instant-1-100k
 # ANTHROPIC_REVERSE_PROXY=
 
 #============#

diff --git a/api/app/clients/AnthropicClient.js b/api/app/clients/AnthropicClient.js
@@ -7,6 +7,7 @@ const {
   getResponseSender,
   validateVisionModel,
 } = require('librechat-data-provider');
+const { SplitStreamHandler, GraphEvents } = require('@librechat/agents');
 const { encodeAndFormat } = require('~/server/services/Files/images/encode');
 const {
   truncateText,
@@ -19,9 +20,9 @@ const {
 const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
 const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
 const Tokenizer = require('~/server/services/Tokenizer');
+const { logger, sendEvent } = require('~/config');
 const { sleep } = require('~/server/utils');
 const BaseClient = require('./BaseClient');
-const { logger } = require('~/config');
 
 const HUMAN_PROMPT = '\n\nHuman:';
 const AI_PROMPT = '\n\nAssistant:';
@@ -68,6 +69,8 @@ class AnthropicClient extends BaseClient {
     /** The key for the usage object's output tokens
      * @type {string} */
     this.outputTokensKey = 'output_tokens';
+    /** @type {SplitStreamHandler | undefined} */
+    this.streamHandler;
   }
 
   setOptions(options) {
@@ -125,7 +128,7 @@ class AnthropicClient extends BaseClient {
         this.options.endpointType ?? this.options.endpoint,
         this.options.endpointTokenConfig,
       ) ??
-      1500;
+      anthropicSettings.maxOutputTokens.reset(this.modelOptions.model);
     this.maxPromptTokens =
       this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;
 
@@ -179,6 +182,14 @@ class AnthropicClient extends BaseClient {
       options.defaultHeaders = {
         'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
       };
+    } else if (
+      this.supportsCacheControl &&
+      requestOptions?.model &&
+      requestOptions.model.includes('claude-3-7')
+    ) {
+      options.defaultHeaders = {
+        'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
+      };
     } else if (this.supportsCacheControl) {
       options.defaultHeaders = {
         'anthropic-beta': 'prompt-caching-2024-07-31',
@@ -668,7 +679,7 @@ class AnthropicClient extends BaseClient {
    * @returns {Promise<Anthropic.default.Message | Anthropic.default.Completion>} The response from the Anthropic client.
    */
   async createResponse(client, options, useMessages) {
-    return useMessages ?? this.useMessages
+    return (useMessages ?? this.useMessages)
       ? await client.messages.create(options)
       : await client.completions.create(options);
   }
@@ -683,6 +694,7 @@ class AnthropicClient extends BaseClient {
       return false;
     }
     if (
+      modelMatch === 'claude-3-7-sonnet' ||
       modelMatch === 'claude-3-5-sonnet' ||
       modelMatch === 'claude-3-5-haiku' ||
       modelMatch === 'claude-3-haiku' ||
@@ -693,6 +705,35 @@ class AnthropicClient extends BaseClient {
     return false;
   }
 
+  getMessageMapMethod() {
+    /**
+     * @param {TMessage} msg
+     */
+    return (msg) => {
+      if (msg.text != null && msg.text && msg.text.startsWith(':::thinking')) {
+        msg.text = msg.text.replace(/:::thinking.*?:::/gs, '').trim();
+      }
+
+      return msg;
+    };
+  }
+
+  /**
+   * @param {string[]} [intermediateReply]
+   * @returns {string}
+   */
+  getStreamText(intermediateReply) {
+    if (!this.streamHandler) {
+      return intermediateReply?.join('') ?? '';
+    }
+
+    const reasoningText = this.streamHandler.reasoningTokens.join('');
+
+    const reasoningBlock = reasoningText.length > 0 ? `:::thinking\n${reasoningText}\n:::\n` : '';
+
+    return `${reasoningBlock}${this.streamHandler.tokens.join('')}`;
+  }
+
   async sendCompletion(payload, { onProgress, abortController }) {
     if (!abortController) {
       abortController = new AbortController();
@@ -710,7 +751,6 @@ class AnthropicClient extends BaseClient {
       user_id: this.user,
     };
 
-    let text = '';
     const {
       stream,
       model,
@@ -733,10 +773,46 @@ class AnthropicClient extends BaseClient {
 
     if (this.useMessages) {
       requestOptions.messages = payload;
-      requestOptions.max_tokens = maxOutputTokens || legacy.maxOutputTokens.default;
+      requestOptions.max_tokens =
+        maxOutputTokens || anthropicSettings.maxOutputTokens.reset(requestOptions.model);
     } else {
       requestOptions.prompt = payload;
-      requestOptions.max_tokens_to_sample = maxOutputTokens || 1500;
+      requestOptions.max_tokens_to_sample = maxOutputTokens || legacy.maxOutputTokens.default;
+    }
+
+    if (
+      this.options.thinking &&
+      requestOptions?.model &&
+      requestOptions.model.includes('claude-3-7')
+    ) {
+      requestOptions.thinking = {
+        type: 'enabled',
+      };
+    }
+    if (requestOptions.thinking != null && this.options.thinkingBudget != null) {
+      requestOptions.thinking = {
+        ...requestOptions.thinking,
+        budget_tokens: this.options.thinkingBudget,
+      };
+    }
+    if (
+      requestOptions.thinking != null &&
+      (requestOptions.max_tokens == null ||
+        requestOptions.thinking.budget_tokens > requestOptions.max_tokens)
+    ) {
+      const maxTokens = anthropicSettings.maxOutputTokens.reset(requestOptions.model);
+      requestOptions.max_tokens = requestOptions.max_tokens ?? maxTokens;
+
+      logger.warn(
+        requestOptions.max_tokens === maxTokens
+          ? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
+          : `[AnthropicClient] thinking budget_tokens (${requestOptions.thinking.budget_tokens}) exceeds max_tokens (${requestOptions.max_tokens}). Adjusting budget_tokens.`,
+      );
+
+      requestOptions.thinking.budget_tokens = Math.min(
+        requestOptions.thinking.budget_tokens,
+        Math.floor(requestOptions.max_tokens * 0.9),
+      );
     }
 
     if (this.systemMessage && this.supportsCacheControl === true) {
@@ -756,13 +832,17 @@ class AnthropicClient extends BaseClient {
     }
 
     logger.debug('[AnthropicClient]', { ...requestOptions });
+    this.streamHandler = new SplitStreamHandler({
+      accumulate: true,
+      runId: this.responseMessageId,
+      handlers: {
+        [GraphEvents.ON_RUN_STEP]: (event) => sendEvent(this.options.res, event),
+        [GraphEvents.ON_MESSAGE_DELTA]: (event) => sendEvent(this.options.res, event),
+        [GraphEvents.ON_REASONING_DELTA]: (event) => sendEvent(this.options.res, event),
+      },
+    });
 
-    const handleChunk = (currentChunk) => {
-      if (currentChunk) {
-        text += currentChunk;
-        onProgress(currentChunk);
-      }
-    };
+    let intermediateReply = this.streamHandler.tokens;
 
     const maxRetries = 3;
     const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
@@ -782,14 +862,41 @@ class AnthropicClient extends BaseClient {
             }
           });
 
+          /** @param {string} chunk */
+          const handleChunk = (chunk) => {
+            this.streamHandler.handle({
+              choices: [
+                {
+                  delta: {
+                    content: chunk,
+                  },
+                },
+              ],
+            });
+          };
+          /** @param {string} chunk */
+          const handleReasoningChunk = (chunk) => {
+            this.streamHandler.handle({
+              choices: [
+                {
+                  delta: {
+                    reasoning_content: chunk,
+                  },
+                },
+              ],
+            });
+          };
+
           for await (const completion of response) {
             // Handle each completion as before
             const type = completion?.type ?? '';
             if (tokenEventTypes.has(type)) {
               logger.debug(`[AnthropicClient] ${type}`, completion);
               this[type] = completion;
             }
-            if (completion?.delta?.text) {
+            if (completion?.delta?.thinking) {
+              handleReasoningChunk(completion.delta.thinking);
+            } else if (completion?.delta?.text) {
               handleChunk(completion.delta.text);
             } else if (completion.completion) {
               handleChunk(completion.completion);
@@ -808,6 +915,10 @@ class AnthropicClient extends BaseClient {
 
           if (attempts < maxRetries) {
             await delayBeforeRetry(attempts, 350);
+          } else if (this.streamHandler && this.streamHandler.reasoningTokens.length) {
+            return this.getStreamText();
+          } else if (intermediateReply.length > 0) {
+            return this.getStreamText(intermediateReply);
           } else {
             throw new Error(`Operation failed after ${maxRetries} attempts: ${error.message}`);
           }
@@ -823,8 +934,7 @@ class AnthropicClient extends BaseClient {
     }
 
     await processResponse.bind(this)();
-
-    return text.trim();
+    return this.getStreamText(intermediateReply);
   }
 
   getSaveOptions() {
@@ -834,6 +944,8 @@ class AnthropicClient extends BaseClient {
       promptPrefix: this.options.promptPrefix,
       modelLabel: this.options.modelLabel,
       promptCache: this.options.promptCache,
+      thinking: this.options.thinking,
+      thinkingBudget: this.options.thinkingBudget,
       resendFiles: this.options.resendFiles,
       iconURL: this.options.iconURL,
       greeting: this.options.greeting,

diff --git a/api/models/schema/defaults.js b/api/models/schema/defaults.js
@@ -70,6 +70,12 @@ const conversationPreset = {
   promptCache: {
     type: Boolean,
   },
+  thinking: {
+    type: Boolean,
+  },
+  thinkingBudget: {
+    type: Number,
+  },
   system: {
     type: String,
   },

diff --git a/api/models/tx.js b/api/models/tx.js
@@ -88,6 +88,8 @@ const tokenValues = Object.assign(
     'claude-3-sonnet': { prompt: 3, completion: 15 },
     'claude-3-5-sonnet': { prompt: 3, completion: 15 },
     'claude-3.5-sonnet': { prompt: 3, completion: 15 },
+    'claude-3-7-sonnet': { prompt: 3, completion: 15 },
+    'claude-3.7-sonnet': { prompt: 3, completion: 15 },
     'claude-3-5-haiku': { prompt: 0.8, completion: 4 },
     'claude-3.5-haiku': { prompt: 0.8, completion: 4 },
     'claude-3-haiku': { prompt: 0.25, completion: 1.25 },
@@ -110,6 +112,14 @@ const tokenValues = Object.assign(
     'gemini-1.5': { prompt: 2.5, completion: 10 },
     'gemini-pro-vision': { prompt: 0.5, completion: 1.5 },
     gemini: { prompt: 0.5, completion: 1.5 },
+    'grok-2-vision-1212': { prompt: 2.0, completion: 10.0 },
+    'grok-2-vision-latest': { prompt: 2.0, completion: 10.0 },
+    'grok-2-vision': { prompt: 2.0, completion: 10.0 },
+    'grok-vision-beta': { prompt: 5.0, completion: 15.0 },
+    'grok-2-1212': { prompt: 2.0, completion: 10.0 },
+    'grok-2-latest': { prompt: 2.0, completion: 10.0 },
+    'grok-2': { prompt: 2.0, completion: 10.0 },
+    'grok-beta': { prompt: 5.0, completion: 15.0 },
   },
   bedrockValues,
 );
@@ -121,6 +131,8 @@ const tokenValues = Object.assign(
  * @type {Object.<string, {write: number, read: number }>}
  */
 const cacheTokenValues = {
+  'claude-3.7-sonnet': { write: 3.75, read: 0.3 },
+  'claude-3-7-sonnet': { write: 3.75, read: 0.3 },
   'claude-3.5-sonnet': { write: 3.75, read: 0.3 },
   'claude-3-5-sonnet': { write: 3.75, read: 0.3 },
   'claude-3.5-haiku': { write: 1, read: 0.08 },

diff --git a/api/models/tx.spec.js b/api/models/tx.spec.js
@@ -80,6 +80,20 @@ describe('getValueKey', () => {
     expect(getValueKey('chatgpt-4o-latest-0718')).toBe('gpt-4o');
   });
 
+  it('should return "claude-3-7-sonnet" for model type of "claude-3-7-sonnet-"', () => {
+    expect(getValueKey('claude-3-7-sonnet-20240620')).toBe('claude-3-7-sonnet');
+    expect(getValueKey('anthropic/claude-3-7-sonnet')).toBe('claude-3-7-sonnet');
+    expect(getValueKey('claude-3-7-sonnet-turbo')).toBe('claude-3-7-sonnet');
+    expect(getValueKey('claude-3-7-sonnet-0125')).toBe('claude-3-7-sonnet');
+  });
+
+  it('should return "claude-3.7-sonnet" for model type of "claude-3.7-sonnet-"', () => {
+    expect(getValueKey('claude-3.7-sonnet-20240620')).toBe('claude-3.7-sonnet');
+    expect(getValueKey('anthropic/claude-3.7-sonnet')).toBe('claude-3.7-sonnet');
+    expect(getValueKey('claude-3.7-sonnet-turbo')).toBe('claude-3.7-sonnet');
+    expect(getValueKey('claude-3.7-sonnet-0125')).toBe('claude-3.7-sonnet');
+  });
+
   it('should return "claude-3-5-sonnet" for model type of "claude-3-5-sonnet-"', () => {
     expect(getValueKey('claude-3-5-sonnet-20240620')).toBe('claude-3-5-sonnet');
     expect(getValueKey('anthropic/claude-3-5-sonnet')).toBe('claude-3-5-sonnet');
@@ -458,3 +472,30 @@ describe('Google Model Tests', () => {
     });
   });
 });
+
+describe('Grok Model Tests - Pricing', () => {
+  describe('getMultiplier', () => {
+    test('should return correct prompt and completion rates for Grok vision models', () => {
+      const models = ['grok-2-vision-1212', 'grok-2-vision', 'grok-2-vision-latest'];
+      models.forEach((model) => {
+        expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(2.0);
+        expect(getMultiplier({ model, tokenType: 'completion' })).toBe(10.0);
+      });
+    });
+
+    test('should return correct prompt and completion rates for Grok text models', () => {
+      const models = ['grok-2-1212', 'grok-2', 'grok-2-latest'];
+      models.forEach((model) => {
+        expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(2.0);
+        expect(getMultiplier({ model, tokenType: 'completion' })).toBe(10.0);
+      });
+    });
+
+    test('should return correct prompt and completion rates for Grok beta models', () => {
+      expect(getMultiplier({ model: 'grok-vision-beta', tokenType: 'prompt' })).toBe(5.0);
+      expect(getMultiplier({ model: 'grok-vision-beta', tokenType: 'completion' })).toBe(15.0);
+      expect(getMultiplier({ model: 'grok-beta', tokenType: 'prompt' })).toBe(5.0);
+      expect(getMultiplier({ model: 'grok-beta', tokenType: 'completion' })).toBe(15.0);
+    });
+  });
+});
diff --git a/api/package.json b/api/package.json
@@ -34,7 +34,7 @@
   },
   "homepage": "https://librechat.ai",
   "dependencies": {
-    "@anthropic-ai/sdk": "^0.32.1",
+    "@anthropic-ai/sdk": "^0.37.0",
     "@azure/search-documents": "^12.0.0",
     "@google/generative-ai": "^0.21.0",
     "@googleapis/youtube": "^20.0.0",
@@ -57,6 +57,7 @@
     "cors": "^2.8.5",
     "dedent": "^1.5.3",
     "dotenv": "^16.0.3",
+    "eventsource": "^3.0.2",
     "express": "^4.21.2",
     "express-mongo-sanitize": "^2.2.0",
     "express-rate-limit": "^7.4.1",