diff --git a/.github/workflows/accuracy-tests.yml b/.github/workflows/accuracy-tests.yml new file mode 100644 index 00000000..75dac32c --- /dev/null +++ b/.github/workflows/accuracy-tests.yml @@ -0,0 +1,48 @@ +name: Accuracy Tests + +on: + workflow_dispatch: + pull_request: + types: [labeled] + +jobs: + run-accuracy-tests: + name: Run Accuracy Tests + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + if: | + github.event_name == 'workflow_dispatch' || + (github.event_name == 'pull_request' && github.event.label.name == 'accuracy-tests') + env: + MDB_OPEN_AI_API_KEY: ${{ secrets.ACCURACY_OPEN_AI_API_KEY }} + MDB_GEMINI_API_KEY: ${{ secrets.MDB_GEMINI_API_KEY }} + MDB_AZURE_OPEN_AI_API_KEY: ${{ secrets.MDB_AZURE_OPEN_AI_API_KEY }} + MDB_AZURE_OPEN_AI_API_URL: ${{ secrets.MDB_AZURE_OPEN_AI_API_URL }} + MDB_ACCURACY_MDB_URL: ${{ secrets.ACCURACY_MDB_CONNECTION_STRING }} + MDB_ACCURACY_MDB_DB: ${{ vars.ACCURACY_MDB_DB }} + MDB_ACCURACY_MDB_COLLECTION: ${{ vars.ACCURACY_MDB_COLLECTION }} + MDB_ACCURACY_BASELINE_COMMIT: ${{ github.event.pull_request.base.sha || '' }} + steps: + - uses: GitHubSecurityLab/actions-permissions/monitor@v1 + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version-file: package.json + cache: "npm" + - name: Install dependencies + run: npm ci + - name: Run accuracy tests + run: ./scripts/run-accuracy-tests.sh + - name: Upload accuracy test summary + if: always() + uses: actions/upload-artifact@v4 + with: + name: accuracy-test-summary + path: .accuracy/tests-summary.html + - name: Comment summary on PR + if: github.event_name == 'pull_request' && github.event.label.name == 'accuracy-tests' + uses: marocchino/sticky-pull-request-comment@v2 + with: + path: .accuracy/tests-summary.html diff --git a/.gitignore b/.gitignore index 4e3f7a54..49550e27 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ state.json tests/tmp coverage +# Generated assets by accuracy runs +.accuracy diff --git a/package-lock.json b/package-lock.json index 29132ba3..a3bf47c4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -30,7 +30,11 @@ "mongodb-mcp-server": "dist/index.js" }, "devDependencies": { + "@ai-sdk/anthropic": "^1.2.12", + "@ai-sdk/azure": "^1.3.23", + "@ai-sdk/openai": "^1.3.22", "@eslint/js": "^9.30.1", + "@himanshusinghs/google": "^1.2.11", "@jest/globals": "^30.0.4", "@modelcontextprotocol/inspector": "^0.16.0", "@redocly/cli": "^1.34.4", @@ -38,6 +42,7 @@ "@types/node": "^24.0.12", "@types/simple-oauth2": "^5.0.7", "@types/yargs-parser": "^21.0.3", + "ai": "^4.3.16", "eslint": "^9.30.1", "eslint-config-prettier": "^10.1.5", "eslint-plugin-jest": "^29.0.1", @@ -46,20 +51,153 @@ "jest": "^30.0.4", "jest-environment-node": "^30.0.4", "jest-extended": "^6.0.0", + "microdiff": "^1.5.0", "mongodb-runner": "^5.9.2", + "ollama-ai-provider": "^1.2.0", "openapi-types": "^12.1.3", "openapi-typescript": "^7.8.0", "prettier": "^3.6.2", + "simple-git": "^3.28.0", "ts-jest": "^29.4.0", "tsx": "^4.20.3", "typescript": "^5.8.3", "typescript-eslint": "^8.36.0", + "uuid": "^11.1.0", "yaml": "^2.8.0" }, "engines": { "node": ">=20.10.0" } }, + "@himanshusinghs/ai-sdk-google": { + "extraneous": true + }, + "node_modules/@ai-sdk/anthropic": { + "version": "1.2.12", + "resolved": "https://registry.npmjs.org/@ai-sdk/anthropic/-/anthropic-1.2.12.tgz", + "integrity": "sha512-YSzjlko7JvuiyQFmI9RN1tNZdEiZxc+6xld/0tq/VkJaHpEzGAb1yiNxxvmYVcjvfu/PcvCxAAYXmTYQQ63IHQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "1.1.3", + "@ai-sdk/provider-utils": "2.2.8" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + } + }, + "node_modules/@ai-sdk/azure": { + "version": "1.3.23", + "resolved": "https://registry.npmjs.org/@ai-sdk/azure/-/azure-1.3.23.tgz", + "integrity": "sha512-vpsaPtU24RBVk/IMM5UylR/N4RtAuL2NZLWc7LJ3tvMTHu6pI46a7w+1qIwR3F6yO9ehWR8qvfLaBefJNFxaVw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/openai": "1.3.22", + "@ai-sdk/provider": "1.1.3", + "@ai-sdk/provider-utils": "2.2.8" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + } + }, + "node_modules/@ai-sdk/openai": { + "version": "1.3.22", + "resolved": "https://registry.npmjs.org/@ai-sdk/openai/-/openai-1.3.22.tgz", + "integrity": "sha512-QwA+2EkG0QyjVR+7h6FE7iOu2ivNqAVMm9UJZkVxxTk5OIq5fFJDTEI/zICEMuHImTTXR2JjsL6EirJ28Jc4cw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "1.1.3", + "@ai-sdk/provider-utils": "2.2.8" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + } + }, + "node_modules/@ai-sdk/provider": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-1.1.3.tgz", + "integrity": "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "json-schema": "^0.4.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@ai-sdk/provider-utils": { + "version": "2.2.8", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-2.2.8.tgz", + "integrity": "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "1.1.3", + "nanoid": "^3.3.8", + "secure-json-parse": "^2.7.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.23.8" + } + }, + "node_modules/@ai-sdk/react": { + "version": "1.2.12", + "resolved": "https://registry.npmjs.org/@ai-sdk/react/-/react-1.2.12.tgz", + "integrity": "sha512-jK1IZZ22evPZoQW3vlkZ7wvjYGYF+tRBKXtrcolduIkQ/m/sOAVcVeVDUDvh1T91xCnWCdUGCPZg2avZ90mv3g==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider-utils": "2.2.8", + "@ai-sdk/ui-utils": "1.2.11", + "swr": "^2.2.5", + "throttleit": "2.1.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "react": "^18 || ^19 || ^19.0.0-rc", + "zod": "^3.23.8" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, + "node_modules/@ai-sdk/ui-utils": { + "version": "1.2.11", + "resolved": "https://registry.npmjs.org/@ai-sdk/ui-utils/-/ui-utils-1.2.11.tgz", + "integrity": "sha512-3zcwCc8ezzFlwp3ZD15wAPjf2Au4s3vAbKsXQVyhxODHcmu0iyPO2Eua6D/vicq/AUm/BAo60r97O6HU+EI0+w==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "1.1.3", + "@ai-sdk/provider-utils": "2.2.8", + "zod-to-json-schema": "^3.24.1" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.23.8" + } + }, "node_modules/@ampproject/remapping": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz", @@ -2090,6 +2228,54 @@ "@hapi/hoek": "^11.0.2" } }, + "node_modules/@himanshusinghs/google": { + "version": "1.2.11", + "resolved": "https://registry.npmjs.org/@himanshusinghs/google/-/google-1.2.11.tgz", + "integrity": "sha512-SKTFxwN9PpUHVrppFod8sF1jqys5azzsgcBVrSbc7VaazmVEnBxHQlv5/yfeZFjD3ly5Mw+AJdFfC0bxwdWBNg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "1.1.2", + "@ai-sdk/provider-utils": "2.2.6" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + } + }, + "node_modules/@himanshusinghs/google/node_modules/@ai-sdk/provider": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-1.1.2.tgz", + "integrity": "sha512-ITdgNilJZwLKR7X5TnUr1BsQW6UTX5yFp0h66Nfx8XjBYkWD9W3yugr50GOz3CnE9m/U/Cd5OyEbTMI0rgi6ZQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "json-schema": "^0.4.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@himanshusinghs/google/node_modules/@ai-sdk/provider-utils": { + "version": "2.2.6", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-2.2.6.tgz", + "integrity": "sha512-sUlZ7Gnq84DCGWMQRIK8XVbkzIBnvPR1diV4v6JwPgpn5armnLI/j+rqn62MpLrU5ZCQZlDKl/Lw6ed3ulYqaA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "1.1.2", + "nanoid": "^3.3.8", + "secure-json-parse": "^2.7.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.23.8" + } + }, "node_modules/@humanfs/core": { "version": "0.19.1", "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz", @@ -2929,6 +3115,23 @@ "jsep": "^0.4.0||^1.0.0" } }, + "node_modules/@kwsites/file-exists": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@kwsites/file-exists/-/file-exists-1.1.1.tgz", + "integrity": "sha512-m9/5YGR18lIwxSFDwfE3oA7bWuq9kdau6ugN4H2rJeyhFQZcG9AgSHkQtSD15a8WvTgfz9aikZMrKPHvbpqFiw==", + "dev": true, + "license": "MIT", + "dependencies": { + "debug": "^4.1.1" + } + }, + "node_modules/@kwsites/promise-deferred": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@kwsites/promise-deferred/-/promise-deferred-1.1.1.tgz", + "integrity": "sha512-GaHYm+c0O9MjZRu0ongGBRbinu8gVAMd2UZjji6jVmqKtZluZnptXGWhz1E8j8D2HJ3f/yMxKAUC0b+57wncIw==", + "dev": true, + "license": "MIT" + }, "node_modules/@modelcontextprotocol/inspector": { "version": "0.16.0", "resolved": "https://registry.npmjs.org/@modelcontextprotocol/inspector/-/inspector-0.16.0.tgz", @@ -5424,6 +5627,19 @@ "node": ">=18.0.0" } }, + "node_modules/@smithy/middleware-retry/node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/@smithy/middleware-serde": { "version": "4.0.3", "resolved": "https://registry.npmjs.org/@smithy/middleware-serde/-/middleware-serde-4.0.3.tgz", @@ -5906,6 +6122,13 @@ "@babel/types": "^7.20.7" } }, + "node_modules/@types/diff-match-patch": { + "version": "1.0.36", + "resolved": "https://registry.npmjs.org/@types/diff-match-patch/-/diff-match-patch-1.0.36.tgz", + "integrity": "sha512-xFdR6tkm0MWvBfO8xXCSsinYxHcqkQUlcHeSpMC2ukzOb6lwQAfDmW+Qt0AvlGd8HpsS28qKsB+oPeJn9I39jg==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/estree": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.7.tgz", @@ -6660,6 +6883,33 @@ "node": ">= 14" } }, + "node_modules/ai": { + "version": "4.3.16", + "resolved": "https://registry.npmjs.org/ai/-/ai-4.3.16.tgz", + "integrity": "sha512-KUDwlThJ5tr2Vw0A1ZkbDKNME3wzWhuVfAOwIvFUzl1TPVDFAXDFTXio3p+jaKneB+dKNCvFFlolYmmgHttG1g==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "1.1.3", + "@ai-sdk/provider-utils": "2.2.8", + "@ai-sdk/react": "1.2.12", + "@ai-sdk/ui-utils": "1.2.11", + "@opentelemetry/api": "1.9.0", + "jsondiffpatch": "0.6.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "react": "^18 || ^19 || ^19.0.0-rc", + "zod": "^3.23.8" + }, + "peerDependenciesMeta": { + "react": { + "optional": true + } + } + }, "node_modules/ajv": { "version": "6.12.6", "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", @@ -8376,6 +8626,16 @@ "node": ">= 0.8" } }, + "node_modules/dequal": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", + "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/destroy": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.2.0.tgz", @@ -8423,6 +8683,13 @@ "node": ">=0.3.1" } }, + "node_modules/diff-match-patch": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/diff-match-patch/-/diff-match-patch-1.0.5.tgz", + "integrity": "sha512-IayShXAgj/QMXgB0IWmKx+rOPuGMhqm5w6jvFxmVenXKIzRqTAAsbBPT3kWQeGANj3jGgvcvv4yK6SxqYmikgw==", + "dev": true, + "license": "Apache-2.0" + }, "node_modules/diff-sequences": { "version": "29.6.3", "resolved": "https://registry.npmjs.org/diff-sequences/-/diff-sequences-29.6.3.tgz", @@ -11803,6 +12070,13 @@ "foreach": "^2.0.4" } }, + "node_modules/json-schema": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz", + "integrity": "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==", + "dev": true, + "license": "(AFL-2.1 OR BSD-3-Clause)" + }, "node_modules/json-schema-traverse": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", @@ -11830,6 +12104,37 @@ "node": ">=6" } }, + "node_modules/jsondiffpatch": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/jsondiffpatch/-/jsondiffpatch-0.6.0.tgz", + "integrity": "sha512-3QItJOXp2AP1uv7waBkao5nCvhEv+QmJAd38Ybq7wNI74Q+BBmnLn4EDKz6yI9xGAIQoUF87qHt+kc1IVxB4zQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/diff-match-patch": "^1.0.36", + "chalk": "^5.3.0", + "diff-match-patch": "^1.0.5" + }, + "bin": { + "jsondiffpatch": "bin/jsondiffpatch.js" + }, + "engines": { + "node": "^18.0.0 || >=20.0.0" + } + }, + "node_modules/jsondiffpatch/node_modules/chalk": { + "version": "5.4.1", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.4.1.tgz", + "integrity": "sha512-zgVZuo2WcZgfUEmsn6eO3kINexW8RAE4maiQ8QNs8CtpPCSyMiYsULR3HQYkm3w8FIA3SberyMJMSldGsW+U3w==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^12.17.0 || ^14.13 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, "node_modules/jsonpath-plus": { "version": "10.3.0", "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz", @@ -12132,6 +12437,13 @@ "node": ">= 0.6" } }, + "node_modules/microdiff": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/microdiff/-/microdiff-1.5.0.tgz", + "integrity": "sha512-Drq+/THMvDdzRYrK0oxJmOKiC24ayUV8ahrt8l3oRK51PWt6gdtrIGrlIH3pT/lFh1z93FbAcidtsHcWbnRz8Q==", + "dev": true, + "license": "MIT" + }, "node_modules/micromatch": { "version": "4.0.8", "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", @@ -13011,6 +13323,29 @@ "node": "^10.13.0 || >=12.0.0" } }, + "node_modules/ollama-ai-provider": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/ollama-ai-provider/-/ollama-ai-provider-1.2.0.tgz", + "integrity": "sha512-jTNFruwe3O/ruJeppI/quoOUxG7NA6blG3ZyQj3lei4+NnJo7bi3eIRWqlVpRlu/mbzbFXeJSBuYQWF6pzGKww==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "^1.0.0", + "@ai-sdk/provider-utils": "^2.0.0", + "partial-json": "0.1.7" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, "node_modules/on-finished": { "version": "2.4.1", "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", @@ -13388,6 +13723,13 @@ "node": ">= 0.8" } }, + "node_modules/partial-json": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/partial-json/-/partial-json-0.1.7.tgz", + "integrity": "sha512-Njv/59hHaokb/hRUjce3Hdv12wd60MtM9Z5Olmn+nehe0QDAsRtRbJPvJ0Z91TusF0SuZRIvnM+S4l6EIP8leA==", + "dev": true, + "license": "MIT" + }, "node_modules/path-browserify": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/path-browserify/-/path-browserify-1.0.1.tgz", @@ -14402,6 +14744,13 @@ "loose-envify": "^1.1.0" } }, + "node_modules/secure-json-parse": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-2.7.0.tgz", + "integrity": "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw==", + "dev": true, + "license": "BSD-3-Clause" + }, "node_modules/seek-bzip": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/seek-bzip/-/seek-bzip-1.0.6.tgz", @@ -14830,6 +15179,22 @@ "simple-concat": "^1.0.0" } }, + "node_modules/simple-git": { + "version": "3.28.0", + "resolved": "https://registry.npmjs.org/simple-git/-/simple-git-3.28.0.tgz", + "integrity": "sha512-Rs/vQRwsn1ILH1oBUy8NucJlXmnnLeLCfcvbSehkPzbv3wwoFWIdtfd6Ndo6ZPhlPsCZ60CPI4rxurnwAa+a2w==", + "dev": true, + "license": "MIT", + "dependencies": { + "@kwsites/file-exists": "^1.1.1", + "@kwsites/promise-deferred": "^1.1.1", + "debug": "^4.4.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/steveukx/git-js?sponsor=1" + } + }, "node_modules/simple-oauth2": { "version": "5.1.0", "resolved": "https://registry.npmjs.org/simple-oauth2/-/simple-oauth2-5.1.0.tgz", @@ -15351,6 +15716,20 @@ "node": ">= 6" } }, + "node_modules/swr": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/swr/-/swr-2.3.3.tgz", + "integrity": "sha512-dshNvs3ExOqtZ6kJBaAsabhPdHyeY4P2cKwRCniDVifBMoG/SVI7tfLWqPXriVspf2Rg4tPzXJTnwaihIeFw2A==", + "dev": true, + "license": "MIT", + "dependencies": { + "dequal": "^2.0.3", + "use-sync-external-store": "^1.4.0" + }, + "peerDependencies": { + "react": "^16.11.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, "node_modules/synckit": { "version": "0.11.8", "resolved": "https://registry.npmjs.org/synckit/-/synckit-0.11.8.tgz", @@ -15571,6 +15950,19 @@ "node": "*" } }, + "node_modules/throttleit": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/throttleit/-/throttleit-2.1.0.tgz", + "integrity": "sha512-nt6AMGKW1p/70DF/hGBdJB57B8Tspmbp5gfJ8ilhLnt7kkr2ye7hzD6NVG8GGErk2HWF34igrL2CXmNIkzKqKw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/through": { "version": "2.3.8", "resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz", @@ -16163,16 +16555,17 @@ } }, "node_modules/uuid": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", - "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-11.1.0.tgz", + "integrity": "sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==", + "dev": true, "funding": [ "https://github.com/sponsors/broofa", "https://github.com/sponsors/ctavan" ], "license": "MIT", "bin": { - "uuid": "dist/bin/uuid" + "uuid": "dist/esm/bin/uuid" } }, "node_modules/v8-compile-cache-lib": { diff --git a/package.json b/package.json index 53d6d2c6..53639aec 100644 --- a/package.json +++ b/package.json @@ -29,11 +29,17 @@ "check:types": "tsc --noEmit --project tsconfig.json", "reformat": "prettier --write .", "generate": "./scripts/generate.sh", - "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --coverage" + "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --coverage --testPathIgnorePatterns=/tests/accuracy/", + "pre:test:accuracy": "npm run build:compile", + "test:accuracy": "sh ./scripts/run-accuracy-tests.sh" }, "license": "Apache-2.0", "devDependencies": { + "@ai-sdk/anthropic": "^1.2.12", + "@ai-sdk/azure": "^1.3.23", + "@ai-sdk/openai": "^1.3.22", "@eslint/js": "^9.30.1", + "@himanshusinghs/google": "^1.2.11", "@jest/globals": "^30.0.4", "@modelcontextprotocol/inspector": "^0.16.0", "@redocly/cli": "^1.34.4", @@ -41,6 +47,7 @@ "@types/node": "^24.0.12", "@types/simple-oauth2": "^5.0.7", "@types/yargs-parser": "^21.0.3", + "ai": "^4.3.16", "eslint": "^9.30.1", "eslint-config-prettier": "^10.1.5", "eslint-plugin-jest": "^29.0.1", @@ -49,14 +56,18 @@ "jest": "^30.0.4", "jest-environment-node": "^30.0.4", "jest-extended": "^6.0.0", + "microdiff": "^1.5.0", "mongodb-runner": "^5.9.2", + "ollama-ai-provider": "^1.2.0", "openapi-types": "^12.1.3", "openapi-typescript": "^7.8.0", "prettier": "^3.6.2", + "simple-git": "^3.28.0", "ts-jest": "^29.4.0", "tsx": "^4.20.3", "typescript": "^5.8.3", "typescript-eslint": "^8.36.0", + "uuid": "^11.1.0", "yaml": "^2.8.0" }, "dependencies": { diff --git a/resources/test-summary-template.html b/resources/test-summary-template.html new file mode 100644 index 00000000..903457f8 --- /dev/null +++ b/resources/test-summary-template.html @@ -0,0 +1,407 @@ + + +
+ + +Prompt | +Model | +Expected Tool Calls | +LLM Tool Calls | +Accuracy | +Baseline Accuracy | +LLM Response Time (ms) | +Total Tokens Used | +
---|
{
+ readonly modelName: string;
+ readonly provider: string;
+ readonly displayName: string;
+ isAvailable(): boolean;
+ getModel(): P;
+}
+
+export class OpenAIModel implements Model {
+ readonly provider = "OpenAI";
+ readonly displayName: string;
+
+ constructor(readonly modelName: string) {
+ this.displayName = `${this.provider} - ${modelName}`;
+ }
+
+ isAvailable(): boolean {
+ return !!process.env.MDB_OPEN_AI_API_KEY;
+ }
+
+ getModel() {
+ return createOpenAI({
+ apiKey: process.env.MDB_OPEN_AI_API_KEY,
+ })(this.modelName);
+ }
+}
+
+export class AzureOpenAIModel implements Model {
+ readonly provider = "Azure";
+ readonly displayName: string;
+
+ constructor(readonly modelName: string) {
+ this.displayName = `${this.provider} - ${modelName}`;
+ }
+
+ isAvailable(): boolean {
+ return !!process.env.MDB_AZURE_OPEN_AI_API_KEY && !!process.env.MDB_AZURE_OPEN_AI_API_URL;
+ }
+
+ getModel() {
+ return createAzure({
+ baseURL: process.env.MDB_AZURE_OPEN_AI_API_URL,
+ apiKey: process.env.MDB_AZURE_OPEN_AI_API_KEY,
+ apiVersion: "2024-12-01-preview",
+ })(this.modelName);
+ }
+}
+
+export class GeminiModel implements Model {
+ readonly provider = "Google";
+ readonly displayName: string;
+
+ constructor(readonly modelName: string) {
+ this.displayName = `${this.provider} - ${modelName}`;
+ }
+
+ isAvailable(): boolean {
+ return !!process.env.MDB_GEMINI_API_KEY;
+ }
+
+ getModel() {
+ return createGoogleGenerativeAI({
+ apiKey: process.env.MDB_GEMINI_API_KEY,
+ })(this.modelName);
+ }
+}
+
+export class OllamaModel implements Model {
+ readonly provider = "Ollama";
+ readonly displayName: string;
+
+ constructor(readonly modelName: string) {
+ this.displayName = `${this.provider} - ${modelName}`;
+ }
+
+ isAvailable(): boolean {
+ return true;
+ }
+
+ getModel() {
+ return ollama(this.modelName);
+ }
+}
+
+const ALL_TESTABLE_MODELS = [new AzureOpenAIModel("gpt-4o")];
+
+export type TestableModels = ReturnType