Skip to content

Commit

Permalink
Add custom metric framework and cluster API metrics (razee-io#1341)
Browse files Browse the repository at this point in the history
* initial commit: add custom metric framework and cluster API metrics

* add channel, group, subscription API metrics

* update metric syntax, change counter to gauge, add gauges, add inc and dec checks

* rework API metrics

* add routes metrics

* audit fix

* npm install/update

* remove passOperationName

* bump tests

* typo

* test Travis build

* trigger travis

* npm update

* conflict fix

---------

Co-authored-by: ethanstjohn <[email protected]>
  • Loading branch information
ethanstjohn and ethanstjohn authored Nov 10, 2023
1 parent 921caa5 commit d116ea0
Show file tree
Hide file tree
Showing 10 changed files with 670 additions and 39 deletions.
43 changes: 42 additions & 1 deletion app/apollo/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ const { models, connectDb } = require('./models');
const promClient = require('prom-client');
const createMetricsPlugin = require('apollo-metrics');
const apolloMetricsPlugin = createMetricsPlugin(promClient.register);
const { customMetricsClient } = require('../customMetricsClient'); // Add custom metrics plugin
const apolloMaintenancePlugin = require('./maintenance/maintenanceModePlugin.js');
const { GraphqlPubSub } = require('./subscription');

Expand Down Expand Up @@ -140,7 +141,47 @@ const createApolloServer = (schema) => {
initLogger.info(customPlugins, 'Apollo server custom plugin are loaded.');
const server = new ApolloServer({
introspection: true, // set to true as long as user has valid token
plugins: customPlugins,
plugins: [
customPlugins,
{
// Populate API metrics as they occur
requestDidStart(context) {
// Capture the start time when the request starts
const startTime = Date.now();

// Increment API counter metric
customMetricsClient.apiCallsCount.inc();

let encounteredError = false;
return {
didResolveOperation() {
// Parse API operation name
const match = context.request.query.match(/\{\s*(\w+)/);
const operationName = match ? match[1] : 'Query name not found';
// Record API operation duration metrics
const durationInSeconds = (Date.now() - startTime) / 1000;
console.log('potato');
customMetricsClient.apiCallHistogram(operationName).observe(durationInSeconds);
console.log('potato1');
},
didEncounterErrors() {
encounteredError = true;
},
willSendResponse() {
// Parse API operation name
const match = context.request.query.match(/\{\s*(\w+)/);
const operationName = match ? match[1] : 'Query name not found';
// Record API operation success and failure gauge metrics
if (encounteredError) {
customMetricsClient.apiCallCounter(operationName).inc({ status: 'failure' });
} else {
customMetricsClient.apiCallCounter(operationName).inc({ status: 'success' });
}
}
};
},
}
],
schema,
allowBatchedHttpRequests: (process.env.GRAPHQL_DISABLE_BATCHING ? false : true),
formatError: error => {
Expand Down
57 changes: 57 additions & 0 deletions app/customMetricsClient.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/**
* Copyright 2023 IBM Corp. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

const { Counter, Histogram } = require('prom-client');
// Maintain a map of previously created counters and histograms, keyed by the
// (sanitized) metric name. Real Maps are used instead of plain objects because
// operation names are derived from incoming requests: with a plain object, names
// such as "constructor" or "toString" would resolve to inherited members of
// Object.prototype and be returned as if they were metrics.
const counters = new Map();
const histograms = new Map();

// Any character that is not allowed in a Prometheus metric name.
const INVALID_METRIC_NAME_CHARS = /[^a-zA-Z0-9_:]/g;

/**
 * Build a valid Prometheus metric name from an operation name and a suffix.
 *
 * Prometheus metric names must match [a-zA-Z_:][a-zA-Z0-9_:]*; prom-client
 * throws on anything else. Invalid characters are replaced with "_" and a
 * leading digit is prefixed with "_". Names that are already valid are
 * returned unchanged, so existing metric names are preserved.
 *
 * @param {string} operationName - API operation name (may contain any characters).
 * @param {string} suffix - Metric suffix, e.g. "duration_seconds".
 * @returns {string} A metric name of the form `<operation>_<suffix>`.
 */
const buildMetricName = (operationName, suffix) => {
  const sanitized = String(operationName).replace(INVALID_METRIC_NAME_CHARS, '_');
  const prefix = /^[0-9]/.test(sanitized) ? '_' : '';
  return `${prefix}${sanitized}_${suffix}`;
};

const apiCallsCount = new Counter({
  name: 'api_calls_total',
  help: 'Total number of API calls'
});

// NOTE(review): one metric is created per distinct operation name, and for the
// GraphQL API that name is parsed from the client's query, so the number of
// metrics is not bounded here. Consider an allow-list or a label-based design.
const customMetricsClient = {
  apiCallsCount: apiCallsCount,

  /**
   * Count success and failure of each API operation and record as unique metric.
   *
   * @param {string} operationName - API operation name.
   * @returns {Counter} The counter for this operation (created on first use),
   *   to be incremented with a `status` label.
   */
  apiCallCounter(operationName) {
    const name = buildMetricName(operationName, 'counter_result_total');
    let counter = counters.get(name);
    if (!counter) {
      counter = new Counter({
        name,
        help: `Total number of ${operationName} operation calls, labeled by success or failure`,
        labelNames: ['status']
      });
      // Cache by metric name so two operation names that sanitize to the same
      // metric share one instance instead of registering a duplicate.
      counters.set(name, counter);
    }
    return counter;
  },

  /**
   * Track duration of each API operation and record as unique metric.
   *
   * @param {string} operationName - API operation name.
   * @returns {Histogram} The histogram for this operation (created on first
   *   use), observing durations in seconds.
   */
  apiCallHistogram(operationName) {
    const name = buildMetricName(operationName, 'duration_seconds');
    let histogram = histograms.get(name);
    if (!histogram) {
      histogram = new Histogram({
        name,
        help: `Duration of ${operationName} operations in seconds`,
        buckets: [0.1, 0.5, 1, 2, 5]
      });
      histograms.set(name, histogram);
    }
    return histogram;
  }
};

module.exports = {
customMetricsClient
};
2 changes: 1 addition & 1 deletion app/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ const apollo = require('./apollo');

const promClient = require('prom-client');
const collectDefaultMetrics = promClient.collectDefaultMetrics;
collectDefaultMetrics({ timeout: 5000 }); //Collect all default metrics
collectDefaultMetrics({ timeout: 5000 }); //Collect all default metrics
const connections = new promClient.Gauge({ name: 'razee_server_connections_count', help: 'Razee server request count' });
const i18next = require('i18next');
const i18nextMiddleware = require('i18next-http-middleware');
Expand Down
27 changes: 27 additions & 0 deletions app/routes/v1/channels.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const MongoClientClass = require('../../mongo/mongoClient.js');
const MongoClient = new MongoClientClass(mongoConf);
const getOrg = require('../../utils/orgs.js').getOrg;
const { getDecryptedContent } = require('../../apollo/utils/versionUtils');
const { customMetricsClient } = require('../../customMetricsClient'); // Add custom metrics plugin

router.use(asyncHandler(async (req, res, next) => {
req.db = await MongoClient.getClient();
Expand All @@ -33,6 +34,11 @@ router.use(asyncHandler(async (req, res, next) => {
// --url http://localhost:3333/api/v1/channels/:channelName/:versionId \
// --header 'razee-org-key: orgApiKey-api-key-goes-here' \
const getChannelVersion = async (req, res) => {
// Capture the start time when the request starts
const startTime = Date.now();
// Increment API counter metric
customMetricsClient.apiCallsCount.inc();

var orgId = req.org._id;
var channelName = req.params.channelName + '';
var versionId = req.params.versionId + '';
Expand Down Expand Up @@ -60,22 +66,43 @@ const getChannelVersion = async (req, res) => {
org = await Orgs.findOne({ _id: orgId });
deployable = await Channels.findOne({ org_id: orgId, name: channelName });
} else {
// Observe the duration for the histogram
const durationInSeconds = (Date.now() - startTime) / 1000;
customMetricsClient.apiCallHistogram('getChannelVersion').observe(durationInSeconds);
customMetricsClient.apiCallCounter('getChannelVersion').inc({ status: 'failure' });

res.status(404).send({ status: 'error', message: `channel "${channelName}" not found for this org` });
return;
}
}

var deployableVersion = await DeployableVersions.findOne({ org_id: orgId, channel_id: deployable.uuid, uuid: versionId });
if (!deployableVersion) {
// Observe the duration for the histogram
const durationInSeconds = (Date.now() - startTime) / 1000;
customMetricsClient.apiCallHistogram('getChannelVersion').observe(durationInSeconds);
customMetricsClient.apiCallCounter('getChannelVersion').inc({ status: 'failure' });

res.status(404).send({ status: 'error', message: `versionId "${versionId}" not found` });
return;
}

try {
const data = await getDecryptedContent({ logger: req.log, req_id: req.id, me: null }, org, deployableVersion);
res.set('Content-Type', deployableVersion.type);

// Observe the duration for the histogram
const durationInSeconds = (Date.now() - startTime) / 1000;
customMetricsClient.apiCallHistogram('getChannelVersion').observe(durationInSeconds);
customMetricsClient.apiCallCounter('getChannelVersion').inc({ status: 'success' });

res.status(200).send(data.content);
} catch (error) {
// Observe the duration for the histogram
const durationInSeconds = (Date.now() - startTime) / 1000;
customMetricsClient.apiCallHistogram('getChannelVersion').observe(durationInSeconds);
customMetricsClient.apiCallCounter('getChannelVersion').inc({ status: 'failure' });

req.log.error(error);
return res.status(500).json({ status: 'error', message: error.message });
}
Expand Down
21 changes: 21 additions & 0 deletions app/routes/v1/systemSubscriptions.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,17 @@ const { getOrg, bestOrgKey } = require('../../utils/orgs');
const axios = require('axios');
const yaml = require('js-yaml');
const { getRddArgs } = require('../../utils/rdd');
const { customMetricsClient } = require('../../customMetricsClient'); // Add custom metrics plugin

/*
Serves a System Subscription that regenerates the `razee-identity` secret with the 'best' OrgKey value.
*/
const getPrimaryOrgKeySubscription = async(req, res) => {
// Capture the start time when the request starts
const startTime = Date.now();
// Increment API counter metric
customMetricsClient.apiCallsCount.inc();

const razeeIdentitySecretYaml = `apiVersion: v1
kind: Secret
metadata:
Expand All @@ -39,13 +45,23 @@ data:
type: Opaque
`;

// Observe the duration for the histogram
const durationInSeconds = (Date.now() - startTime) / 1000;
customMetricsClient.apiCallHistogram('getPrimaryOrgKeySubscription').observe(durationInSeconds);
customMetricsClient.apiCallCounter('getPrimaryOrgKeySubscription').inc({ status: 'success' });

res.status( 200 ).send( razeeIdentitySecretYaml );
};

/*
Serves a System Subscription that returns a CronJob that updates the operators: Cluster Subscription, Remote Resource and Watch-Keeper
*/
const getOperatorsSubscription = async(req, res) => {
// Capture the start time when the request starts
const startTime = Date.now();
// Increment API counter metric
customMetricsClient.apiCallsCount.inc();

// Get the image and command for the update cronjob from the current values returned from the razeedeploy-job api
const protocol = req.protocol || 'http';
let host = req.header('host') || 'localhost:3333';
Expand Down Expand Up @@ -110,6 +126,11 @@ metadata:
namespace: razeedeploy
`;

// Observe the duration for the histogram
const durationInSeconds = (Date.now() - startTime) / 1000;
customMetricsClient.apiCallHistogram('getOperatorsSubscription').observe(durationInSeconds);
customMetricsClient.apiCallCounter('getOperatorsSubscription').inc({ status: 'success' });

res.status( 200 ).send( razeeupdateYaml );
};

Expand Down
Loading

0 comments on commit d116ea0

Please sign in to comment.