diff --git a/src/current/_data/v25.2/metrics/metrics.yml b/src/current/_data/v25.2/metrics/metrics-cloud.yml similarity index 100% rename from src/current/_data/v25.2/metrics/metrics.yml rename to src/current/_data/v25.2/metrics/metrics-cloud.yml diff --git a/src/current/_data/v25.3/cluster-settings.csv b/src/current/_data/v25.3/cluster-settings.csv new file mode 100644 index 00000000000..270126400c2 --- /dev/null +++ b/src/current/_data/v25.3/cluster-settings.csv @@ -0,0 +1,460 @@ +setting,type,default,description,class +admission.disk_bandwidth_tokens.elastic.enabled,boolean,true,"when true, and provisioned bandwidth for the disk corresponding to a store is configured, tokens for elastic work will be limited if disk bandwidth becomes a bottleneck",system-only +admission.epoch_lifo.enabled,boolean,false,"when true, epoch-LIFO behavior is enabled when there is significant delay in admission",application +admission.epoch_lifo.epoch_closing_delta_duration,duration,5ms,"the delta duration before closing an epoch, for epoch-LIFO admission control ordering",application +admission.epoch_lifo.epoch_duration,duration,100ms,"the duration of an epoch, for epoch-LIFO admission control ordering",application +admission.epoch_lifo.queue_delay_threshold_to_switch_to_lifo,duration,105ms,"the queue delay encountered by a (tenant,priority) for switching to epoch-LIFO ordering",application +admission.kv.enabled,boolean,true,"when true, work performed by the KV layer is subject to admission control",system-only +admission.sql_kv_response.enabled,boolean,true,"when true, work performed by the SQL layer when receiving a KV response is subject to admission control",application +admission.sql_sql_response.enabled,boolean,true,"when true, work performed by the SQL layer when receiving a DistSQL response is subject to admission control",application +bulkio.backup.deprecated_full_backup_with_subdir.enabled,boolean,false,"when true, a backup command with a user specified subdirectory will 
create a full backup at the subdirectory if no backup already exists at that subdirectory",application +bulkio.backup.file_size,byte size,128 MiB,target size for individual data files produced during BACKUP,application +bulkio.backup.read_timeout,duration,5m0s,"amount of time after which a read attempt is considered timed out, which causes the backup to fail",application +bulkio.backup.read_with_priority_after,duration,1m0s,amount of time since the read-as-of time above which a BACKUP should use priority when retrying reads,application +physical_replication.consumer.minimum_flush_interval (alias: bulkio.stream_ingestion.minimum_flush_interval),duration,5s,the minimum timestamp between flushes; flushes may still occur if internal buffers fill up,system-only +changefeed.aggregator.flush_jitter,float,0.1,jitter aggregator flushes as a fraction of min_checkpoint_frequency. This setting has no effect if min_checkpoint_frequency is set to 0.,application +changefeed.backfill.concurrent_scan_requests,integer,0,number of concurrent scan requests per node issued during a backfill,application +changefeed.backfill.scan_request_size,integer,524288,the maximum number of bytes returned by each scan request,application +changefeed.batch_reduction_retry.enabled (alias: changefeed.batch_reduction_retry_enabled),boolean,false,"if true, kafka changefeeds upon erroring on an oversized batch will attempt to resend the messages with progressively lower batch sizes",application +changefeed.default_range_distribution_strategy,enumeration,default,"configures how work is distributed among nodes for a given changefeed. for the most balanced distribution, use `balanced_simple`. 
changing this setting will not override locality restrictions [default = 0, balanced_simple = 1]",application +changefeed.event_consumer_worker_queue_size,integer,16,"if changefeed.event_consumer_workers is enabled, this setting sets the maximum number of events which a worker can buffer",application +changefeed.event_consumer_workers,integer,0,"the number of workers to use when processing events: <0 disables, 0 assigns a reasonable default, >0 assigns the setting value. for experimental/core changefeeds and changefeeds using parquet format, this is disabled",application +changefeed.fast_gzip.enabled,boolean,true,use fast gzip implementation,application +changefeed.span_checkpoint.lag_threshold (alias: changefeed.frontier_highwater_lag_checkpoint_threshold),duration,10m0s,"the amount of time a changefeed's lagging (slowest) spans must lag behind its leading (fastest) spans before a span-level checkpoint to save leading span progress is written; if 0, span-level checkpoints due to lagging spans are disabled",application +changefeed.kafka_v2_error_details.enabled,boolean,true,"if enabled, Kafka v2 sinks will include the message key, size, and MVCC timestamp in message too large errors",application +changefeed.memory.per_changefeed_limit,byte size,512 MiB,controls amount of data that can be buffered per changefeed,application +changefeed.resolved_timestamp.min_update_interval (alias: changefeed.min_highwater_advance),duration,0s,minimum amount of time that must have elapsed since the last time a changefeed's resolved timestamp was updated before it is eligible to be updated again; default of 0 means no minimum interval is enforced but updating will still be limited by the average time it takes to checkpoint progress,application +changefeed.node_throttle_config,string,,specifies node level throttling configuration for all changefeeds,application +changefeed.protect_timestamp.max_age,duration,96h0m0s,fail the changefeed if the protected timestamp age exceeds this 
threshold; 0 disables expiration,application +changefeed.protect_timestamp_interval,duration,10m0s,controls how often the changefeed forwards its protected timestamp to the resolved timestamp,application +changefeed.schema_feed.read_with_priority_after,duration,1m0s,retry with high priority if we were not able to read descriptors for too long; 0 disables,application +changefeed.sink_io_workers,integer,0,"the number of workers used by changefeeds when sending requests to the sink (currently the batching versions of webhook, pubsub, and kafka sinks that are enabled by changefeed.new__sink_enabled only): <0 disables, 0 assigns a reasonable default, >0 assigns the setting value",application +cloudstorage.azure.concurrent_upload_buffers,integer,1,controls the number of concurrent buffers that will be used by the Azure client when uploading chunks. Each buffer can buffer up to cloudstorage.write_chunk.size of memory during an upload,application +cloudstorage.azure.read.node_burst_limit,byte size,0 B,burst limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.azure.read.node_rate_limit,byte size,0 B,limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.azure.write.node_burst_limit,byte size,0 B,burst limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.azure.write.node_rate_limit,byte size,0 B,limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.gs.read.node_burst_limit,byte size,0 B,burst limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.gs.read.node_rate_limit,byte size,0 B,limit on number of 
bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.gs.write.node_burst_limit,byte size,0 B,burst limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.gs.write.node_rate_limit,byte size,0 B,limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.http.custom_ca,string,,custom root CA (appended to system's default CAs) for verifying certificates when interacting with HTTPS storage,application +cloudstorage.http.read.node_burst_limit,byte size,0 B,burst limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.http.read.node_rate_limit,byte size,0 B,limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.http.write.node_burst_limit,byte size,0 B,burst limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.http.write.node_rate_limit,byte size,0 B,limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.nodelocal.read.node_burst_limit,byte size,0 B,burst limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.nodelocal.read.node_rate_limit,byte size,0 B,limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.nodelocal.write.node_burst_limit,byte size,0 B,burst limit on number of bytes per second per node across operations writing to the designated cloud 
storage provider if non-zero,application +cloudstorage.nodelocal.write.node_rate_limit,byte size,0 B,limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.nullsink.read.node_burst_limit,byte size,0 B,burst limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.nullsink.read.node_rate_limit,byte size,0 B,limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.nullsink.write.node_burst_limit,byte size,0 B,burst limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.nullsink.write.node_rate_limit,byte size,0 B,limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.s3.read.node_burst_limit,byte size,0 B,burst limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.s3.read.node_rate_limit,byte size,0 B,limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.s3.write.node_burst_limit,byte size,0 B,burst limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.s3.write.node_rate_limit,byte size,0 B,limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.timeout,duration,10m0s,the timeout for import/export storage operations,application +cloudstorage.userfile.read.node_burst_limit,byte size,0 B,burst limit on number of bytes per second per 
node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.userfile.read.node_rate_limit,byte size,0 B,limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.userfile.write.node_burst_limit,byte size,0 B,burst limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cloudstorage.userfile.write.node_rate_limit,byte size,0 B,limit on number of bytes per second per node across operations writing to the designated cloud storage provider if non-zero,application +cluster.auto_upgrade.enabled,boolean,true,disable automatic cluster version upgrade until reset,application +cluster.organization,string,,organization name,system-visible +cluster.preserve_downgrade_option,string,,disable (automatic or manual) cluster version upgrade from the specified version until reset,application +debug.zip.redact_addresses.enabled,boolean,false,enables the redaction of hostnames and ip addresses in debug zip,application +diagnostics.active_query_dumps.enabled,boolean,true,experimental: enable dumping of anonymized active queries to disk when node is under memory pressure,system-visible +diagnostics.forced_sql_stat_reset.interval,duration,2h0m0s,interval after which the reported SQL Stats are reset even if not collected by telemetry reporter. 
It has a max value of 24H.,application +diagnostics.memory_monitoring_dumps.enabled,boolean,true,enable dumping of memory monitoring state at the same time as heap profiles are taken,system-visible +diagnostics.reporting.enabled,boolean,true,"enable reporting diagnostic metrics to cockroach labs, but is ignored for Trial or Free licenses",application +diagnostics.reporting.interval,duration,1h0m0s,interval at which diagnostics data should be reported,application +enterprise.license,string,,the encoded cluster license,system-visible +external.graphite.endpoint,string,,"if nonempty, push server metrics to the Graphite or Carbon server at the specified host:port",application +external.graphite.interval,duration,10s,the interval at which metrics are pushed to Graphite (if enabled),application +feature.backup.enabled,boolean,true,"set to true to enable backups, false to disable; default is true",application +feature.changefeed.enabled,boolean,true,"set to true to enable changefeeds, false to disable; default is true",application +feature.export.enabled,boolean,true,"set to true to enable exports, false to disable; default is true",application +feature.import.enabled,boolean,true,"set to true to enable imports, false to disable; default is true",application +feature.infer_rbr_region_col_using_constraint.enabled,boolean,false,"set to true to enable looking up the region column via a foreign key constraint in a REGIONAL BY ROW table, false to disable; default is false",application +feature.restore.enabled,boolean,true,"set to true to enable restore, false to disable; default is true",application +feature.schema_change.enabled,boolean,true,"set to true to enable schema changes, false to disable; default is true",application +feature.stats.enabled,boolean,true,"set to true to enable CREATE STATISTICS/ANALYZE, false to disable; default is true",application +feature.vector_index.enabled,boolean,false,"set to true to enable vector indexes, false to disable; default is 
false",application +jobs.retention_time,duration,336h0m0s,the amount of time for which records for completed jobs are retained,application +kv.allocator.lease_rebalance_threshold,float,0.05,minimum fraction away from the mean a store's lease count can be before it is considered for lease-transfers,system-only +kv.allocator.load_based_lease_rebalancing.enabled,boolean,true,set to enable rebalancing of range leases based on load and latency,system-only +kv.allocator.load_based_rebalancing,enumeration,leases and replicas,"whether to rebalance based on the distribution of load across stores [off = 0, leases = 1, leases and replicas = 2]",system-only +kv.allocator.load_based_rebalancing.objective,enumeration,cpu,"what objective does the cluster use to rebalance; if set to `qps` the cluster will attempt to balance qps among stores, if set to `cpu` the cluster will attempt to balance cpu usage among stores [qps = 0, cpu = 1]",system-only +kv.allocator.load_based_rebalancing_interval,duration,1m0s,the rough interval at which each store will check for load-based lease / replica rebalancing opportunities,system-only +kv.allocator.qps_rebalance_threshold,float,0.1,minimum fraction away from the mean a store's QPS (such as queries per second) can be before it is considered overfull or underfull,system-only +kv.allocator.range_rebalance_threshold,float,0.05,minimum fraction away from the mean a store's range count can be before it is considered overfull or underfull,system-only +kv.allocator.store_cpu_rebalance_threshold,float,0.1,minimum fraction away from the mean a store's cpu usage can be before it is considered overfull or underfull,system-only +kv.bulk_io_write.max_rate,byte size,1.0 TiB,the rate limit (bytes/sec) to use for writes to disk on behalf of bulk io ops,system-only +kv.bulk_io_write.min_capacity_remaining_fraction,float,0.05,remaining store capacity fraction below which bulk ingestion requests are rejected,system-only +kv.bulk_sst.max_allowed_overage,byte 
size,64 MiB,"if positive, allowed size in excess of target size for SSTs from export requests; export requests (i.e. BACKUP) may buffer up to the sum of kv.bulk_sst.target_size and kv.bulk_sst.max_allowed_overage in memory",system-only +kv.bulk_sst.target_size,byte size,16 MiB,target size for SSTs emitted from export requests; export requests (i.e. BACKUP) may buffer up to the sum of kv.bulk_sst.target_size and kv.bulk_sst.max_allowed_overage in memory,system-visible +kv.closed_timestamp.follower_reads.enabled (alias: kv.closed_timestamp.follower_reads_enabled),boolean,true,allow (all) replicas to serve consistent historical reads based on closed timestamp information,system-visible +kv.closed_timestamp.lead_for_global_reads_auto_tune.enabled,boolean,false,"if enabled, observed network latency between leaseholders and their furthest follower will be used to adjust closed timestamp policies for ranges configured to serve global reads. kv.closed_timestamp.lead_for_global_reads_override takes precedence if set.",system-visible +kv.closed_timestamp.lead_for_global_reads_override,duration,0s,"if nonzero, overrides the lead time that global_read ranges use to publish closed timestamps",system-visible +kv.closed_timestamp.side_transport_interval,duration,200ms,the interval at which the closed timestamp side-transport attempts to advance each range's closed timestamp; set to 0 to disable the side-transport,system-visible +kv.closed_timestamp.target_duration,duration,3s,"if nonzero, attempt to provide closed timestamp notifications for timestamps trailing cluster time by approximately this duration",system-visible +kv.dist_sender.circuit_breaker.cancellation.enabled,boolean,true,"when enabled, in-flight requests will be cancelled when the circuit breaker trips",application +kv.dist_sender.circuit_breaker.cancellation.write_grace_period,duration,10s,"how long after the circuit breaker trips to cancel write requests (these can't retry internally, so should be long 
enough to allow quorum/lease recovery)",application +kv.dist_sender.circuit_breaker.probe.interval,duration,3s,interval between replica probes,application +kv.dist_sender.circuit_breaker.probe.threshold,duration,3s,duration of errors or stalls after which a replica will be probed,application +kv.dist_sender.circuit_breaker.probe.timeout,duration,3s,timeout for replica probes,application +kv.dist_sender.circuit_breakers.mode,enumeration,liveness range only,"set of ranges to trip circuit breakers for failing or stalled replicas [no ranges = 0, liveness range only = 1, all ranges = 2]",application +kv.lease_transfer_read_summary.global_budget,byte size,0 B,controls the maximum number of bytes that will be used to summarize the global segment of the timestamp cache during lease transfers and range merges. A smaller budget will result in loss of precision.,system-only +kv.lease_transfer_read_summary.local_budget,byte size,4.0 MiB,controls the maximum number of bytes that will be used to summarize the local segment of the timestamp cache during lease transfers and range merges. A smaller budget will result in loss of precision.,system-only +kv.log_range_and_node_events.enabled,boolean,true,"set to true to transactionally log range events (e.g., split, merge, add/remove voter/non-voter) into system.rangelog and node join and restart events into system.eventlog",system-only +kv.protectedts.reconciliation.interval,duration,5m0s,the frequency for reconciling jobs with protected timestamp records,system-visible +kv.raft.leader_fortification.fraction_enabled,float,1,"controls the fraction of ranges for which the raft leader fortification protocol is enabled. Leader fortification is needed for a range to use a Leader lease. Set to 0.0 to disable leader fortification and, by extension, Leader leases. Set to 1.0 to enable leader fortification for all ranges and, by extension, use Leader leases for all ranges which do not require expiration-based leases. 
Set to a value between 0.0 and 1.0 to gradually roll out Leader leases across the ranges in a cluster.",system-only +kv.range.range_size_hard_cap,byte size,8.0 GiB,hard cap on the maximum size a range is allowed to grow to without splitting before writes to the range are blocked. Takes precedence over all other configurations,system-only +kv.range_split.by_load.enabled (alias: kv.range_split.by_load_enabled),boolean,true,allow automatic splits of ranges based on where load is concentrated,system-only +kv.range_split.load_cpu_threshold,duration,500ms,"the CPU use per second over which, the range becomes a candidate for load based splitting",system-only +kv.range_split.load_qps_threshold,integer,2500,"the QPS over which, the range becomes a candidate for load based splitting",system-only +kv.rangefeed.client.stream_startup_rate,integer,100,controls the rate per second the client will initiate new rangefeed stream for a single range; 0 implies unlimited,application +kv.rangefeed.closed_timestamp_refresh_interval,duration,3s,the interval at which closed-timestamp updates are delivered to rangefeeds; set to 0 to use kv.closed_timestamp.side_transport_interval,system-visible +kv.rangefeed.enabled,boolean,false,"if set, rangefeed registration is enabled",system-visible +kv.replica_circuit_breaker.slow_replication_threshold,duration,1m0s,duration after which slow proposals trip the per-Replica circuit breaker (zero duration disables breakers),system-only +kv.replica_raft.leaderless_unavailable_threshold,duration,1m0s,duration after which a leaderless replica is considered unavailable. 
Set to 0 to disable leaderless replica availability checks,system-only +kv.replica_stats.addsst_request_size_factor,integer,50000,"the divisor that is applied to addsstable request sizes, then recorded in a leaseholder's QPS; 0 means all requests are treated as cost 1",system-only +kv.replication_reports.interval,duration,1m0s,"the frequency for generating the replication_constraint_stats, replication_stats_report and replication_critical_localities reports (set to 0 to disable)",system-only +kv.snapshot_rebalance.max_rate,byte size,32 MiB,the rate limit (bytes/sec) to use for rebalance and upreplication snapshots,system-only +kv.transaction.max_intents_and_locks,integer,0,"maximum count of inserts or durable locks for a single transaction, 0 to disable",application +kv.transaction.max_intents_bytes,integer,4194304,maximum number of bytes used to track locks in transactions,application +kv.transaction.max_refresh_spans_bytes,integer,4194304,maximum number of bytes used to track refresh spans in serializable transactions,application +kv.transaction.randomized_anchor_key.enabled,boolean,false,dictates whether a transaction's anchor key is randomized or not,application +kv.transaction.reject_over_max_intents_budget.enabled,boolean,false,"if set, transactions that exceed their lock tracking budget (kv.transaction.max_intents_bytes) are rejected instead of having their lock spans imprecisely compressed",application +kv.transaction.write_buffering.enabled,boolean,false,"if enabled, transactional writes are buffered on the client",application +kv.transaction.write_buffering.max_buffer_size,byte size,4.0 MiB,"if non-zero, defines the maximum size of the buffer that will be used to buffer transactional writes per-transaction",application +kv.transaction.write_pipelining.locking_reads.enabled,boolean,true,"if enabled, transactional locking reads are pipelined through Raft consensus",application +kv.transaction.write_pipelining.ranged_writes.enabled,boolean,true,"if enabled, 
transactional ranged writes are pipelined through Raft consensus",application +kv.transaction.write_pipelining.enabled (alias: kv.transaction.write_pipelining_enabled),boolean,true,"if enabled, transactional writes are pipelined through Raft consensus",application +kv.transaction.write_pipelining.max_batch_size (alias: kv.transaction.write_pipelining_max_batch_size),integer,128,"if non-zero, defines the maximum size batch that will be pipelined through Raft consensus",application +kvadmission.store.provisioned_bandwidth,byte size,0 B,"if set to a non-zero value, this is used as the provisioned bandwidth (in bytes/s), for each store. It can be overridden on a per-store basis using the --store flag. Note that setting the provisioned bandwidth to a positive value may enable disk bandwidth based admission control, since admission.disk_bandwidth_tokens.elastic.enabled defaults to true",system-only +kvadmission.store.snapshot_ingest_bandwidth_control.enabled,boolean,true,"if set to true, snapshot ingests will be subject to disk write control in AC",system-only +obs.tablemetadata.automatic_updates.enabled,boolean,false,enables automatic updates of the table metadata cache system.table_metadata,application +obs.tablemetadata.data_valid_duration,duration,20m0s,the duration for which the data in system.table_metadata is considered valid,application +schedules.backup.gc_protection.enabled,boolean,true,enable chaining of GC protection across backups run as part of a schedule,application +security.client_cert.subject_required.enabled,boolean,false,mandates a requirement for subject role to be set for db user,system-visible +security.ocsp.mode,enumeration,off,"use OCSP to check whether TLS certificates are revoked. If the OCSP server is unreachable, in strict mode all certificates will be rejected and in lax mode all certificates will be accepted. 
[off = 0, lax = 1, strict = 2]",application +security.ocsp.timeout,duration,3s,timeout before considering the OCSP server unreachable,application +security.provisioning.ldap.enabled,boolean,false,enables automatic creation of SQL users upon successful LDAP login,application +server.auth_log.sql_connections.enabled,boolean,false,"if set, log SQL client connect and disconnect events to the SESSIONS log channel (note: may hinder performance on loaded nodes)",application +server.auth_log.sql_sessions.enabled,boolean,false,"if set, log verbose SQL session authentication events to the SESSIONS log channel (note: may hinder performance on loaded nodes). Session start and end events are always logged regardless of this setting; disable the SESSIONS log channel to suppress them.",application +server.authentication_cache.enabled,boolean,true,enables a cache used during authentication to avoid lookups to system tables when retrieving per-user authentication-related information,application +server.child_metrics.enabled,boolean,false,"enables the exporting of child metrics, additional prometheus time series with extra labels",application +server.child_metrics.include_aggregate.enabled,boolean,true,include the reporting of the aggregate time series when child metrics are enabled. This cluster setting has no effect if child metrics are disabled.,application +server.clock.forward_jump_check.enabled (alias: server.clock.forward_jump_check_enabled),boolean,false,"if enabled, forward clock jumps > max_offset/2 will cause a panic",application +server.clock.persist_upper_bound_interval,duration,0s,"the interval between persisting the wall time upper bound of the clock. The clock does not generate a wall time greater than the persisted timestamp and will panic if it sees a wall time greater than this value. When cockroach starts, it waits for the wall time to catch-up till this persisted timestamp. This guarantees monotonic wall time across server restarts. 
Not setting this or setting a value of 0 disables this feature.",application +server.consistency_check.max_rate,byte size,8.0 MiB,the rate limit (bytes/sec) to use for consistency checks; used in conjunction with server.consistency_check.interval to control the frequency of consistency checks. Note that setting this too high can negatively impact performance.,system-only +server.eventlog.enabled,boolean,true,"if set, logged notable events are also stored in the table system.eventlog",application +server.eventlog.ttl,duration,2160h0m0s,"if nonzero, entries in system.eventlog older than this duration are periodically purged",application +server.host_based_authentication.configuration,string,,host-based authentication configuration to use during connection authentication,application +server.hot_ranges_request.node.timeout,duration,5m0s,"the duration allowed for a single node to return hot range data before the request is cancelled; if set to 0, there is no timeout",application +server.hsts.enabled,boolean,false,"if true, HSTS headers will be sent along with all HTTP requests. The headers will contain a max-age setting of one year. Browsers honoring the header will always use HTTPS to access the DB Console. Ensure that TLS is correctly configured prior to enabling.",application +server.http.base_path,string,/,path to redirect the user to upon successful login,application +server.identity_map.configuration,string,,system-identity to database-username mappings,application +server.jwt_authentication.audience,string,,sets accepted audience values for JWT logins over the SQL interface,application +server.jwt_authentication.claim,string,,sets the JWT claim that is parsed to get the username,application +server.jwt_authentication.client.timeout,duration,15s,"sets the client timeout for external calls made during JWT authentication (e.g. 
fetching JWKS, etc.)",application +server.jwt_authentication.enabled,boolean,false,enables or disables JWT login for the SQL interface,application +server.jwt_authentication.issuers.configuration (alias: server.jwt_authentication.issuers),string,,sets accepted issuer values for JWT logins over the SQL interface which can be a single issuer URL string or a JSON string containing an array of issuer URLs or a JSON object containing map of issuer URLS to JWKS URIs,application +server.jwt_authentication.issuers.custom_ca,string,,sets the PEM encoded custom root CA for verifying certificates while fetching JWKS,application +server.jwt_authentication.jwks,string,"{""keys"":[]}",sets the public key set for JWT logins over the SQL interface (JWKS format),application +server.jwt_authentication.jwks_auto_fetch.enabled,boolean,false,"enables or disables automatic fetching of JWKS from the issuer's well-known endpoint or JWKS URI set in JWTAuthIssuersConfig. If this is enabled, the server.jwt_authentication.jwks will be ignored.",application +server.ldap_authentication.client.tls_certificate,string,,sets the client certificate PEM for establishing mTLS connection with LDAP server,application +server.ldap_authentication.client.tls_key,string,,sets the client key PEM for establishing mTLS connection with LDAP server,application +server.ldap_authentication.domain.custom_ca,string,,sets the PEM encoded custom root CA for verifying domain certificates when establishing connection with LDAP server,application +server.log_gc.max_deletions_per_cycle,integer,1000,the maximum number of entries to delete on each purge of log-like system tables,application +server.log_gc.period,duration,1h0m0s,the period at which log-like system tables are checked for old entries,application +server.max_connections_per_gateway,integer,-1,the maximum number of SQL connections per gateway allowed at a given time (note: this will only limit future connection attempts and will not affect already established 
connections). Negative values result in unlimited number of connections. Superusers are not affected by this limit.,application +server.max_open_transactions_per_gateway,integer,-1,the maximum number of open SQL transactions per gateway allowed at a given time. Negative values result in unlimited number of connections. Superusers are not affected by this limit.,application +server.oidc_authentication.autologin.enabled (alias: server.oidc_authentication.autologin),boolean,false,"if true, logged-out visitors to the DB Console will be automatically redirected to the OIDC login endpoint",application +server.oidc_authentication.button_text,string,Log in with your OIDC provider,text to show on button on DB Console login page to login with your OIDC provider (only shown if OIDC is enabled),application +server.oidc_authentication.claim_json_key,string,,sets JSON key of principal to extract from payload after OIDC authentication completes (usually email or sid),application +server.oidc_authentication.client.timeout,duration,15s,"sets the client timeout for external calls made during OIDC authentication (e.g. 
authorization code flow, etc.)",application +server.oidc_authentication.client_id,string,,sets OIDC client id,application +server.oidc_authentication.client_secret,string,,sets OIDC client secret,application +server.oidc_authentication.enabled,boolean,false,enables or disables OIDC login for the DB Console,application +server.oidc_authentication.principal_regex,string,(.+),"regular expression to apply to extracted principal (see claim_json_key setting) to translate to SQL user (golang regex format, must include 1 grouping to extract)",application +server.oidc_authentication.provider.custom_ca,string,,sets the PEM encoded custom root CA for verifying certificates while authenticating through the OIDC provider,application +server.oidc_authentication.provider_url,string,,sets OIDC provider URL ({provider_url}/.well-known/openid-configuration must resolve),application +server.oidc_authentication.redirect_url,string,https://localhost:8080/oidc/v1/callback,sets OIDC redirect URL via a URL string or a JSON string containing a required `redirect_urls` key with an object that maps from region keys to URL strings (URLs should point to your load balancer and must route to the path /oidc/v1/callback),application +server.oidc_authentication.scopes,string,openid,"sets OIDC scopes to include with authentication request (space delimited list of strings, required to start with `openid`)",application +server.rangelog.ttl,duration,720h0m0s,"if nonzero, entries in system.rangelog older than this duration are periodically purged",system-only +server.redact_sensitive_settings.enabled,boolean,false,enables or disables the redaction of sensitive settings in the output of SHOW CLUSTER SETTINGS and SHOW ALL CLUSTER SETTINGS for users without the MODIFYCLUSTERSETTING privilege,application +server.shutdown.connections.timeout (alias: server.shutdown.connection_wait),duration,0s,the maximum amount of time a server waits for all SQL connections to be closed before proceeding with a drain. 
(note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting),application +server.shutdown.initial_wait (alias: server.shutdown.drain_wait),duration,0s,"the amount of time a server waits in an unready state before proceeding with a drain (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting. --drain-wait is to specify the duration of the whole draining process, while server.shutdown.initial_wait is to set the wait time for health probes to notice that the node is not ready.)",application +server.shutdown.lease_transfer_iteration.timeout (alias: server.shutdown.lease_transfer_wait),duration,5s,the timeout for a single iteration of the range lease transfer phase of draining (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting),system-only +server.shutdown.transactions.timeout (alias: server.shutdown.query_wait),duration,10s,the timeout for waiting for active transactions to finish during a drain (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting),application +server.sql_tcp_keep_alive.count,integer,3,maximum number of probes that will be sent out before a connection is dropped because it's unresponsive (Linux and Darwin only),application +server.sql_tcp_keep_alive.interval,duration,10s,time between keep alive probes and idle time before probes are sent out,application +server.time_until_store_dead,duration,5m0s,"the time after which if there is no new gossiped information about a store, it is considered dead",application +server.user_login.cert_password_method.auto_scram_promotion.enabled,boolean,true,whether to automatically promote cert-password authentication to use SCRAM,application +server.user_login.downgrade_scram_stored_passwords_to_bcrypt.enabled,boolean,true,"if server.user_login.password_encryption=crdb-bcrypt, this controls whether to 
automatically re-encode stored passwords using scram-sha-256 to crdb-bcrypt",application +server.user_login.min_password_length,integer,1,the minimum length accepted for passwords set in cleartext via SQL. Note that a value lower than 1 is ignored: passwords cannot be empty in any case. This setting only applies when adding new users or altering an existing user's password; it will not affect existing logins.,application +server.user_login.password_encryption,enumeration,scram-sha-256,"which hash method to use to encode cleartext passwords passed via ALTER/CREATE USER/ROLE WITH PASSWORD [crdb-bcrypt = 2, scram-sha-256 = 3]",application +server.user_login.password_hashes.default_cost.crdb_bcrypt,integer,10,the hashing cost to use when storing passwords supplied as cleartext by SQL clients with the hashing method crdb-bcrypt (allowed range: 4-31),application +server.user_login.password_hashes.default_cost.scram_sha_256,integer,10610,the hashing cost to use when storing passwords supplied as cleartext by SQL clients with the hashing method scram-sha-256 (allowed range: 4096-240000000000),application +server.user_login.rehash_scram_stored_passwords_on_cost_change.enabled,boolean,true,"if server.user_login.password_hashes.default_cost.scram_sha_256 differs from the cost in a stored hash, this controls whether to automatically re-encode stored passwords using scram-sha-256 with the new default cost",application +server.user_login.timeout,duration,10s,timeout after which client authentication times out if some system range is unavailable (0 = no timeout),application +server.user_login.upgrade_bcrypt_stored_passwords_to_scram.enabled,boolean,true,"if server.user_login.password_encryption=scram-sha-256, this controls whether to automatically re-encode stored passwords using crdb-bcrypt to scram-sha-256",application +server.web_session.purge.ttl,duration,1h0m0s,"if nonzero, entries in system.web_sessions older than this duration are periodically purged",application 
+server.web_session.timeout (alias: server.web_session_timeout),duration,168h0m0s,the duration that a newly created web session will be valid,application +spanconfig.bounds.enabled,boolean,true,dictates whether span config bounds are consulted when serving span configs for secondary tenants,system-only +spanconfig.range_coalescing.system.enabled (alias: spanconfig.storage_coalesce_adjacent.enabled),boolean,true,"collapse adjacent ranges with the same span configs, for the ranges specific to the system tenant",system-only +spanconfig.range_coalescing.application.enabled (alias: spanconfig.tenant_coalesce_adjacent.enabled),boolean,true,collapse adjacent ranges with the same span configs across all secondary tenant keyspaces,system-only +sql.auth.change_own_password.enabled,boolean,false,"controls whether a user is allowed to change their own password, even if they have no other privileges",application +sql.auth.grant_option_for_owner.enabled,boolean,true,determines whether the GRANT OPTION for privileges is implicitly given to the owner of an object,application +sql.auth.grant_option_inheritance.enabled,boolean,true,determines whether the GRANT OPTION for privileges is inherited through role membership,application +sql.auth.public_schema_create_privilege.enabled,boolean,true,determines whether to grant all users the CREATE privileges on the public schema when it is created,application +sql.closed_session_cache.capacity,integer,1000,the maximum number of sessions in the cache,application +sql.closed_session_cache.time_to_live,integer,3600,"the maximum time to live, in seconds",application +sql.contention.event_store.capacity,byte size,64 MiB,the in-memory storage capacity per-node of contention event store,application +sql.contention.event_store.duration_threshold,duration,0s,minimum contention duration to cause the contention events to be collected into crdb_internal.transaction_contention_events,application 
+sql.contention.record_serialization_conflicts.enabled,boolean,true,enables recording 40001 errors with conflicting txn meta as SERIALIZATION_CONFLICT contention events into crdb_internal.transaction_contention_events,application +sql.contention.txn_id_cache.max_size,byte size,64 MiB,the maximum byte size TxnID cache will use (set to 0 to disable),application +sql.cross_db_fks.enabled,boolean,false,"if true, creating foreign key references across databases is allowed",application +sql.cross_db_sequence_owners.enabled,boolean,false,"if true, creating sequences owned by tables from other databases is allowed",application +sql.cross_db_sequence_references.enabled,boolean,false,"if true, sequences referenced by tables from other databases are allowed",application +sql.cross_db_views.enabled,boolean,false,"if true, creating views that refer to other databases is allowed",application +sql.defaults.cost_scans_with_default_col_size.enabled,boolean,false,"setting to true uses the same size for all columns to compute scan cost +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.datestyle,enumeration,"iso, mdy","default value for DateStyle session setting [iso, mdy = 0, iso, dmy = 1, iso, ymd = 2] +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.default_hash_sharded_index_bucket_count,integer,16,"used as bucket count if bucket count is not specified in hash sharded index definition +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... 
SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.default_int_size,integer,8,"the size, in bytes, of an INT type +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.disallow_full_table_scans.enabled,boolean,false,"setting to true rejects queries that have planned a full table scan; set large_full_scan_rows > 0 to allow small full table scans estimated to read fewer than large_full_scan_rows +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.distsql,enumeration,auto,"default distributed SQL execution mode [off = 0, auto = 1, on = 2, always = 3] +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.experimental_alter_column_type.enabled,boolean,false,"default value for experimental_alter_column_type session setting; enables the use of ALTER COLUMN TYPE for general conversions +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.experimental_distsql_planning,enumeration,off,"default experimental_distsql_planning mode; enables experimental opt-driven DistSQL planning [off = 0, on = 1] +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... 
SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.experimental_enable_unique_without_index_constraints.enabled,boolean,false,"default value for experimental_enable_unique_without_index_constraints session setting; disables unique without index constraints by default +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.experimental_implicit_column_partitioning.enabled,boolean,false,"default value for experimental_enable_implicit_column_partitioning; allows for the use of implicit column partitioning +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.experimental_temporary_tables.enabled,boolean,false,"default value for experimental_enable_temp_tables; allows for use of temporary tables by default +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.foreign_key_cascades_limit,integer,10000,"default value for foreign_key_cascades_limit session setting; limits the number of cascading operations that run as part of a single query +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... 
SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.idle_in_session_timeout,duration,0s,"default value for the idle_in_session_timeout; default value for the idle_in_session_timeout session setting; controls the duration a session is permitted to idle before the session is terminated; if set to 0, there is no timeout +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.idle_in_transaction_session_timeout,duration,0s,"default value for the idle_in_transaction_session_timeout; controls the duration a session is permitted to idle in a transaction before the session is terminated; if set to 0, there is no timeout +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.implicit_select_for_update.enabled,boolean,true,"default value for enable_implicit_select_for_update session setting; enables FOR UPDATE locking during the row-fetch phase of mutation statements +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.insert_fast_path.enabled,boolean,true,"default value for enable_insert_fast_path session setting; enables a specialized insert path +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... 
SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.intervalstyle,enumeration,postgres,"default value for IntervalStyle session setting [postgres = 0, iso_8601 = 1, sql_standard = 2] +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.large_full_scan_rows,float,0,"default value for large_full_scan_rows session variable which determines the table size at which full scans are considered large and disallowed when disallow_full_table_scans is set to true; set to 0 to reject all full table or full index scans when disallow_full_table_scans is true +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.locality_optimized_partitioned_index_scan.enabled,boolean,true,"default value for locality_optimized_partitioned_index_scan session setting; enables searching for rows in the current region before searching remote regions +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.lock_timeout,duration,0s,"default value for the lock_timeout; default value for the lock_timeout session setting; controls the duration a query is permitted to wait while attempting to acquire a lock on a key or while blocking on an existing lock in order to perform a non-locking read on a key; if set to 0, there is no timeout +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... 
SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.on_update_rehome_row.enabled,boolean,true,"default value for on_update_rehome_row; enables ON UPDATE rehome_row() expressions to trigger on updates +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.optimizer_use_histograms.enabled,boolean,true,"default value for optimizer_use_histograms session setting; enables usage of histograms in the optimizer by default +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.optimizer_use_multicol_stats.enabled,boolean,true,"default value for optimizer_use_multicol_stats session setting; enables usage of multi-column stats in the optimizer by default +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.override_alter_primary_region_in_super_region.enabled,boolean,false,"default value for override_alter_primary_region_in_super_region; allows for altering the primary region even if the primary region is a member of a super region +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... 
SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.override_multi_region_zone_config.enabled,boolean,false,"default value for override_multi_region_zone_config; allows for overriding the zone configs of a multi-region table or database +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.prefer_lookup_joins_for_fks.enabled,boolean,false,"default value for prefer_lookup_joins_for_fks session setting; causes foreign key operations to use lookup joins when possible +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.primary_region,string,,"if not empty, all databases created without a PRIMARY REGION will implicitly have the given PRIMARY REGION +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.reorder_joins_limit,integer,8,"default number of joins to reorder +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.require_explicit_primary_keys.enabled,boolean,false,"default value for requiring explicit primary keys in CREATE TABLE statements +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... 
SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.results_buffer.size,byte size,512 KiB,"default size of the buffer that accumulates results for a statement or a batch of statements before they are sent to the client. This can be overridden on an individual connection with the 'results_buffer_size' parameter. Note that auto-retries generally only happen while no results have been delivered to the client, so reducing this size can increase the number of retriable errors a client receives. On the other hand, increasing the buffer size can increase the delay until the client receives the first result row. Updating the setting only affects new connections. Setting to 0 disables any buffering. +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.serial_normalization,enumeration,rowid,"default handling of SERIAL in table definitions [rowid = 0, virtual_sequence = 1, sql_sequence = 2, sql_sequence_cached = 3, unordered_rowid = 4, sql_sequence_cached_node = 5] +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.statement_timeout,duration,0s,"default value for the statement_timeout; default value for the statement_timeout session setting; controls the duration a query is permitted to run before it is canceled; if set to 0, there is no timeout +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... 
SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.stub_catalog_tables.enabled,boolean,true,"default value for stub_catalog_tables session setting +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.super_regions.enabled,boolean,false,"default value for enable_super_regions; allows for the usage of super regions +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.transaction_rows_read_err,integer,0,"the limit for the number of rows read by a SQL transaction which - once exceeded - will fail the transaction (or will trigger a logging event to SQL_INTERNAL_PERF for internal transactions); use 0 to disable +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.transaction_rows_read_log,integer,0,"the threshold for the number of rows read by a SQL transaction which - once exceeded - will trigger a logging event to SQL_PERF (or SQL_INTERNAL_PERF for internal transactions); use 0 to disable +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... 
SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.transaction_rows_written_err,integer,0,"the limit for the number of rows written by a SQL transaction which - once exceeded - will fail the transaction (or will trigger a logging event to SQL_INTERNAL_PERF for internal transactions); use 0 to disable +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.transaction_rows_written_log,integer,0,"the threshold for the number of rows written by a SQL transaction which - once exceeded - will trigger a logging event to SQL_PERF (or SQL_INTERNAL_PERF for internal transactions); use 0 to disable +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.use_declarative_schema_changer,enumeration,on,"default value for use_declarative_schema_changer session setting;disables new schema changer by default [off = 0, on = 1, unsafe = 2, unsafe_always = 3] +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.vectorize,enumeration,on,"default vectorize mode [on = 0, on = 1, on = 2, experimental_always = 3, off = 4] +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... 
SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.defaults.zigzag_join.enabled,boolean,false,"default value for enable_zigzag_join session setting; disallows use of zig-zag join by default +This cluster setting is being kept to preserve backwards-compatibility. +This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html",application +sql.distsql.temp_storage.workmem,byte size,64 MiB,maximum amount of memory in bytes a processor can use before falling back to temp storage,application +sql.guardrails.max_row_size_err,byte size,512 MiB,"maximum size of row (or column family if multiple column families are in use) that SQL can write to the database, above which an error is returned; use 0 to disable",application +sql.guardrails.max_row_size_log,byte size,64 MiB,"maximum size of row (or column family if multiple column families are in use) that SQL can write to the database, above which an event is logged to SQL_PERF (or SQL_INTERNAL_PERF if the mutating statement was internal); use 0 to disable",application +sql.hash_sharded_range_pre_split.max,integer,16,max pre-split ranges to have when adding hash sharded index to an existing table,application +sql.index_recommendation.drop_unused_duration,duration,168h0m0s,the index unused duration at which we begin to recommend dropping the index,application +sql.insights.anomaly_detection.enabled,boolean,true,enable per-fingerprint latency recording and anomaly detection,application +sql.insights.anomaly_detection.latency_threshold,duration,50ms,statements must surpass this threshold to trigger anomaly detection and identification,application +sql.insights.anomaly_detection.memory_limit,byte size,1.0 MiB,the maximum amount of memory allowed for tracking statement latencies,application +sql.insights.execution_insights_capacity,integer,1000,the size of the per-node store of execution insights,application 
+sql.insights.high_retry_count.threshold,integer,10,the number of retries a slow statement must have undergone for its high retry count to be highlighted as a potential problem,application +sql.insights.latency_threshold,duration,100ms,amount of time after which an executing statement is considered slow. Use 0 to disable.,application +sql.log.redact_names.enabled,boolean,false,"if set, schema object identifiers are redacted in SQL statements that appear in event logs",application +sql.log.slow_query.experimental_full_table_scans.enabled,boolean,false,"when set to true, statements that perform a full table/index scan will be logged to the slow query log even if they do not meet the latency threshold. Must have the slow query log enabled for this setting to have any effect.",application +sql.log.slow_query.internal_queries.enabled,boolean,false,"when set to true, internal queries which exceed the slow query log threshold are logged to a separate log. Must have the slow query log enabled for this setting to have any effect.",application +sql.log.slow_query.latency_threshold,duration,0s,"when set to non-zero, log statements whose service latency exceeds the threshold to a secondary logger on each node",application +sql.log.user_audit,string,,user/role-based audit logging configuration. An enterprise license is required for this cluster setting to take effect.,application +sql.log.user_audit.reduced_config.enabled,boolean,false,"enables logic to compute a reduced audit configuration, computing the audit configuration only once at session start instead of at each SQL event. The tradeoff with the increase in performance (~5%), is that changes to the audit configuration (user role memberships/cluster setting) are not reflected within session. 
Users will need to start a new session to see these changes in their auditing behaviour.",application +sql.metrics.application_name.enabled,boolean,false,"when enabled, SQL metrics would export application name as an additional label as part of child metrics. The number of unique label combinations is limited to 5000 by default.",application +sql.metrics.database_name.enabled,boolean,false,"when enabled, SQL metrics would export database name as an additional label as part of child metrics. The number of unique label combinations is limited to 5000 by default.",application +sql.metrics.index_usage_stats.enabled,boolean,true,collect per index usage statistics,application +sql.metrics.max_mem_reported_stmt_fingerprints,integer,100000,the maximum number of reported statement fingerprints stored in memory,application +sql.metrics.max_mem_reported_txn_fingerprints,integer,100000,the maximum number of reported transaction fingerprints stored in memory,application +sql.metrics.max_mem_stmt_fingerprints,integer,7500,the maximum number of statement fingerprints stored in memory,application +sql.metrics.max_mem_txn_fingerprints,integer,7500,the maximum number of transaction fingerprints stored in memory,application +sql.metrics.statement_details.dump_to_logs.enabled (alias: sql.metrics.statement_details.dump_to_logs),boolean,false,dump collected statement statistics to node logs when periodically cleared,application +sql.metrics.statement_details.enabled,boolean,true,collect per-statement query statistics,application +sql.metrics.statement_details.gateway_node.enabled,boolean,false,"save the gateway node for each statement fingerprint. 
If false, the value will be stored as 0.",application +sql.metrics.statement_details.index_recommendation_collection.enabled,boolean,true,generate an index recommendation for each fingerprint ID,application +sql.metrics.statement_details.max_mem_reported_idx_recommendations,integer,5000,the maximum number of reported index recommendation info stored in memory,application +sql.metrics.statement_details.threshold,duration,0s,"minimum execution time to cause statement statistics to be collected. If configured, no transaction stats are collected.",application +sql.metrics.transaction_details.enabled,boolean,true,collect per-application transaction statistics,application +sql.multiple_modifications_of_table.enabled,boolean,false,"if true, allow statements containing multiple INSERT ON CONFLICT, UPSERT, UPDATE, or DELETE subqueries modifying the same table, at the risk of data corruption if the same row is modified multiple times by a single statement (multiple INSERT subqueries without ON CONFLICT cannot cause corruption and are always allowed)",application +sql.multiregion.drop_primary_region.enabled,boolean,true,allows dropping the PRIMARY REGION of a database if it is the last region,application +sql.notices.enabled,boolean,true,enable notices in the server/client protocol being sent,application +sql.optimizer.uniqueness_checks_for_gen_random_uuid.enabled,boolean,false,"if enabled, uniqueness checks may be planned for mutations of UUID columns updated with gen_random_uuid(); otherwise, uniqueness is assumed due to near-zero collision probability",application +sql.schema.telemetry.recurrence,string,@weekly,cron-tab recurrence for SQL schema telemetry job,system-visible +sql.spatial.experimental_box2d_comparison_operators.enabled,boolean,false,enables the use of certain experimental box2d comparison operators,application +sql.sqlcommenter.enabled,boolean,false,enables support for sqlcommenter. 
Key value parsed from sqlcommenter comments will be included in sql insights and sql logs. See https://google.github.io/sqlcommenter/ for more details.,application +sql.stats.activity.persisted_rows.max,integer,200000,maximum number of rows of statement and transaction activity that will be persisted in the system tables,application +sql.stats.automatic_collection.enabled,boolean,true,automatic statistics collection mode,application +sql.stats.automatic_collection.fraction_stale_rows,float,0.2,target fraction of stale rows per table that will trigger a statistics refresh,application +sql.stats.automatic_collection.min_stale_rows,integer,500,target minimum number of stale rows per table that will trigger a statistics refresh,application +sql.stats.automatic_full_collection.enabled,boolean,true,automatic full statistics collection mode,application +sql.stats.automatic_partial_collection.enabled,boolean,true,automatic partial statistics collection mode,application +sql.stats.automatic_partial_collection.fraction_stale_rows,float,0.05,target fraction of stale rows per table that will trigger a partial statistics refresh,application +sql.stats.automatic_partial_collection.min_stale_rows,integer,100,target minimum number of stale rows per table that will trigger a partial statistics refresh,application +sql.stats.cleanup.recurrence,string,@hourly,cron-tab recurrence for SQL Stats cleanup job,application +sql.stats.detailed_latency_metrics.enabled,boolean,false,"label latency metrics with the statement fingerprint. Workloads with tens of thousands of distinct query fingerprints should leave this setting false. 
(experimental, affects performance for workloads with high fingerprint cardinality)",application +sql.stats.error_on_concurrent_create_stats.enabled,boolean,false,"set to true to error on concurrent CREATE STATISTICS jobs, instead of skipping them",application +sql.stats.flush.enabled,boolean,true,"if set, SQL execution statistics are periodically flushed to disk",application +sql.stats.flush.interval,duration,10m0s,"the interval at which SQL execution statistics are flushed to disk, this value must be less than or equal to 1 hour",application +sql.stats.forecasts.enabled,boolean,true,"when true, enables generation of statistics forecasts by default for all tables",application +sql.stats.forecasts.max_decrease,float,0.3333333333333333,"the most a prediction is allowed to decrease, expressed as the minimum ratio of the prediction to the lowest prior observation",application +sql.stats.forecasts.min_goodness_of_fit,float,0.95,the minimum R² (goodness of fit) measurement required from all predictive models to use a forecast,application +sql.stats.forecasts.min_observations,integer,3,the minimum number of observed statistics required to produce a statistics forecast,application +sql.stats.histogram_buckets.count,integer,200,maximum number of histogram buckets to build during table statistics collection,application +sql.stats.histogram_buckets.include_most_common_values.enabled,boolean,true,whether to include most common values as histogram buckets,application +sql.stats.histogram_buckets.max_fraction_most_common_values,float,0.1,maximum fraction of histogram buckets to use for most common values,application +sql.stats.histogram_collection.enabled,boolean,true,histogram collection mode,application +sql.stats.histogram_samples.count,integer,0,number of rows sampled for histogram construction during table statistics collection. 
Not setting this or setting a value of 0 means that a reasonable sample size will be automatically picked based on the table size.,application +sql.stats.multi_column_collection.enabled,boolean,true,multi-column statistics collection mode,application +sql.stats.non_default_columns.min_retention_period,duration,24h0m0s,minimum retention period for table statistics collected on non-default columns,application +sql.stats.non_indexed_json_histograms.enabled,boolean,false,set to true to collect table statistics histograms on non-indexed JSON columns,application +sql.stats.persisted_rows.max,integer,1000000,maximum number of rows of statement and transaction statistics that will be persisted in the system tables before compaction begins,application +sql.stats.post_events.enabled,boolean,false,"if set, an event is logged for every successful CREATE STATISTICS job",application +sql.stats.response.max,integer,20000,the maximum number of statements and transaction stats returned in a CombinedStatements request,application +sql.stats.response.show_internal.enabled,boolean,false,controls if statistics for internal executions should be returned by the CombinedStatements and if internal sessions should be returned by the ListSessions endpoints. 
These endpoints are used to display statistics on the SQL Activity pages,application +sql.stats.system_tables.enabled,boolean,true,"when true, enables use of statistics on system tables by the query optimizer",application +sql.stats.system_tables_autostats.enabled,boolean,true,"when true, enables automatic collection of statistics on system tables",application +sql.stats.virtual_computed_columns.enabled,boolean,true,set to true to collect table statistics on virtual computed columns,application +sql.telemetry.query_sampling.enabled,boolean,false,"when set to true, executed queries will emit an event on the telemetry logging channel",application +sql.telemetry.query_sampling.internal.enabled,boolean,false,"when set to true, internal queries will be sampled in telemetry logging",application +sql.telemetry.query_sampling.max_event_frequency,integer,8,"the max event frequency (events per second) at which we sample executions for telemetry, note that it is recommended that this value shares a log-line limit of 10 logs per second on the telemetry pipeline with all other telemetry events. If sampling mode is set to 'transaction', this value is ignored.",application +sql.telemetry.query_sampling.mode,enumeration,statement,"the execution level used for telemetry sampling. If set to 'statement', events are sampled at the statement execution level. If set to 'transaction', events are sampled at the transaction execution level, i.e. all statements for a transaction will be logged and are counted together as one sampled event (events are still emitted one per statement). [statement = 0, transaction = 1]",application +sql.telemetry.transaction_sampling.max_event_frequency,integer,8,"the max event frequency (events per second) at which we sample transactions for telemetry. If sampling mode is set to 'statement', this setting is ignored. 
In practice, this means that we only sample a transaction if 1/max_event_frequency seconds have elapsed since the last transaction was sampled.",application +sql.telemetry.transaction_sampling.statement_events_per_transaction.max,integer,50,the maximum number of statement events to log for every sampled transaction. Note that statements that are logged by force do not adhere to this limit.,application +sql.temp_object_cleaner.cleanup_interval,duration,30m0s,how often to clean up orphaned temporary objects,application +sql.temp_object_cleaner.wait_interval,duration,30m0s,how long after creation a temporary object will be cleaned up,application +sql.log.all_statements.enabled (alias: sql.trace.log_statement_execute),boolean,false,set to true to enable logging of all executed statements,application +sql.trace.stmt.enable_threshold,duration,0s,enables tracing on all statements; statements executing for longer than this duration will have their trace logged (set to 0 to disable); note that enabling this may have a negative performance impact; this setting applies to individual statements within a transaction and is therefore finer-grained than sql.trace.txn.enable_threshold,application +sql.trace.txn.enable_threshold,duration,0s,"enables transaction traces for transactions exceeding this duration, used with `sql.trace.txn.sample_rate`",application +sql.trace.txn.sample_rate,float,1,"enables probabilistic transaction tracing. It should be used in conjunction with `sql.trace.txn.enable_threshold`. 
A percentage of transactions between 0 and 1.0 will have tracing enabled, and only those which exceed the configured threshold will be logged.",application +sql.ttl.changefeed_replication.disabled,boolean,false,"if true, deletes issued by TTL will not be replicated via changefeeds (this setting will be ignored by changefeeds that have the ignore_disable_changefeed_replication option set; such changefeeds will continue to replicate all TTL deletes)",application +sql.ttl.default_delete_batch_size,integer,100,default amount of rows to delete in a single query during a TTL job,application +sql.ttl.default_delete_rate_limit,integer,100,default delete rate limit (rows per second) per node for each TTL job. Use 0 to signify no rate limit.,application +sql.ttl.default_select_batch_size,integer,500,default amount of rows to select in a single query during a TTL job,application +sql.ttl.default_select_rate_limit,integer,0,default select rate limit (rows per second) per node for each TTL job. Use 0 to signify no rate limit.,application +sql.ttl.job.enabled,boolean,true,whether the TTL job is enabled,application +sql.txn.read_committed_isolation.enabled,boolean,true,set to true to allow transactions to use the READ COMMITTED isolation level if specified by BEGIN/SET commands,application +sql.txn.repeatable_read_isolation.enabled (alias: sql.txn.snapshot_isolation.enabled),boolean,false,set to true to allow transactions to use the REPEATABLE READ isolation level if specified by BEGIN/SET commands,application +sql.txn_fingerprint_id_cache.capacity,integer,100,the maximum number of txn fingerprint IDs stored,application +sql.vecindex.stalled_op.timeout,duration,100ms,amount of time before other vector index workers will assist with a stalled background fixup,application +storage.columnar_blocks.enabled,boolean,true,set to true to enable columnar-blocks to store KVs in a columnar format,system-visible +storage.delete_compaction_excise.enabled,boolean,true,set to false to direct 
Pebble to not partially excise sstables in delete-only compactions,system-visible +storage.ingest_split.enabled,boolean,true,set to false to disable ingest-time splitting that lowers write-amplification,system-only +storage.ingestion.value_blocks.enabled,boolean,true,set to true to enable writing of value blocks in ingestion sstables,application +storage.max_sync_duration,duration,20s,maximum duration for disk operations; any operations that take longer than this setting trigger a warning log entry or process crash,system-visible +storage.max_sync_duration.fatal.enabled,boolean,true,"if true, fatal the process when a disk operation exceeds storage.max_sync_duration",application +storage.sstable.compression_algorithm,enumeration,fastest,"determines the compression algorithm to use when compressing sstable data blocks for use in a Pebble store (balanced,good are experimental); [snappy = 1, zstd = 2, none = 3, minlz = 4, fastest = 5, balanced = 6, good = 7]",system-visible +storage.sstable.compression_algorithm_backup_storage,enumeration,fastest,"determines the compression algorithm to use when compressing sstable data blocks for backup row data storage (fast,balanced,good are experimental); [snappy = 1, zstd = 2, none = 3, minlz = 4, fastest = 5, fast = 6, balanced = 7, good = 8]",system-visible +storage.sstable.compression_algorithm_backup_transport,enumeration,fastest,"determines the compression algorithm to use when compressing sstable data blocks for backup transport (fast,balanced,good are experimental); [snappy = 1, zstd = 2, none = 3, minlz = 4, fastest = 5, fast = 6, balanced = 7, good = 8]",system-visible +storage.wal_failover.unhealthy_op_threshold,duration,100ms,the latency of a WAL write considered unhealthy and triggers a failover to a secondary WAL location,system-only +timeseries.storage.enabled,boolean,true,"if set, periodic timeseries data is stored within the cluster; disabling is not recommended unless you are storing the data 
elsewhere",system-only +timeseries.storage.resolution_10s.ttl,duration,240h0m0s,the maximum age of time series data stored at the 10 second resolution. Data older than this is subject to rollup and deletion.,system-visible +timeseries.storage.resolution_30m.ttl,duration,2160h0m0s,the maximum age of time series data stored at the 30 minute resolution. Data older than this is subject to deletion.,system-visible +trace.debug_http_endpoint.enabled (alias: trace.debug.enable),boolean,false,"if set, traces for recent requests can be seen at https://<ui>/debug/requests",application +trace.opentelemetry.collector,string,,"address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.",application +trace.snapshot.rate,duration,0s,"if non-zero, interval at which background trace snapshots are captured",application +trace.span_registry.enabled,boolean,false,"if set, ongoing traces can be seen at https://<ui>/#/debug/tracez",application +trace.zipkin.collector,string,,"the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.",application +ui.database_locality_metadata.enabled,boolean,true,if enabled shows extended locality data about databases and tables in DB Console which can be expensive to compute,application +ui.default_timezone,string,,the default timezone used to format timestamps in the ui,application +ui.display_timezone,enumeration,etc/utc,"the timezone used to format timestamps in the ui. This setting is deprecated and will be removed in a future version. Use the 'ui.default_timezone' setting instead. 'ui.default_timezone' takes precedence over this setting. 
[etc/utc = 0, america/new_york = 1]",application +version,version,25.3,set the active cluster version in the format '<major>.<minor>',application diff --git a/src/current/_data/v25.3/metrics/metrics.yml b/src/current/_data/v25.3/metrics/metrics-cloud.yml similarity index 100% rename from src/current/_data/v25.3/metrics/metrics.yml rename to src/current/_data/v25.3/metrics/metrics-cloud.yml diff --git a/src/current/_data/v25.3/metrics/metrics.yaml b/src/current/_data/v25.3/metrics/metrics.yaml new file mode 100644 index 00000000000..7fe4416b4ae --- /dev/null +++ b/src/current/_data/v25.3/metrics/metrics.yaml @@ -0,0 +1,17558 @@ +layers: +- name: APPLICATION + categories: + - name: CHANGEFEEDS + metrics: + - name: changefeed.commit_latency + exported_name: changefeed_commit_latency + description: 'Event commit latency: a difference between event MVCC timestamp and the time it was acknowledged by the downstream sink. If the sink batches events, then the difference between the oldest event in the batch and acknowledgement is recorded; Excludes latency during backfill' + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: This metric provides a useful context when assessing the state of changefeeds. This metric characterizes the end-to-end lag between a committed change and that change applied at the destination. + essential: true + - name: changefeed.emitted_bytes + exported_name: changefeed_emitted_bytes + description: Bytes emitted by all feeds + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric provides a useful context when assessing the state of changefeeds. This metric characterizes the throughput bytes being streamed from the CockroachDB cluster. 
+ essential: true + - name: changefeed.emitted_messages + exported_name: changefeed_emitted_messages + description: Messages emitted by all feeds + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric provides a useful context when assessing the state of changefeeds. This metric characterizes the rate of changes being streamed from the CockroachDB cluster. + essential: true + - name: changefeed.error_retries + exported_name: changefeed_error_retries + description: Total retryable errors encountered by all changefeeds + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric tracks transient changefeed errors. Alert on "too many" errors, such as 50 retries in 15 minutes. For example, during a rolling upgrade this counter will increase because the changefeed jobs will restart following node restarts. There is an exponential backoff, up to 10 minutes. But if there is no rolling upgrade in process or other cluster maintenance, and the error rate is high, investigate the changefeed job. + essential: true + - name: changefeed.failures + exported_name: changefeed_failures + description: Total number of changefeed jobs which have failed + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric tracks the permanent changefeed job failures that the jobs system will not try to restart. Any increase in this counter should be investigated. An alert on this metric is recommended. + essential: true + - name: changefeed.running + exported_name: changefeed_running + description: Number of currently running changefeeds, including sinkless + y_axis_label: Changefeeds + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric tracks the total number of all running changefeeds. 
+ essential: true + - name: jobs.changefeed.currently_paused + exported_name: jobs_changefeed_currently_paused + labeled_name: 'jobs{name: changefeed, status: currently_paused}' + description: Number of changefeed jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: Monitor and alert on this metric to safeguard against an inadvertent operational error of leaving a changefeed job in a paused state for an extended period of time. Changefeed jobs should not be paused for a long time because the protected timestamp prevents garbage collection. + essential: true + - name: jobs.changefeed.protected_age_sec + exported_name: jobs_changefeed_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: changefeed}' + description: The age of the oldest PTS record protected by changefeed jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + how_to_use: Changefeeds use protected timestamps to protect the data from being garbage collected. Ensure the protected timestamp age does not significantly exceed the GC TTL zone configuration. Alert on this metric if the protected timestamp age is greater than 3 times the GC TTL. + essential: true + - name: DISTRIBUTED + metrics: + - name: distsender.errors.notleaseholder + exported_name: distsender_errors_notleaseholder + description: Number of NotLeaseHolderErrors encountered from replica-addressed RPCs + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Errors of this type are normal during elastic cluster topology changes when leaseholders are actively rebalancing. They are automatically retried. However they may create occasional response time spikes. In that case, this metric may provide the explanation of the cause. 
+ essential: true + - name: distsender.rpc.sent.nextreplicaerror + exported_name: distsender_rpc_sent_nextreplicaerror + description: Number of replica-addressed RPCs sent due to per-replica errors + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: RPC errors do not necessarily indicate a problem. This metric tracks remote procedure calls that return a status value other than "success". A non-success status of an RPC should not be misconstrued as a network transport issue. It is database code logic executed on another cluster node. The non-success status is a result of an orderly execution of an RPC that reports a specific logical condition. + essential: true + - name: NETWORKING + metrics: + - name: clock-offset.meannanos + exported_name: clock_offset_meannanos + description: Mean clock offset with other nodes + y_axis_label: Clock Offset + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: This metric gives the node's clock skew. In a well-configured environment, the actual clock skew would be in the sub-millisecond range. A skew exceeding 5 ms is likely due to an NTP service misconfiguration. Reducing the actual clock skew reduces the probability of uncertainty related conflicts and corresponding retries which has a positive impact on workload performance. Conversely, a larger actual clock skew increases the probability of retries due to uncertainty conflicts, with potentially measurable adverse effects on workload performance. + essential: true + - name: rpc.connection.avg_round_trip_latency + exported_name: rpc_connection_avg_round_trip_latency + description: | + Sum of exponentially weighted moving average of round-trip latencies, as measured through a gRPC RPC. + + Dividing this Gauge by rpc.connection.healthy gives an approximation of average + latency, but the top-level round-trip-latency histogram is more useful. 
Instead, + users should consult the label families of this metric if they are available + (which requires prometheus and the cluster setting 'server.child_metrics.enabled'); + these provide per-peer moving averages. + + This metric does not track failed connection. A failed connection's contribution + is reset to zero. + y_axis_label: Latency + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: This metric is helpful in understanding general network issues outside of CockroachDB that could be impacting the user’s workload. + essential: true + - name: rpc.connection.failures + exported_name: rpc_connection_failures + description: | + Counter of failed connections. + + This includes both the event in which a healthy connection terminates as well as + unsuccessful reconnection attempts. + + Connections that are terminated as part of local node shutdown are excluded. + Decommissioned peers are excluded. + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: See Description. + essential: true + - name: rpc.connection.healthy + exported_name: rpc_connection_healthy + description: Gauge of current connections in a healthy state (i.e. bidirectionally connected and heartbeating) + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: See Description. 
+ essential: true + - name: rpc.connection.healthy_nanos + exported_name: rpc_connection_healthy_nanos + description: |- + Gauge of nanoseconds of healthy connection time + + On the prometheus endpoint scraped with the cluster setting 'server.child_metrics.enabled' set, + the constituent parts of this metric are available on a per-peer basis and one can read off + for how long a given peer has been connected + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: This can be useful for monitoring the stability and health of connections within your CockroachDB cluster. + essential: true + - name: rpc.connection.heartbeats + exported_name: rpc_connection_heartbeats + description: Counter of successful heartbeats. + y_axis_label: Heartbeats + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: See Description. + essential: true + - name: rpc.connection.unhealthy + exported_name: rpc_connection_unhealthy + description: Gauge of current connections in an unhealthy state (not bidirectionally connected or heartbeating) + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: If the value of this metric is greater than 0, this could indicate a network partition. + essential: true + - name: rpc.connection.unhealthy_nanos + exported_name: rpc_connection_unhealthy_nanos + description: |- + Gauge of nanoseconds of unhealthy connection time. + + On the prometheus endpoint scraped with the cluster setting 'server.child_metrics.enabled' set, + the constituent parts of this metric are available on a per-peer basis and one can read off + for how long a given peer has been unreachable + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: If this duration is greater than 0, this could indicate how long a network partition has been occurring. 
+ essential: true + - name: SQL + metrics: + - name: jobs.auto_create_stats.currently_paused + exported_name: jobs_auto_create_stats_currently_paused + labeled_name: 'jobs{name: auto_create_stats, status: currently_paused}' + description: Number of auto_create_stats jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric is a high-level indicator that automatically generated statistics jobs are paused which can lead to the query optimizer running with stale statistics. Stale statistics can cause suboptimal query plans to be selected leading to poor query performance. + essential: true + - name: jobs.auto_create_stats.currently_running + exported_name: jobs_auto_create_stats_currently_running + labeled_name: 'jobs{type: auto_create_stats, status: currently_running}' + description: Number of auto_create_stats jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric tracks the number of active automatically generated statistics jobs that could also be consuming resources. Ensure that foreground SQL traffic is not impacted by correlating this metric with SQL latency and query volume metrics. + essential: true + - name: jobs.auto_create_stats.resume_failed + exported_name: jobs_auto_create_stats_resume_failed + labeled_name: 'jobs.resume{name: auto_create_stats, status: failed}' + description: Number of auto_create_stats jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is a high-level indicator that automatically generated table statistics is failing. Failed statistic creation can lead to the query optimizer running with stale statistics. Stale statistics can cause suboptimal query plans to be selected leading to poor query performance. 
+ essential: true + - name: jobs.backup.currently_paused + exported_name: jobs_backup_currently_paused + labeled_name: 'jobs{name: backup, status: currently_paused}' + description: Number of backup jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: Monitor and alert on this metric to safeguard against an inadvertent operational error of leaving a backup job in a paused state for an extended period of time. In functional areas, a paused job can hold resources or have concurrency impact or some other negative consequence. Paused backup may break the recovery point objective (RPO). + essential: true + - name: jobs.backup.currently_running + exported_name: jobs_backup_currently_running + labeled_name: 'jobs{type: backup, status: currently_running}' + description: Number of backup jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: See Description. + essential: true + - name: jobs.create_stats.currently_running + exported_name: jobs_create_stats_currently_running + labeled_name: 'jobs{type: create_stats, status: currently_running}' + description: Number of create_stats jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric tracks the number of active create statistics jobs that may be consuming resources. Ensure that foreground SQL traffic is not impacted by correlating this metric with SQL latency and query volume metrics. 
+ essential: true + - name: schedules.BACKUP.failed + exported_name: schedules_BACKUP_failed + labeled_name: 'schedules{name: BACKUP, status: failed}' + description: Number of BACKUP jobs failed + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Monitor this metric and investigate backup job failures. + essential: true + - name: schedules.BACKUP.last-completed-time + exported_name: schedules_BACKUP_last_completed_time + description: The unix timestamp of the most recently completed backup by a schedule specified as maintaining this metric + y_axis_label: Jobs + type: GAUGE + unit: TIMESTAMP_SEC + aggregation: AVG + derivative: NONE + how_to_use: "Monitor this metric to ensure that backups are\n\t\t\t\t\t\tmeeting the recovery point objective (RPO). Each node\n\t\t\t\t\t\texports the time that it last completed a backup on behalf\n\t\t\t\t\t\tof the schedule. If a node is restarted, it will report 0\n\t\t\t\t\t\tuntil it completes a backup. If all nodes are restarted,\n\t\t\t\t\t\tmax() is 0 until a node completes a backup.\n\n\t\t\t\t\t\tTo make use of this metric, first, from each node, take the maximum\n\t\t\t\t\t\tover a rolling window equal to or greater than the backup frequency,\n\t\t\t\t\t\tand then take the maximum of those values across nodes. For example\n\t\t\t\t\t\twith a backup frequency of 60 minutes, monitor time() -\n\t\t\t\t\t\tmax_across_nodes(max_over_time(schedules_BACKUP_last_completed_time,\n\t\t\t\t\t\t60min))." + essential: true + - name: sql.conn.failures + exported_name: sql_conn_failures + description: Number of SQL connection failures + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is incremented whenever a connection attempt fails for any reason, including timeouts. 
+ essential: true + - name: sql.conn.latency + exported_name: sql_conn_latency + description: Latency to establish and authenticate a SQL connection + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: These metrics characterize the database connection latency which can affect the application performance, for example, by having slow startup times. Connection failures are not recorded in these metrics. + essential: true + - name: sql.conns + exported_name: sql_conns + description: Number of open SQL connections + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric shows the number of connections as well as the distribution, or balancing, of connections across cluster nodes. An imbalance can lead to nodes becoming overloaded. Review Connection Pooling. + essential: true + - name: sql.ddl.count + exported_name: sql_ddl_count + description: Number of SQL DDL statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. 
+ essential: true + - name: sql.ddl.count.internal + exported_name: sql_ddl_count_internal + description: Number of SQL DDL statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.delete.count + exported_name: sql_delete_count + labeled_name: 'sql.count{query_type: delete}' + description: Number of SQL DELETE statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. + essential: true + - name: sql.delete.count.internal + exported_name: sql_delete_count_internal + labeled_name: 'sql.count{query_type: delete, query_internal: true}' + description: Number of SQL DELETE statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.contended_queries.count + exported_name: sql_distsql_contended_queries_count + description: Number of SQL queries that experienced contention + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is incremented whenever there is a non-trivial amount of contention experienced by a statement whether read-write or write-write conflicts. 
Monitor this metric to correlate possible workload performance issues to contention conflicts. + essential: true + - name: sql.failure.count + exported_name: sql_failure_count + description: Number of statements resulting in a planning or runtime error + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is a high-level indicator of workload and application degradation with query failures. Use the Insights page to find failed executions with their error code to troubleshoot or use application-level logs, if instrumented, to determine the cause of error. + essential: true + - name: sql.failure.count.internal + exported_name: sql_failure_count_internal + description: Number of statements resulting in a planning or runtime error (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.full.scan.count + exported_name: sql_full_scan_count + description: Number of full table or index scans + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is a high-level indicator of potentially suboptimal query plans in the workload that may require index tuning and maintenance. To identify the statements with a full table scan, use SHOW FULL TABLE SCANS or the SQL Activity Statements page with the corresponding metric time frame. The Statements page also includes explain plans and index recommendations. Not all full scans are necessarily bad, especially over smaller tables.
+ essential: true + - name: sql.full.scan.count.internal + exported_name: sql_full_scan_count_internal + description: Number of full table or index scans (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.insert.count + exported_name: sql_insert_count + labeled_name: 'sql.count{query_type: insert}' + description: Number of SQL INSERT statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. + essential: true + - name: sql.insert.count.internal + exported_name: sql_insert_count_internal + labeled_name: 'sql.count{query_type: insert, query_internal: true}' + description: Number of SQL INSERT statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.mem.root.current + exported_name: sql_mem_root_current + description: Current sql statement memory usage for root + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + how_to_use: This metric shows how memory set aside for temporary materializations, such as hash tables and intermediary result sets, is utilized. Use this metric to optimize memory allocations based on long term observations. 
The maximum amount is set with --max_sql_memory. If the utilization of sql memory is persistently low, perhaps some portion of this memory allocation can be shifted to --cache. + essential: true + - name: sql.new_conns + exported_name: sql_new_conns + description: Number of SQL connections created + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: The rate of this metric shows how frequently new connections are being established. This can be useful in determining if a high rate of incoming new connections is causing additional load on the server due to a misconfigured application. + essential: true + - name: sql.select.count + exported_name: sql_select_count + labeled_name: 'sql.count{query_type: select}' + description: Number of SQL SELECT statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. 
+ essential: true + - name: sql.select.count.internal + exported_name: sql_select_count_internal + labeled_name: 'sql.count{query_type: select, query_internal: true}' + description: Number of SQL SELECT statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.service.latency + exported_name: sql_service_latency + description: Latency of SQL request execution + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: These high-level metrics reflect workload performance. Monitor these metrics to understand latency over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. The Statements page has P90 Latency and P99 latency columns to enable correlation with this metric. + essential: true + - name: sql.service.latency.internal + exported_name: sql_service_latency_internal + description: Latency of SQL request execution (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.statements.active + exported_name: sql_statements_active + description: Number of currently active user SQL statements + y_axis_label: Active Statements + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This high-level metric reflects workload volume. 
+ essential: true + - name: sql.statements.active.internal + exported_name: sql_statements_active_internal + description: Number of currently active user SQL statements (internal queries) + y_axis_label: SQL Internal Statements + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.txn.abort.count + exported_name: sql_txn_abort_count + description: Number of SQL transaction abort errors + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This high-level metric reflects workload performance. A persistently high number of SQL transaction abort errors may negatively impact the workload performance and needs to be investigated. + essential: true + - name: sql.txn.abort.count.internal + exported_name: sql_txn_abort_count_internal + description: Number of SQL transaction abort errors (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.begin.count + exported_name: sql_txn_begin_count + description: Number of SQL transaction BEGIN statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric reflects workload volume by counting explicit transactions. Use this metric to determine whether explicit transactions can be refactored as implicit transactions (individual statements). 
+ essential: true + - name: sql.txn.begin.count.internal + exported_name: sql_txn_begin_count_internal + description: Number of SQL transaction BEGIN statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.commit.count + exported_name: sql_txn_commit_count + description: Number of SQL transaction COMMIT statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric shows the number of transactions that completed successfully. This metric can be used as a proxy to measure the number of successful explicit transactions. + essential: true + - name: sql.txn.commit.count.internal + exported_name: sql_txn_commit_count_internal + description: Number of SQL transaction COMMIT statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.latency + exported_name: sql_txn_latency + description: Latency of SQL transactions + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: These high-level metrics provide a latency histogram of all executed SQL transactions. These metrics provide an overview of the current SQL workload. 
+ essential: true + - name: sql.txn.latency.internal + exported_name: sql_txn_latency_internal + description: Latency of SQL transactions (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.txn.rollback.count + exported_name: sql_txn_rollback_count + description: Number of SQL transaction ROLLBACK statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric shows the number of orderly transaction rollbacks. A persistently high number of rollbacks may negatively impact the workload performance and needs to be investigated. + essential: true + - name: sql.txn.rollback.count.internal + exported_name: sql_txn_rollback_count_internal + description: Number of SQL transaction ROLLBACK statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txns.open + exported_name: sql_txns_open + description: Number of currently open user SQL transactions + y_axis_label: Open SQL Transactions + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric should roughly correspond to the number of cores * 4. If this metric is consistently larger, scale out the cluster. 
+ essential: true + - name: sql.txns.open.internal + exported_name: sql_txns_open_internal + description: Number of currently open user SQL transactions (internal queries) + y_axis_label: SQL Internal Statements + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.update.count + exported_name: sql_update_count + labeled_name: 'sql.count{query_type: update}' + description: Number of SQL UPDATE statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. + essential: true + - name: sql.update.count.internal + exported_name: sql_update_count_internal + labeled_name: 'sql.count{query_type: update, query_internal: true}' + description: Number of SQL UPDATE statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.restarts.serializable + exported_name: txn_restarts_serializable + description: Number of restarts due to a forwarded commit timestamp and isolation=SERIALIZABLE + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is one measure of the impact of contention conflicts on workload performance. 
For guidance on contention conflicts, review transaction contention best practices and performance tuning recipes. Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. + essential: true + - name: txn.restarts.txnaborted + exported_name: txn_restarts_txnaborted + description: Number of restarts due to an abort by a concurrent transaction (usually due to deadlock) + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: The errors tracked by this metric are generally due to deadlocks. Deadlocks can often be prevented with a considered transaction design. Identify the conflicting transactions involved in the deadlocks, then, if possible, redesign the business logic implementation prone to deadlocks. + essential: true + - name: txn.restarts.txnpush + exported_name: txn_restarts_txnpush + description: Number of restarts due to a transaction push failure + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review transaction contention best practices and performance tuning recipes. Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. + essential: true + - name: txn.restarts.unknown + exported_name: txn_restarts_unknown + description: Number of restarts due to unknown reasons + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is one measure of the impact of contention conflicts on workload performance.
For guidance on contention conflicts, review transaction contention best practices and performance tuning recipes. Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. + essential: true + - name: txn.restarts.writetooold + exported_name: txn_restarts_writetooold + description: Number of restarts due to a concurrent writer committing first + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review transaction contention best practices and performance tuning recipes. Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. + essential: true + - name: TTL + metrics: + - name: jobs.row_level_ttl.currently_paused + exported_name: jobs_row_level_ttl_currently_paused + labeled_name: 'jobs{name: row_level_ttl, status: currently_paused}' + description: Number of row_level_ttl jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: Monitor this metric to ensure the Row Level TTL job does not remain paused inadvertently for an extended period. + essential: true + - name: jobs.row_level_ttl.currently_running + exported_name: jobs_row_level_ttl_currently_running + labeled_name: 'jobs{type: row_level_ttl, status: currently_running}' + description: Number of row_level_ttl jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: Monitor this metric to ensure there are not too many Row Level TTL jobs running at the same time. Generally, this metric should be in the low single digits. 
+ essential: true + - name: jobs.row_level_ttl.delete_duration + exported_name: jobs_row_level_ttl_delete_duration + description: Duration for delete requests during row level TTL. + y_axis_label: nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: See Description. + essential: true + - name: jobs.row_level_ttl.num_active_spans + exported_name: jobs_row_level_ttl_num_active_spans + description: Number of active spans the TTL job is deleting from. + y_axis_label: num_active_spans + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: See Description. + essential: true + - name: jobs.row_level_ttl.resume_completed + exported_name: jobs_row_level_ttl_resume_completed + labeled_name: 'jobs.resume{name: row_level_ttl, status: completed}' + description: Number of row_level_ttl jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: If Row Level TTL is enabled, this metric should be nonzero and correspond to the ttl_cron setting that was chosen. If this metric is zero, it means the job is not running. + essential: true + - name: jobs.row_level_ttl.resume_failed + exported_name: jobs_row_level_ttl_resume_failed + labeled_name: 'jobs.resume{name: row_level_ttl, status: failed}' + description: Number of row_level_ttl jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric should remain at zero. Repeated errors mean the Row Level TTL job is not deleting data. + essential: true + - name: jobs.row_level_ttl.rows_deleted + exported_name: jobs_row_level_ttl_rows_deleted + description: Number of rows deleted by the row level TTL job.
+ y_axis_label: num_rows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Correlate this metric with the metric jobs.row_level_ttl.rows_selected to ensure all the rows that should be deleted are actually getting deleted. + essential: true + - name: jobs.row_level_ttl.rows_selected + exported_name: jobs_row_level_ttl_rows_selected + description: Number of rows selected for deletion by the row level TTL job. + y_axis_label: num_rows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Correlate this metric with the metric jobs.row_level_ttl.rows_deleted to ensure all the rows that should be deleted are actually getting deleted. + essential: true + - name: jobs.row_level_ttl.select_duration + exported_name: jobs_row_level_ttl_select_duration + description: Duration for select requests during row level TTL. + y_axis_label: nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: See Description. + essential: true + - name: jobs.row_level_ttl.span_total_duration + exported_name: jobs_row_level_ttl_span_total_duration + description: Duration for processing a span during row level TTL. + y_axis_label: nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: See Description. + essential: true + - name: jobs.row_level_ttl.total_expired_rows + exported_name: jobs_row_level_ttl_total_expired_rows + description: Approximate number of rows that have expired the TTL on the TTL table. + y_axis_label: total_expired_rows + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: See Description. + essential: true + - name: jobs.row_level_ttl.total_rows + exported_name: jobs_row_level_ttl_total_rows + description: Approximate number of rows on the TTL table. 
+ y_axis_label: total_rows + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: See Description. + essential: true + - name: schedules.scheduled-row-level-ttl-executor.failed + exported_name: schedules_scheduled_row_level_ttl_executor_failed + labeled_name: 'schedules{name: scheduled-row-level-ttl-executor, status: failed}' + description: Number of scheduled-row-level-ttl-executor jobs failed + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Monitor this metric to ensure the Row Level TTL job is running. If it is non-zero, it means the job could not be created. + essential: true + - name: UNSET + metrics: + - name: auth.cert.conn.latency + exported_name: auth_cert_conn_latency + description: Latency to establish and authenticate a SQL connection using certificate + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: auth.gss.conn.latency + exported_name: auth_gss_conn_latency + description: Latency to establish and authenticate a SQL connection using GSS + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: auth.jwt.conn.latency + exported_name: auth_jwt_conn_latency + description: Latency to establish and authenticate a SQL connection using JWT Token + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: auth.ldap.conn.latency + exported_name: auth_ldap_conn_latency + description: Latency to establish and authenticate a SQL connection using LDAP + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: auth.password.conn.latency + exported_name: auth_password_conn_latency + description: Latency to establish and authenticate a SQL connection using password + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: 
AVG + derivative: NONE + - name: auth.scram.conn.latency + exported_name: auth_scram_conn_latency + description: Latency to establish and authenticate a SQL connection using SCRAM + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: backup.last-failed-time.kms-inaccessible + exported_name: backup_last_failed_time_kms_inaccessible + description: The unix timestamp of the most recent failure of backup due to errKMSInaccessible by a backup specified as maintaining this metric + y_axis_label: Jobs + type: GAUGE + unit: TIMESTAMP_SEC + aggregation: AVG + derivative: NONE + - name: changefeed.admit_latency + exported_name: changefeed_admit_latency + description: 'Event admission latency: a difference between event MVCC timestamp and the time it was admitted into changefeed pipeline; Note: this metric includes the time spent waiting until event can be processed due to backpressure or time spent resolving schema descriptors. Also note, this metric excludes latency during backfill' + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.aggregator_progress + exported_name: changefeed_aggregator_progress + description: The earliest timestamp up to which any aggregator is guaranteed to have emitted all values for + y_axis_label: Unix Timestamp Nanoseconds + type: GAUGE + unit: TIMESTAMP_NS + aggregation: AVG + derivative: NONE + - name: changefeed.backfill_count + exported_name: changefeed_backfill_count + description: Number of changefeeds currently executing backfill + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.backfill_pending_ranges + exported_name: changefeed_backfill_pending_ranges + description: Number of ranges in an ongoing backfill that are yet to be fully emitted + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: 
changefeed.batch_reduction_count + exported_name: changefeed_batch_reduction_count + description: Number of times a changefeed aggregator node attempted to reduce the size of message batches it emitted to the sink + y_axis_label: Batch Size Reductions + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.buffer_entries.allocated_mem + exported_name: changefeed_buffer_entries_allocated_mem + description: Current quota pool memory allocation + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: changefeed.buffer_entries.allocated_mem.aggregator + exported_name: changefeed_buffer_entries_allocated_mem_aggregator + description: Current quota pool memory allocation - between the kvfeed and the sink + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: changefeed.buffer_entries.allocated_mem.rangefeed + exported_name: changefeed_buffer_entries_allocated_mem_rangefeed + description: Current quota pool memory allocation - between the rangefeed and the kvfeed + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: changefeed.buffer_entries.flush + exported_name: changefeed_buffer_entries_flush + description: Number of flush elements added to the buffer + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.flush.aggregator + exported_name: changefeed_buffer_entries_flush_aggregator + description: Number of flush elements added to the buffer - between the kvfeed and the sink + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.flush.rangefeed + exported_name: changefeed_buffer_entries_flush_rangefeed + description: Number of flush elements added to the buffer - between the rangefeed and the kvfeed + y_axis_label: Events + type: 
COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.in + exported_name: changefeed_buffer_entries_in + description: Total entries entering the buffer between raft and changefeed sinks + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.in.aggregator + exported_name: changefeed_buffer_entries_in_aggregator + description: Total entries entering the buffer between raft and changefeed sinks - between the kvfeed and the sink + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.in.rangefeed + exported_name: changefeed_buffer_entries_in_rangefeed + description: Total entries entering the buffer between raft and changefeed sinks - between the rangefeed and the kvfeed + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.kv + exported_name: changefeed_buffer_entries_kv + description: Number of kv elements added to the buffer + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.kv.aggregator + exported_name: changefeed_buffer_entries_kv_aggregator + description: Number of kv elements added to the buffer - between the kvfeed and the sink + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.kv.rangefeed + exported_name: changefeed_buffer_entries_kv_rangefeed + description: Number of kv elements added to the buffer - between the rangefeed and the kvfeed + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.out + exported_name: changefeed_buffer_entries_out + 
description: Total entries leaving the buffer between raft and changefeed sinks + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.out.aggregator + exported_name: changefeed_buffer_entries_out_aggregator + description: Total entries leaving the buffer between raft and changefeed sinks - between the kvfeed and the sink + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.out.rangefeed + exported_name: changefeed_buffer_entries_out_rangefeed + description: Total entries leaving the buffer between raft and changefeed sinks - between the rangefeed and the kvfeed + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.released + exported_name: changefeed_buffer_entries_released + description: Total entries processed, emitted and acknowledged by the sinks + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.released.aggregator + exported_name: changefeed_buffer_entries_released_aggregator + description: Total entries processed, emitted and acknowledged by the sinks - between the kvfeed and the sink + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.released.rangefeed + exported_name: changefeed_buffer_entries_released_rangefeed + description: Total entries processed, emitted and acknowledged by the sinks - between the rangefeed and the kvfeed + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.resolved + exported_name: changefeed_buffer_entries_resolved + description: Number of resolved elements added to the buffer 
+ y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.resolved.aggregator + exported_name: changefeed_buffer_entries_resolved_aggregator + description: Number of resolved elements added to the buffer - between the kvfeed and the sink + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.resolved.rangefeed + exported_name: changefeed_buffer_entries_resolved_rangefeed + description: Number of resolved elements added to the buffer - between the rangefeed and the kvfeed + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries_mem.acquired + exported_name: changefeed_buffer_entries_mem_acquired + description: Total amount of memory acquired for entries as they enter the system + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries_mem.acquired.aggregator + exported_name: changefeed_buffer_entries_mem_acquired_aggregator + description: Total amount of memory acquired for entries as they enter the system - between the kvfeed and the sink + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries_mem.acquired.rangefeed + exported_name: changefeed_buffer_entries_mem_acquired_rangefeed + description: Total amount of memory acquired for entries as they enter the system - between the rangefeed and the kvfeed + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries_mem.released + exported_name: changefeed_buffer_entries_mem_released + description: Total amount of memory released by the entries after they have been emitted + y_axis_label: Entries 
+ type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries_mem.released.aggregator + exported_name: changefeed_buffer_entries_mem_released_aggregator + description: Total amount of memory released by the entries after they have been emitted - between the kvfeed and the sink + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries_mem.released.rangefeed + exported_name: changefeed_buffer_entries_mem_released_rangefeed + description: Total amount of memory released by the entries after they have been emitted - between the rangefeed and the kvfeed + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_pushback_nanos + exported_name: changefeed_buffer_pushback_nanos + description: Total time spent waiting while the buffer was full + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_pushback_nanos.aggregator + exported_name: changefeed_buffer_pushback_nanos_aggregator + description: Total time spent waiting while the buffer was full - between the kvfeed and the sink + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_pushback_nanos.rangefeed + exported_name: changefeed_buffer_pushback_nanos_rangefeed + description: Total time spent waiting while the buffer was full - between the rangefeed and the kvfeed + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.bytes.messages_pushback_nanos + exported_name: changefeed_bytes_messages_pushback_nanos + description: Total time spent throttled for bytes quota + y_axis_label: Nanoseconds + type: COUNTER + unit: 
NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.checkpoint.create_nanos + exported_name: changefeed_checkpoint_create_nanos + description: Time it takes to create a changefeed checkpoint + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.checkpoint.span_count + exported_name: changefeed_checkpoint_span_count + description: Number of spans in a changefeed checkpoint + y_axis_label: Spans + type: HISTOGRAM + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.checkpoint.timestamp_count + exported_name: changefeed_checkpoint_timestamp_count + description: Number of unique timestamps in a changefeed checkpoint + y_axis_label: Timestamps + type: HISTOGRAM + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.checkpoint.total_bytes + exported_name: changefeed_checkpoint_total_bytes + description: Total size of a changefeed checkpoint + y_axis_label: Bytes + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: changefeed.checkpoint_hist_nanos + exported_name: changefeed_checkpoint_hist_nanos + description: Time spent checkpointing changefeed progress + y_axis_label: Changefeeds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.checkpoint_progress + exported_name: changefeed_checkpoint_progress + description: The earliest timestamp of any changefeed's persisted checkpoint (values prior to this timestamp will never need to be re-emitted) + y_axis_label: Unix Timestamp Nanoseconds + type: GAUGE + unit: TIMESTAMP_NS + aggregation: AVG + derivative: NONE + - name: changefeed.cloudstorage_buffered_bytes + exported_name: changefeed_cloudstorage_buffered_bytes + description: The number of bytes buffered in cloudstorage sink files which have not been emitted yet + y_axis_label: Bytes + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: 
changefeed.emitted_batch_sizes + exported_name: changefeed_emitted_batch_sizes + description: Size of batches emitted by all feeds + y_axis_label: Number of Messages in Batch + type: HISTOGRAM + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.filtered_messages + exported_name: changefeed_filtered_messages + description: Messages filtered out by all feeds. This count does not include the number of messages that may be filtered due to the range constraints. + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.flush.messages_pushback_nanos + exported_name: changefeed_flush_messages_pushback_nanos + description: Total time spent throttled for flush quota + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.flush_hist_nanos + exported_name: changefeed_flush_hist_nanos + description: Time spent flushing messages across all changefeeds + y_axis_label: Changefeeds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.flushed_bytes + exported_name: changefeed_flushed_bytes + description: Bytes emitted by all feeds; may be different from changefeed.emitted_bytes when compression is enabled + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.flushes + exported_name: changefeed_flushes + description: Total flushes across all feeds + y_axis_label: Flushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.forwarded_resolved_messages + exported_name: changefeed_forwarded_resolved_messages + description: Resolved timestamps forwarded from the change aggregator to the change frontier + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name:
changefeed.frontier_updates + exported_name: changefeed_frontier_updates + description: Number of change frontier updates across all feeds + y_axis_label: Updates + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.internal_retry_message_count + exported_name: changefeed_internal_retry_message_count + description: Number of messages for which an attempt to retry them within an aggregator node was made + y_axis_label: Messages + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.kafka_throttling_hist_nanos + exported_name: changefeed_kafka_throttling_hist_nanos + description: Time spent in throttling due to exceeding kafka quota + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.lagging_ranges + exported_name: changefeed_lagging_ranges + description: The number of ranges considered to be lagging behind + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.max_behind_nanos + exported_name: changefeed_max_behind_nanos + description: The most any changefeed's persisted checkpoint is behind the present + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.message_size_hist + exported_name: changefeed_message_size_hist + description: Message size histogram + y_axis_label: Bytes + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: changefeed.messages.messages_pushback_nanos + exported_name: changefeed_messages_messages_pushback_nanos + description: Total time spent throttled for messages quota + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.network.bytes_in + exported_name: changefeed_network_bytes_in + description: The number of bytes received from the network by 
changefeeds + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.network.bytes_out + exported_name: changefeed_network_bytes_out + description: The number of bytes sent over the network by changefeeds + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.nprocs_consume_event_nanos + exported_name: changefeed_nprocs_consume_event_nanos + description: Total time spent waiting to add an event to the parallel consumer + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.nprocs_flush_nanos + exported_name: changefeed_nprocs_flush_nanos + description: Total time spent idle waiting for the parallel consumer to flush + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.nprocs_in_flight_count + exported_name: changefeed_nprocs_in_flight_count + description: Number of buffered events in the parallel consumer + y_axis_label: Count of Events + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.parallel_io_in_flight_keys + exported_name: changefeed_parallel_io_in_flight_keys + description: The number of keys currently in-flight which may contend with batches pending to be emitted + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.parallel_io_pending_rows + exported_name: changefeed_parallel_io_pending_rows + description: Number of rows which are blocked from being sent due to conflicting in-flight keys + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.parallel_io_queue_nanos + exported_name: changefeed_parallel_io_queue_nanos + description: Time that outgoing requests to the sink spend waiting in a queue due to in-flight requests with 
conflicting keys + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.parallel_io_result_queue_nanos + exported_name: changefeed_parallel_io_result_queue_nanos + description: Time that incoming results from the sink spend waiting in parallel io emitter before they are acknowledged by the changefeed + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.queue_time_nanos + exported_name: changefeed_queue_time_nanos + description: Time KV event spent waiting to be processed + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.schema_registry.registrations + exported_name: changefeed_schema_registry_registrations + description: Number of registration attempts with the schema registry + y_axis_label: Registrations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.schema_registry.retry_count + exported_name: changefeed_schema_registry_retry_count + description: Number of retries encountered when sending requests to the schema registry + y_axis_label: Retries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.schemafeed.table_history_scans + exported_name: changefeed_schemafeed_table_history_scans + description: The number of table history scans during polling + y_axis_label: Counts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.schemafeed.table_metadata_nanos + exported_name: changefeed_schemafeed_table_metadata_nanos + description: Time blocked while verifying table metadata histories + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.sink_batch_hist_nanos + exported_name: 
changefeed_sink_batch_hist_nanos + description: Time spent batched in the sink buffer before being flushed and acknowledged + y_axis_label: Changefeeds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.sink_errors + exported_name: changefeed_sink_errors + description: Number of changefeed errors caused by the sink + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.sink_io_inflight + exported_name: changefeed_sink_io_inflight + description: The number of keys currently inflight as IO requests being sent to the sink + y_axis_label: Messages + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.size_based_flushes + exported_name: changefeed_size_based_flushes + description: Total size based flushes across all feeds + y_axis_label: Flushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.stage.checkpoint_job_progress.latency + exported_name: changefeed_stage_checkpoint_job_progress_latency + description: 'Latency of the changefeed stage: checkpointing job progress' + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.stage.downstream_client_send.latency + exported_name: changefeed_stage_downstream_client_send_latency + description: 'Latency of the changefeed stage: flushing messages from the sink''s client to its downstream. This includes sends that failed for most but not all sinks.' 
+ y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.stage.emit_row.latency + exported_name: changefeed_stage_emit_row_latency + description: 'Latency of the changefeed stage: emitting row to sink' + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.stage.encode.latency + exported_name: changefeed_stage_encode_latency + description: 'Latency of the changefeed stage: encoding data' + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.stage.kv_feed_buffer.latency + exported_name: changefeed_stage_kv_feed_buffer_latency + description: 'Latency of the changefeed stage: waiting to buffer kv events' + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.stage.kv_feed_wait_for_table_event.latency + exported_name: changefeed_stage_kv_feed_wait_for_table_event_latency + description: 'Latency of the changefeed stage: waiting for a table schema event to join to the kv event' + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.stage.rangefeed_buffer_checkpoint.latency + exported_name: changefeed_stage_rangefeed_buffer_checkpoint_latency + description: 'Latency of the changefeed stage: buffering rangefeed checkpoint events' + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.stage.rangefeed_buffer_value.latency + exported_name: changefeed_stage_rangefeed_buffer_value_latency + description: 'Latency of the changefeed stage: buffering rangefeed value events' + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.total_ranges + exported_name: changefeed_total_ranges + description: The total number of ranges being 
watched by changefeed aggregators + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.usage.error_count + exported_name: changefeed_usage_error_count + description: Count of errors encountered while generating usage metrics for changefeeds + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.usage.query_duration + exported_name: changefeed_usage_query_duration + description: Time taken by the queries used to generate usage metrics for changefeeds + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.usage.table_bytes + exported_name: changefeed_usage_table_bytes + description: Aggregated number of bytes of data per table watched by changefeeds + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: clock-offset.medianabsdevnanos + exported_name: clock_offset_medianabsdevnanos + description: Median Absolute Deviation (MAD) with other nodes + y_axis_label: Clock Offset + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: clock-offset.mediannanos + exported_name: clock_offset_mediannanos + description: Median clock offset with other nodes + y_axis_label: Clock Offset + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: clock-offset.stddevnanos + exported_name: clock_offset_stddevnanos + description: Stddev clock offset with other nodes + y_axis_label: Clock Offset + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: cloud.conns_opened + exported_name: cloud_conns_opened + description: HTTP connections opened by cloud operations + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.conns_reused + exported_name: cloud_conns_reused + description: HTTP connections 
reused by cloud operations + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.listing_results + exported_name: cloud_listing_results + description: Listing results by all cloud operations + y_axis_label: Results + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.listings + exported_name: cloud_listings + description: Listing operations by all cloud operations + y_axis_label: Calls + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.open_readers + exported_name: cloud_open_readers + description: Currently open readers for cloud IO + y_axis_label: Readers + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: cloud.open_writers + exported_name: cloud_open_writers + description: Currently open writers for cloud IO + y_axis_label: Writers + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: cloud.read_bytes + exported_name: cloud_read_bytes + description: Bytes read from all cloud operations + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.readers_opened + exported_name: cloud_readers_opened + description: Readers opened by all cloud operations + y_axis_label: Files + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.tls_handshakes + exported_name: cloud_tls_handshakes + description: TLS handshakes done by cloud operations + y_axis_label: Handshakes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.write_bytes + exported_name: cloud_write_bytes + description: Bytes written by all cloud operations + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.writers_opened + exported_name: 
cloud_writers_opened + description: Writers opened by all cloud operations + y_axis_label: files + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cluster.preserve-downgrade-option.last-updated + exported_name: cluster_preserve_downgrade_option_last_updated + description: Unix timestamp of last updated time for cluster.preserve_downgrade_option + y_axis_label: Timestamp + type: GAUGE + unit: TIMESTAMP_SEC + aggregation: AVG + derivative: NONE + - name: distsender.batch_requests.cross_region.bytes + exported_name: distsender_batch_requests_cross_region_bytes + description: "Total byte count of replica-addressed batch requests processed cross\n\t\tregion when region tiers are configured" + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batch_requests.cross_zone.bytes + exported_name: distsender_batch_requests_cross_zone_bytes + description: "Total byte count of replica-addressed batch requests processed cross\n\t\tzone within the same region when zone tiers are configured. If region tiers\n\t\tare not set, it is assumed to be within the same region. To ensure accurate\n\t\tmonitoring of cross-zone data transfer, region and zone tiers should be\n\t\tconsistently configured across all nodes." 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batch_requests.replica_addressed.bytes + exported_name: distsender_batch_requests_replica_addressed_bytes + description: Total byte count of replica-addressed batch requests processed + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batch_responses.cross_region.bytes + exported_name: distsender_batch_responses_cross_region_bytes + description: "Total byte count of replica-addressed batch responses received cross\n\t\tregion when region tiers are configured" + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batch_responses.cross_zone.bytes + exported_name: distsender_batch_responses_cross_zone_bytes + description: "Total byte count of replica-addressed batch responses received cross\n\t\tzone within the same region when zone tiers are configured. If region tiers\n\t\tare not set, it is assumed to be within the same region. To ensure accurate\n\t\tmonitoring of cross-zone data transfer, region and zone tiers should be\n\t\tconsistently configured across all nodes." 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batch_responses.replica_addressed.bytes + exported_name: distsender_batch_responses_replica_addressed_bytes + description: Total byte count of replica-addressed batch responses received + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batches + exported_name: distsender_batches + description: Number of batches processed + y_axis_label: Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batches.async.in_progress + exported_name: distsender_batches_async_in_progress + description: Number of partial batches currently being executed asynchronously + y_axis_label: Partial Batches + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.batches.async.sent + exported_name: distsender_batches_async_sent + description: Number of partial batches sent asynchronously + y_axis_label: Partial Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batches.async.throttled + exported_name: distsender_batches_async_throttled + description: Number of partial batches not sent asynchronously due to throttling + y_axis_label: Partial Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batches.async.throttled_cumulative_duration_nanos + exported_name: distsender_batches_async_throttled_cumulative_duration_nanos + description: Cumulative duration of partial batches being throttled (in nanoseconds) + y_axis_label: Throttled Duration + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batches.partial + exported_name: distsender_batches_partial + description: Number of partial batches processed after being 
divided on range boundaries + y_axis_label: Partial Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.circuit_breaker.replicas.count + exported_name: distsender_circuit_breaker_replicas_count + description: Number of replicas currently tracked by DistSender circuit breakers + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.circuit_breaker.replicas.probes.failure + exported_name: distsender_circuit_breaker_replicas_probes_failure + description: Cumulative number of failed DistSender replica circuit breaker probes + y_axis_label: Probes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.circuit_breaker.replicas.probes.running + exported_name: distsender_circuit_breaker_replicas_probes_running + description: Number of currently running DistSender replica circuit breaker probes + y_axis_label: Probes + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.circuit_breaker.replicas.probes.success + exported_name: distsender_circuit_breaker_replicas_probes_success + description: Cumulative number of successful DistSender replica circuit breaker probes + y_axis_label: Probes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.circuit_breaker.replicas.requests.cancelled + exported_name: distsender_circuit_breaker_replicas_requests_cancelled + description: Cumulative number of requests cancelled when DistSender replica circuit breakers trip + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.circuit_breaker.replicas.requests.rejected + exported_name: distsender_circuit_breaker_replicas_requests_rejected + description: Cumulative number of requests rejected by tripped DistSender replica circuit breakers + y_axis_label: Requests + 
type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.circuit_breaker.replicas.tripped + exported_name: distsender_circuit_breaker_replicas_tripped + description: Number of DistSender replica circuit breakers currently tripped + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.circuit_breaker.replicas.tripped_events + exported_name: distsender_circuit_breaker_replicas_tripped_events + description: Cumulative number of DistSender replica circuit breakers tripped over time + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.errors.inleasetransferbackoffs + exported_name: distsender_errors_inleasetransferbackoffs + description: Number of times backed off due to NotLeaseHolderErrors during lease transfer + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.catchup_ranges + exported_name: distsender_rangefeed_catchup_ranges + description: | + Number of ranges in catchup mode + + This counts the number of ranges with an active rangefeed that are performing catchup scan. 
+ y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.rangefeed.catchup_ranges_waiting_client_side + exported_name: distsender_rangefeed_catchup_ranges_waiting_client_side + description: Number of ranges waiting on the client-side limiter to perform catchup scans + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.rangefeed.error_catchup_ranges + exported_name: distsender_rangefeed_error_catchup_ranges + description: Number of ranges in catchup mode which experienced an error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.local_ranges + exported_name: distsender_rangefeed_local_ranges + description: Number of ranges connected to local node. + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.rangefeed.restart_ranges + exported_name: distsender_rangefeed_restart_ranges + description: Number of ranges that were restarted due to transient errors + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.logical_ops_missing + exported_name: distsender_rangefeed_retry_logical_ops_missing + description: Number of ranges that encountered retryable LOGICAL_OPS_MISSING error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.manual_range_split + exported_name: distsender_rangefeed_retry_manual_range_split + description: Number of ranges that encountered retryable MANUAL_RANGE_SPLIT error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.no_leaseholder + exported_name: distsender_rangefeed_retry_no_leaseholder + description: Number of 
ranges that encountered retryable NO_LEASEHOLDER error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.node_not_found + exported_name: distsender_rangefeed_retry_node_not_found + description: Number of ranges that encountered retryable node not found error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.raft_snapshot + exported_name: distsender_rangefeed_retry_raft_snapshot + description: Number of ranges that encountered retryable RAFT_SNAPSHOT error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.range_key_mismatch + exported_name: distsender_rangefeed_retry_range_key_mismatch + description: Number of ranges that encountered retryable range key mismatch error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.range_merged + exported_name: distsender_rangefeed_retry_range_merged + description: Number of ranges that encountered retryable RANGE_MERGED error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.range_not_found + exported_name: distsender_rangefeed_retry_range_not_found + description: Number of ranges that encountered retryable range not found error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.range_split + exported_name: distsender_rangefeed_retry_range_split + description: Number of ranges that encountered retryable RANGE_SPLIT error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
distsender.rangefeed.retry.rangefeed_closed + exported_name: distsender_rangefeed_retry_rangefeed_closed + description: Number of ranges that encountered retryable RANGEFEED_CLOSED error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.replica_removed + exported_name: distsender_rangefeed_retry_replica_removed + description: Number of ranges that encountered retryable REPLICA_REMOVED error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.send + exported_name: distsender_rangefeed_retry_send + description: Number of ranges that encountered retryable send error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.slow_consumer + exported_name: distsender_rangefeed_retry_slow_consumer + description: Number of ranges that encountered retryable SLOW_CONSUMER error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.store_not_found + exported_name: distsender_rangefeed_retry_store_not_found + description: Number of ranges that encountered retryable store not found error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.unknown + exported_name: distsender_rangefeed_retry_unknown + description: Number of ranges that encountered retryable unknown error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.total_ranges + exported_name: distsender_rangefeed_total_ranges + description: | + Number of ranges executing rangefeed + + This counts the number of ranges with an active rangefeed. 
+ y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.rangelookups + exported_name: distsender_rangelookups + description: Number of range lookups + y_axis_label: Range Lookups + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.addsstable.sent + exported_name: distsender_rpc_addsstable_sent + description: |- + Number of AddSSTable requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.adminchangereplicas.sent + exported_name: distsender_rpc_adminchangereplicas_sent + description: |- + Number of AdminChangeReplicas requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.adminmerge.sent + exported_name: distsender_rpc_adminmerge_sent + description: |- + Number of AdminMerge requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.adminrelocaterange.sent + exported_name: distsender_rpc_adminrelocaterange_sent + description: |- + Number of AdminRelocateRange requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.adminscatter.sent + exported_name: distsender_rpc_adminscatter_sent + description: |- + Number of AdminScatter requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.adminsplit.sent + exported_name: distsender_rpc_adminsplit_sent + description: |- + Number of AdminSplit requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.admintransferlease.sent + exported_name: distsender_rpc_admintransferlease_sent + description: |- + Number of AdminTransferLease requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.adminunsplit.sent + exported_name: distsender_rpc_adminunsplit_sent + description: |- + Number of AdminUnsplit requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.barrier.sent + exported_name: distsender_rpc_barrier_sent + description: |- + Number of Barrier requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.checkconsistency.sent + exported_name: distsender_rpc_checkconsistency_sent + description: |- + Number of CheckConsistency requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.clearrange.sent + exported_name: distsender_rpc_clearrange_sent + description: |- + Number of ClearRange requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.computechecksum.sent + exported_name: distsender_rpc_computechecksum_sent + description: |- + Number of ComputeChecksum requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.conditionalput.sent + exported_name: distsender_rpc_conditionalput_sent + description: |- + Number of ConditionalPut requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.delete.sent + exported_name: distsender_rpc_delete_sent + description: |- + Number of Delete requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.deleterange.sent + exported_name: distsender_rpc_deleterange_sent + description: |- + Number of DeleteRange requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.endtxn.sent + exported_name: distsender_rpc_endtxn_sent + description: |- + Number of EndTxn requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.ambiguousresulterrtype + exported_name: distsender_rpc_err_ambiguousresulterrtype + description: | + Number of AmbiguousResultErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.batchtimestampbeforegcerrtype + exported_name: distsender_rpc_err_batchtimestampbeforegcerrtype + description: | + Number of BatchTimestampBeforeGCErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.communicationerrtype + exported_name: distsender_rpc_err_communicationerrtype + description: | + Number of CommunicationErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.conditionfailederrtype + exported_name: distsender_rpc_err_conditionfailederrtype + description: | + Number of ConditionFailedErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(0) + exported_name: distsender_rpc_err_errordetailtype_0_ + description: | + Number of ErrorDetailType(0) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(15) + exported_name: distsender_rpc_err_errordetailtype_15_ + description: | + Number of ErrorDetailType(15) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(19) + exported_name: distsender_rpc_err_errordetailtype_19_ + description: | + Number of ErrorDetailType(19) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(20) + exported_name: distsender_rpc_err_errordetailtype_20_ + description: | + Number of ErrorDetailType(20) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(21) + exported_name: distsender_rpc_err_errordetailtype_21_ + description: | + Number of ErrorDetailType(21) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(23) + exported_name: distsender_rpc_err_errordetailtype_23_ + description: | + Number of ErrorDetailType(23) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(24) + exported_name: distsender_rpc_err_errordetailtype_24_ + description: | + Number of ErrorDetailType(24) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(29) + exported_name: distsender_rpc_err_errordetailtype_29_ + description: | + Number of ErrorDetailType(29) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(30) + exported_name: distsender_rpc_err_errordetailtype_30_ + description: | + Number of ErrorDetailType(30) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(33) + exported_name: distsender_rpc_err_errordetailtype_33_ + description: | + Number of ErrorDetailType(33) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.exclusionviolationerrtype + exported_name: distsender_rpc_err_exclusionviolationerrtype + description: | + Number of ExclusionViolationErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.indeterminatecommiterrtype + exported_name: distsender_rpc_err_indeterminatecommiterrtype + description: | + Number of IndeterminateCommitErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.integeroverflowerrtype + exported_name: distsender_rpc_err_integeroverflowerrtype + description: | + Number of IntegerOverflowErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.intentmissingerrtype + exported_name: distsender_rpc_err_intentmissingerrtype + description: | + Number of IntentMissingErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.internalerrtype + exported_name: distsender_rpc_err_internalerrtype + description: | + Number of InternalErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.invalidleaseerrtype + exported_name: distsender_rpc_err_invalidleaseerrtype + description: | + Number of InvalidLeaseErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.leaserejectederrtype + exported_name: distsender_rpc_err_leaserejectederrtype + description: | + Number of LeaseRejectedErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.lockconflicterrtype + exported_name: distsender_rpc_err_lockconflicterrtype + description: | + Number of LockConflictErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.mergeinprogresserrtype + exported_name: distsender_rpc_err_mergeinprogresserrtype + description: | + Number of MergeInProgressErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.mintimestampboundunsatisfiableerrtype + exported_name: distsender_rpc_err_mintimestampboundunsatisfiableerrtype + description: | + Number of MinTimestampBoundUnsatisfiableErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.mvcchistorymutationerrtype + exported_name: distsender_rpc_err_mvcchistorymutationerrtype + description: | + Number of MVCCHistoryMutationErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.nodeunavailableerrtype + exported_name: distsender_rpc_err_nodeunavailableerrtype + description: | + Number of NodeUnavailableErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.notleaseholdererrtype + exported_name: distsender_rpc_err_notleaseholdererrtype + description: | + Number of NotLeaseHolderErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.oprequirestxnerrtype + exported_name: distsender_rpc_err_oprequirestxnerrtype + description: | + Number of OpRequiresTxnErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.optimisticevalconflictserrtype + exported_name: distsender_rpc_err_optimisticevalconflictserrtype + description: | + Number of OptimisticEvalConflictsErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.proxyfailederrtype + exported_name: distsender_rpc_err_proxyfailederrtype + description: | + Number of ProxyFailedErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.raftgroupdeletederrtype + exported_name: distsender_rpc_err_raftgroupdeletederrtype + description: | + Number of RaftGroupDeletedErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.rangefeedretryerrtype + exported_name: distsender_rpc_err_rangefeedretryerrtype + description: | + Number of RangeFeedRetryErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.rangekeymismatcherrtype + exported_name: distsender_rpc_err_rangekeymismatcherrtype + description: | + Number of RangeKeyMismatchErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.rangenotfounderrtype + exported_name: distsender_rpc_err_rangenotfounderrtype + description: | + Number of RangeNotFoundErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.readwithinuncertaintyintervalerrtype + exported_name: distsender_rpc_err_readwithinuncertaintyintervalerrtype + description: | + Number of ReadWithinUncertaintyIntervalErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.refreshfailederrtype + exported_name: distsender_rpc_err_refreshfailederrtype + description: | + Number of RefreshFailedErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.replicacorruptionerrtype + exported_name: distsender_rpc_err_replicacorruptionerrtype + description: | + Number of ReplicaCorruptionErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.replicatooolderrtype + exported_name: distsender_rpc_err_replicatooolderrtype + description: | + Number of ReplicaTooOldErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.replicaunavailableerrtype + exported_name: distsender_rpc_err_replicaunavailableerrtype + description: | + Number of ReplicaUnavailableErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.storenotfounderrtype + exported_name: distsender_rpc_err_storenotfounderrtype + description: | + Number of StoreNotFoundErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.transactionabortederrtype + exported_name: distsender_rpc_err_transactionabortederrtype + description: | + Number of TransactionAbortedErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.transactionpusherrtype + exported_name: distsender_rpc_err_transactionpusherrtype + description: | + Number of TransactionPushErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.transactionretryerrtype + exported_name: distsender_rpc_err_transactionretryerrtype + description: | + Number of TransactionRetryErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.transactionretrywithprotorefresherrtype + exported_name: distsender_rpc_err_transactionretrywithprotorefresherrtype + description: | + Number of TransactionRetryWithProtoRefreshErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.transactionstatuserrtype + exported_name: distsender_rpc_err_transactionstatuserrtype + description: | + Number of TransactionStatusErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.txnalreadyencounterederrtype + exported_name: distsender_rpc_err_txnalreadyencounterederrtype + description: | + Number of TxnAlreadyEncounteredErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.unsupportedrequesterrtype + exported_name: distsender_rpc_err_unsupportedrequesterrtype + description: | + Number of UnsupportedRequestErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.writeintenterrtype + exported_name: distsender_rpc_err_writeintenterrtype + description: | + Number of WriteIntentErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.writetooolderrtype + exported_name: distsender_rpc_err_writetooolderrtype + description: | + Number of WriteTooOldErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.excise.sent + exported_name: distsender_rpc_excise_sent + description: |- + Number of Excise requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.export.sent + exported_name: distsender_rpc_export_sent + description: |- + Number of Export requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.gc.sent + exported_name: distsender_rpc_gc_sent + description: |- + Number of GC requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.get.sent + exported_name: distsender_rpc_get_sent + description: |- + Number of Get requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.heartbeattxn.sent + exported_name: distsender_rpc_heartbeattxn_sent + description: |- + Number of HeartbeatTxn requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.increment.sent + exported_name: distsender_rpc_increment_sent + description: |- + Number of Increment requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.isspanempty.sent + exported_name: distsender_rpc_isspanempty_sent + description: |- + Number of IsSpanEmpty requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.leaseinfo.sent + exported_name: distsender_rpc_leaseinfo_sent + description: |- + Number of LeaseInfo requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.linkexternalsstable.sent + exported_name: distsender_rpc_linkexternalsstable_sent + description: |- + Number of LinkExternalSSTable requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.merge.sent + exported_name: distsender_rpc_merge_sent + description: |- + Number of Merge requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.migrate.sent + exported_name: distsender_rpc_migrate_sent + description: |- + Number of Migrate requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.probe.sent + exported_name: distsender_rpc_probe_sent + description: |- + Number of Probe requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.proxy.err + exported_name: distsender_rpc_proxy_err + description: Number of attempts by a gateway to proxy a request which resulted in a failure. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.proxy.forward.err + exported_name: distsender_rpc_proxy_forward_err + description: Number of attempts on a follower replica to proxy a request which resulted in a failure. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.proxy.forward.sent + exported_name: distsender_rpc_proxy_forward_sent + description: Number of attempts on a follower replica to proxy a request to an unreachable leaseholder. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.proxy.sent + exported_name: distsender_rpc_proxy_sent + description: Number of attempts by a gateway to proxy a request to an unreachable leaseholder via a follower replica. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.pushtxn.sent + exported_name: distsender_rpc_pushtxn_sent + description: |- + Number of PushTxn requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.put.sent + exported_name: distsender_rpc_put_sent + description: |- + Number of Put requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.queryintent.sent + exported_name: distsender_rpc_queryintent_sent + description: |- + Number of QueryIntent requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.querylocks.sent + exported_name: distsender_rpc_querylocks_sent + description: |- + Number of QueryLocks requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.queryresolvedtimestamp.sent + exported_name: distsender_rpc_queryresolvedtimestamp_sent + description: |- + Number of QueryResolvedTimestamp requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.querytxn.sent + exported_name: distsender_rpc_querytxn_sent + description: |- + Number of QueryTxn requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.rangestats.sent + exported_name: distsender_rpc_rangestats_sent + description: |- + Number of RangeStats requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.recomputestats.sent + exported_name: distsender_rpc_recomputestats_sent + description: |- + Number of RecomputeStats requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.recovertxn.sent + exported_name: distsender_rpc_recovertxn_sent + description: |- + Number of RecoverTxn requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.refresh.sent + exported_name: distsender_rpc_refresh_sent + description: |- + Number of Refresh requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.refreshrange.sent + exported_name: distsender_rpc_refreshrange_sent + description: |- + Number of RefreshRange requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.requestlease.sent + exported_name: distsender_rpc_requestlease_sent + description: |- + Number of RequestLease requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.resolveintent.sent + exported_name: distsender_rpc_resolveintent_sent + description: |- + Number of ResolveIntent requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.resolveintentrange.sent + exported_name: distsender_rpc_resolveintentrange_sent + description: |- + Number of ResolveIntentRange requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.reversescan.sent + exported_name: distsender_rpc_reversescan_sent + description: |- + Number of ReverseScan requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.revertrange.sent + exported_name: distsender_rpc_revertrange_sent + description: |- + Number of RevertRange requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.scan.sent + exported_name: distsender_rpc_scan_sent + description: |- + Number of Scan requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.sent + exported_name: distsender_rpc_sent + description: Number of replica-addressed RPCs sent + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.sent.local + exported_name: distsender_rpc_sent_local + description: Number of replica-addressed RPCs sent through the local-server optimization + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.subsume.sent + exported_name: distsender_rpc_subsume_sent + description: |- + Number of Subsume requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.transferlease.sent + exported_name: distsender_rpc_transferlease_sent + description: |- + Number of TransferLease requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.truncatelog.sent + exported_name: distsender_rpc_truncatelog_sent + description: |- + Number of TruncateLog requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.writebatch.sent + exported_name: distsender_rpc_writebatch_sent + description: |- + Number of WriteBatch requests processed. 
+ + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.slow.replicarpcs + exported_name: distsender_slow_replicarpcs + description: |- + Number of slow replica-bound RPCs. + + Note that this is not a good signal for KV health. The remote side of the + RPCs tracked here may experience contention, so an end user can easily + cause values for this metric to be emitted by leaving a transaction open + for a long time and contending with it using a second transaction. + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.adopt_iterations + exported_name: jobs_adopt_iterations + description: number of job-adopt iterations performed by the registry + y_axis_label: iterations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_env_runner.currently_idle + exported_name: jobs_auto_config_env_runner_currently_idle + labeled_name: 'jobs{type: auto_config_env_runner, status: currently_idle}' + description: Number of auto_config_env_runner jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_env_runner.currently_paused + exported_name: jobs_auto_config_env_runner_currently_paused + labeled_name: 'jobs{name: auto_config_env_runner, status: currently_paused}' + description: Number of auto_config_env_runner jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_env_runner.currently_running + exported_name: jobs_auto_config_env_runner_currently_running + labeled_name: 'jobs{type: auto_config_env_runner, status: currently_running}' + description: Number of 
auto_config_env_runner jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_env_runner.expired_pts_records + exported_name: jobs_auto_config_env_runner_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_config_env_runner}' + description: Number of expired protected timestamp records owned by auto_config_env_runner jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_env_runner.fail_or_cancel_completed + exported_name: jobs_auto_config_env_runner_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_config_env_runner, status: completed}' + description: Number of auto_config_env_runner jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_env_runner.fail_or_cancel_failed + exported_name: jobs_auto_config_env_runner_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_config_env_runner, status: failed}' + description: Number of auto_config_env_runner jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_env_runner.fail_or_cancel_retry_error + exported_name: jobs_auto_config_env_runner_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_config_env_runner, status: retry_error}' + description: Number of auto_config_env_runner jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_env_runner.protected_age_sec + 
exported_name: jobs_auto_config_env_runner_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_config_env_runner}' + description: The age of the oldest PTS record protected by auto_config_env_runner jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_env_runner.protected_record_count + exported_name: jobs_auto_config_env_runner_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_config_env_runner}' + description: Number of protected timestamp records held by auto_config_env_runner jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_env_runner.resume_completed + exported_name: jobs_auto_config_env_runner_resume_completed + labeled_name: 'jobs.resume{name: auto_config_env_runner, status: completed}' + description: Number of auto_config_env_runner jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_env_runner.resume_failed + exported_name: jobs_auto_config_env_runner_resume_failed + labeled_name: 'jobs.resume{name: auto_config_env_runner, status: failed}' + description: Number of auto_config_env_runner jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_env_runner.resume_retry_error + exported_name: jobs_auto_config_env_runner_resume_retry_error + labeled_name: 'jobs.resume{name: auto_config_env_runner, status: retry_error}' + description: Number of auto_config_env_runner jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_runner.currently_idle + exported_name: jobs_auto_config_runner_currently_idle + 
labeled_name: 'jobs{type: auto_config_runner, status: currently_idle}' + description: Number of auto_config_runner jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_runner.currently_paused + exported_name: jobs_auto_config_runner_currently_paused + labeled_name: 'jobs{name: auto_config_runner, status: currently_paused}' + description: Number of auto_config_runner jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_runner.currently_running + exported_name: jobs_auto_config_runner_currently_running + labeled_name: 'jobs{type: auto_config_runner, status: currently_running}' + description: Number of auto_config_runner jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_runner.expired_pts_records + exported_name: jobs_auto_config_runner_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_config_runner}' + description: Number of expired protected timestamp records owned by auto_config_runner jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_runner.fail_or_cancel_completed + exported_name: jobs_auto_config_runner_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_config_runner, status: completed}' + description: Number of auto_config_runner jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_runner.fail_or_cancel_failed + exported_name: jobs_auto_config_runner_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_config_runner, status: 
failed}' + description: Number of auto_config_runner jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_runner.fail_or_cancel_retry_error + exported_name: jobs_auto_config_runner_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_config_runner, status: retry_error}' + description: Number of auto_config_runner jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_runner.protected_age_sec + exported_name: jobs_auto_config_runner_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_config_runner}' + description: The age of the oldest PTS record protected by auto_config_runner jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_runner.protected_record_count + exported_name: jobs_auto_config_runner_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_config_runner}' + description: Number of protected timestamp records held by auto_config_runner jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_runner.resume_completed + exported_name: jobs_auto_config_runner_resume_completed + labeled_name: 'jobs.resume{name: auto_config_runner, status: completed}' + description: Number of auto_config_runner jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_runner.resume_failed + exported_name: jobs_auto_config_runner_resume_failed + labeled_name: 'jobs.resume{name: auto_config_runner, status: failed}' + description: Number of 
auto_config_runner jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_runner.resume_retry_error + exported_name: jobs_auto_config_runner_resume_retry_error + labeled_name: 'jobs.resume{name: auto_config_runner, status: retry_error}' + description: Number of auto_config_runner jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_task.currently_idle + exported_name: jobs_auto_config_task_currently_idle + labeled_name: 'jobs{type: auto_config_task, status: currently_idle}' + description: Number of auto_config_task jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_task.currently_paused + exported_name: jobs_auto_config_task_currently_paused + labeled_name: 'jobs{name: auto_config_task, status: currently_paused}' + description: Number of auto_config_task jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_task.currently_running + exported_name: jobs_auto_config_task_currently_running + labeled_name: 'jobs{type: auto_config_task, status: currently_running}' + description: Number of auto_config_task jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_task.expired_pts_records + exported_name: jobs_auto_config_task_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_config_task}' + description: Number of expired protected timestamp records owned by auto_config_task jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: 
NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_task.fail_or_cancel_completed + exported_name: jobs_auto_config_task_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_config_task, status: completed}' + description: Number of auto_config_task jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_task.fail_or_cancel_failed + exported_name: jobs_auto_config_task_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_config_task, status: failed}' + description: Number of auto_config_task jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_task.fail_or_cancel_retry_error + exported_name: jobs_auto_config_task_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_config_task, status: retry_error}' + description: Number of auto_config_task jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_task.protected_age_sec + exported_name: jobs_auto_config_task_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_config_task}' + description: The age of the oldest PTS record protected by auto_config_task jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_task.protected_record_count + exported_name: jobs_auto_config_task_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_config_task}' + description: Number of protected timestamp records held by auto_config_task jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: 
AVG + derivative: NONE + - name: jobs.auto_config_task.resume_completed + exported_name: jobs_auto_config_task_resume_completed + labeled_name: 'jobs.resume{name: auto_config_task, status: completed}' + description: Number of auto_config_task jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_task.resume_failed + exported_name: jobs_auto_config_task_resume_failed + labeled_name: 'jobs.resume{name: auto_config_task, status: failed}' + description: Number of auto_config_task jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_task.resume_retry_error + exported_name: jobs_auto_config_task_resume_retry_error + labeled_name: 'jobs.resume{name: auto_config_task, status: retry_error}' + description: Number of auto_config_task jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_partial_stats.currently_idle + exported_name: jobs_auto_create_partial_stats_currently_idle + labeled_name: 'jobs{type: auto_create_partial_stats, status: currently_idle}' + description: Number of auto_create_partial_stats jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_partial_stats.currently_paused + exported_name: jobs_auto_create_partial_stats_currently_paused + labeled_name: 'jobs{name: auto_create_partial_stats, status: currently_paused}' + description: Number of auto_create_partial_stats jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_partial_stats.currently_running + exported_name: 
jobs_auto_create_partial_stats_currently_running + labeled_name: 'jobs{type: auto_create_partial_stats, status: currently_running}' + description: Number of auto_create_partial_stats jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_partial_stats.expired_pts_records + exported_name: jobs_auto_create_partial_stats_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_create_partial_stats}' + description: Number of expired protected timestamp records owned by auto_create_partial_stats jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_partial_stats.fail_or_cancel_completed + exported_name: jobs_auto_create_partial_stats_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_create_partial_stats, status: completed}' + description: Number of auto_create_partial_stats jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_partial_stats.fail_or_cancel_failed + exported_name: jobs_auto_create_partial_stats_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_create_partial_stats, status: failed}' + description: Number of auto_create_partial_stats jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_partial_stats.fail_or_cancel_retry_error + exported_name: jobs_auto_create_partial_stats_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_create_partial_stats, status: retry_error}' + description: Number of auto_create_partial_stats jobs which failed with a retriable error on 
their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_partial_stats.protected_age_sec + exported_name: jobs_auto_create_partial_stats_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_create_partial_stats}' + description: The age of the oldest PTS record protected by auto_create_partial_stats jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_partial_stats.protected_record_count + exported_name: jobs_auto_create_partial_stats_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_create_partial_stats}' + description: Number of protected timestamp records held by auto_create_partial_stats jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_partial_stats.resume_completed + exported_name: jobs_auto_create_partial_stats_resume_completed + labeled_name: 'jobs.resume{name: auto_create_partial_stats, status: completed}' + description: Number of auto_create_partial_stats jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_partial_stats.resume_failed + exported_name: jobs_auto_create_partial_stats_resume_failed + labeled_name: 'jobs.resume{name: auto_create_partial_stats, status: failed}' + description: Number of auto_create_partial_stats jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_partial_stats.resume_retry_error + exported_name: jobs_auto_create_partial_stats_resume_retry_error + labeled_name: 'jobs.resume{name: auto_create_partial_stats, status: retry_error}' + description: Number of auto_create_partial_stats 
jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_stats.currently_idle + exported_name: jobs_auto_create_stats_currently_idle + labeled_name: 'jobs{type: auto_create_stats, status: currently_idle}' + description: Number of auto_create_stats jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_stats.expired_pts_records + exported_name: jobs_auto_create_stats_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_create_stats}' + description: Number of expired protected timestamp records owned by auto_create_stats jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_stats.fail_or_cancel_completed + exported_name: jobs_auto_create_stats_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_create_stats, status: completed}' + description: Number of auto_create_stats jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_stats.fail_or_cancel_failed + exported_name: jobs_auto_create_stats_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_create_stats, status: failed}' + description: Number of auto_create_stats jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_stats.fail_or_cancel_retry_error + exported_name: jobs_auto_create_stats_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_create_stats, status: retry_error}' + description: Number of 
auto_create_stats jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_stats.protected_age_sec + exported_name: jobs_auto_create_stats_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_create_stats}' + description: The age of the oldest PTS record protected by auto_create_stats jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_stats.protected_record_count + exported_name: jobs_auto_create_stats_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_create_stats}' + description: Number of protected timestamp records held by auto_create_stats jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_stats.resume_completed + exported_name: jobs_auto_create_stats_resume_completed + labeled_name: 'jobs.resume{name: auto_create_stats, status: completed}' + description: Number of auto_create_stats jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_stats.resume_retry_error + exported_name: jobs_auto_create_stats_resume_retry_error + labeled_name: 'jobs.resume{name: auto_create_stats, status: retry_error}' + description: Number of auto_create_stats jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_schema_telemetry.currently_idle + exported_name: jobs_auto_schema_telemetry_currently_idle + labeled_name: 'jobs{type: auto_schema_telemetry, status: currently_idle}' + description: Number of auto_schema_telemetry jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + 
type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_schema_telemetry.currently_paused + exported_name: jobs_auto_schema_telemetry_currently_paused + labeled_name: 'jobs{name: auto_schema_telemetry, status: currently_paused}' + description: Number of auto_schema_telemetry jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_schema_telemetry.currently_running + exported_name: jobs_auto_schema_telemetry_currently_running + labeled_name: 'jobs{type: auto_schema_telemetry, status: currently_running}' + description: Number of auto_schema_telemetry jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_schema_telemetry.expired_pts_records + exported_name: jobs_auto_schema_telemetry_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_schema_telemetry}' + description: Number of expired protected timestamp records owned by auto_schema_telemetry jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_schema_telemetry.fail_or_cancel_completed + exported_name: jobs_auto_schema_telemetry_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_schema_telemetry, status: completed}' + description: Number of auto_schema_telemetry jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_schema_telemetry.fail_or_cancel_failed + exported_name: jobs_auto_schema_telemetry_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_schema_telemetry, status: failed}' + description: Number of auto_schema_telemetry jobs which failed with a non-retriable error on their failure or cancelation process + 
y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_schema_telemetry.fail_or_cancel_retry_error + exported_name: jobs_auto_schema_telemetry_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_schema_telemetry, status: retry_error}' + description: Number of auto_schema_telemetry jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_schema_telemetry.protected_age_sec + exported_name: jobs_auto_schema_telemetry_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_schema_telemetry}' + description: The age of the oldest PTS record protected by auto_schema_telemetry jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_schema_telemetry.protected_record_count + exported_name: jobs_auto_schema_telemetry_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_schema_telemetry}' + description: Number of protected timestamp records held by auto_schema_telemetry jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_schema_telemetry.resume_completed + exported_name: jobs_auto_schema_telemetry_resume_completed + labeled_name: 'jobs.resume{name: auto_schema_telemetry, status: completed}' + description: Number of auto_schema_telemetry jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_schema_telemetry.resume_failed + exported_name: jobs_auto_schema_telemetry_resume_failed + labeled_name: 'jobs.resume{name: auto_schema_telemetry, status: failed}' + description: Number of auto_schema_telemetry jobs which failed with a non-retriable error + y_axis_label: 
jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_schema_telemetry.resume_retry_error + exported_name: jobs_auto_schema_telemetry_resume_retry_error + labeled_name: 'jobs.resume{name: auto_schema_telemetry, status: retry_error}' + description: Number of auto_schema_telemetry jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_span_config_reconciliation.currently_idle + exported_name: jobs_auto_span_config_reconciliation_currently_idle + labeled_name: 'jobs{type: auto_span_config_reconciliation, status: currently_idle}' + description: Number of auto_span_config_reconciliation jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_span_config_reconciliation.currently_paused + exported_name: jobs_auto_span_config_reconciliation_currently_paused + labeled_name: 'jobs{name: auto_span_config_reconciliation, status: currently_paused}' + description: Number of auto_span_config_reconciliation jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_span_config_reconciliation.currently_running + exported_name: jobs_auto_span_config_reconciliation_currently_running + labeled_name: 'jobs{type: auto_span_config_reconciliation, status: currently_running}' + description: Number of auto_span_config_reconciliation jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_span_config_reconciliation.expired_pts_records + exported_name: jobs_auto_span_config_reconciliation_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_span_config_reconciliation}' + description: Number of expired protected 
timestamp records owned by auto_span_config_reconciliation jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_span_config_reconciliation.fail_or_cancel_completed + exported_name: jobs_auto_span_config_reconciliation_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_span_config_reconciliation, status: completed}' + description: Number of auto_span_config_reconciliation jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_span_config_reconciliation.fail_or_cancel_failed + exported_name: jobs_auto_span_config_reconciliation_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_span_config_reconciliation, status: failed}' + description: Number of auto_span_config_reconciliation jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_span_config_reconciliation.fail_or_cancel_retry_error + exported_name: jobs_auto_span_config_reconciliation_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_span_config_reconciliation, status: retry_error}' + description: Number of auto_span_config_reconciliation jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_span_config_reconciliation.protected_age_sec + exported_name: jobs_auto_span_config_reconciliation_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_span_config_reconciliation}' + description: The age of the oldest PTS record protected by auto_span_config_reconciliation jobs + y_axis_label: seconds + type: GAUGE + 
unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_span_config_reconciliation.protected_record_count + exported_name: jobs_auto_span_config_reconciliation_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_span_config_reconciliation}' + description: Number of protected timestamp records held by auto_span_config_reconciliation jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_span_config_reconciliation.resume_completed + exported_name: jobs_auto_span_config_reconciliation_resume_completed + labeled_name: 'jobs.resume{name: auto_span_config_reconciliation, status: completed}' + description: Number of auto_span_config_reconciliation jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_span_config_reconciliation.resume_failed + exported_name: jobs_auto_span_config_reconciliation_resume_failed + labeled_name: 'jobs.resume{name: auto_span_config_reconciliation, status: failed}' + description: Number of auto_span_config_reconciliation jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_span_config_reconciliation.resume_retry_error + exported_name: jobs_auto_span_config_reconciliation_resume_retry_error + labeled_name: 'jobs.resume{name: auto_span_config_reconciliation, status: retry_error}' + description: Number of auto_span_config_reconciliation jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_sql_stats_compaction.currently_idle + exported_name: jobs_auto_sql_stats_compaction_currently_idle + labeled_name: 'jobs{type: auto_sql_stats_compaction, status: currently_idle}' + description: Number of 
auto_sql_stats_compaction jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_sql_stats_compaction.currently_paused + exported_name: jobs_auto_sql_stats_compaction_currently_paused + labeled_name: 'jobs{name: auto_sql_stats_compaction, status: currently_paused}' + description: Number of auto_sql_stats_compaction jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_sql_stats_compaction.currently_running + exported_name: jobs_auto_sql_stats_compaction_currently_running + labeled_name: 'jobs{type: auto_sql_stats_compaction, status: currently_running}' + description: Number of auto_sql_stats_compaction jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_sql_stats_compaction.expired_pts_records + exported_name: jobs_auto_sql_stats_compaction_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_sql_stats_compaction}' + description: Number of expired protected timestamp records owned by auto_sql_stats_compaction jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_sql_stats_compaction.fail_or_cancel_completed + exported_name: jobs_auto_sql_stats_compaction_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_sql_stats_compaction, status: completed}' + description: Number of auto_sql_stats_compaction jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_sql_stats_compaction.fail_or_cancel_failed + exported_name: jobs_auto_sql_stats_compaction_fail_or_cancel_failed + labeled_name: 
'jobs.fail_or_cancel{name: auto_sql_stats_compaction, status: failed}' + description: Number of auto_sql_stats_compaction jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_sql_stats_compaction.fail_or_cancel_retry_error + exported_name: jobs_auto_sql_stats_compaction_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_sql_stats_compaction, status: retry_error}' + description: Number of auto_sql_stats_compaction jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_sql_stats_compaction.protected_age_sec + exported_name: jobs_auto_sql_stats_compaction_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_sql_stats_compaction}' + description: The age of the oldest PTS record protected by auto_sql_stats_compaction jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_sql_stats_compaction.protected_record_count + exported_name: jobs_auto_sql_stats_compaction_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_sql_stats_compaction}' + description: Number of protected timestamp records held by auto_sql_stats_compaction jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_sql_stats_compaction.resume_completed + exported_name: jobs_auto_sql_stats_compaction_resume_completed + labeled_name: 'jobs.resume{name: auto_sql_stats_compaction, status: completed}' + description: Number of auto_sql_stats_compaction jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
jobs.auto_sql_stats_compaction.resume_failed + exported_name: jobs_auto_sql_stats_compaction_resume_failed + labeled_name: 'jobs.resume{name: auto_sql_stats_compaction, status: failed}' + description: Number of auto_sql_stats_compaction jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_sql_stats_compaction.resume_retry_error + exported_name: jobs_auto_sql_stats_compaction_resume_retry_error + labeled_name: 'jobs.resume{name: auto_sql_stats_compaction, status: retry_error}' + description: Number of auto_sql_stats_compaction jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_update_sql_activity.currently_idle + exported_name: jobs_auto_update_sql_activity_currently_idle + labeled_name: 'jobs{type: auto_update_sql_activity, status: currently_idle}' + description: Number of auto_update_sql_activity jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_update_sql_activity.currently_paused + exported_name: jobs_auto_update_sql_activity_currently_paused + labeled_name: 'jobs{name: auto_update_sql_activity, status: currently_paused}' + description: Number of auto_update_sql_activity jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_update_sql_activity.currently_running + exported_name: jobs_auto_update_sql_activity_currently_running + labeled_name: 'jobs{type: auto_update_sql_activity, status: currently_running}' + description: Number of auto_update_sql_activity jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: 
jobs.auto_update_sql_activity.expired_pts_records + exported_name: jobs_auto_update_sql_activity_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_update_sql_activity}' + description: Number of expired protected timestamp records owned by auto_update_sql_activity jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_update_sql_activity.fail_or_cancel_completed + exported_name: jobs_auto_update_sql_activity_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_update_sql_activity, status: completed}' + description: Number of auto_update_sql_activity jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_update_sql_activity.fail_or_cancel_failed + exported_name: jobs_auto_update_sql_activity_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_update_sql_activity, status: failed}' + description: Number of auto_update_sql_activity jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_update_sql_activity.fail_or_cancel_retry_error + exported_name: jobs_auto_update_sql_activity_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_update_sql_activity, status: retry_error}' + description: Number of auto_update_sql_activity jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_update_sql_activity.protected_age_sec + exported_name: jobs_auto_update_sql_activity_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_update_sql_activity}' + description: 
The age of the oldest PTS record protected by auto_update_sql_activity jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_update_sql_activity.protected_record_count + exported_name: jobs_auto_update_sql_activity_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_update_sql_activity}' + description: Number of protected timestamp records held by auto_update_sql_activity jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_update_sql_activity.resume_completed + exported_name: jobs_auto_update_sql_activity_resume_completed + labeled_name: 'jobs.resume{name: auto_update_sql_activity, status: completed}' + description: Number of auto_update_sql_activity jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_update_sql_activity.resume_failed + exported_name: jobs_auto_update_sql_activity_resume_failed + labeled_name: 'jobs.resume{name: auto_update_sql_activity, status: failed}' + description: Number of auto_update_sql_activity jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_update_sql_activity.resume_retry_error + exported_name: jobs_auto_update_sql_activity_resume_retry_error + labeled_name: 'jobs.resume{name: auto_update_sql_activity, status: retry_error}' + description: Number of auto_update_sql_activity jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.backup.currently_idle + exported_name: jobs_backup_currently_idle + labeled_name: 'jobs{type: backup, status: currently_idle}' + description: Number of backup jobs currently considered Idle and can be freely shut 
down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.backup.expired_pts_records + exported_name: jobs_backup_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: backup}' + description: Number of expired protected timestamp records owned by backup jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.backup.fail_or_cancel_completed + exported_name: jobs_backup_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: backup, status: completed}' + description: Number of backup jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.backup.fail_or_cancel_failed + exported_name: jobs_backup_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: backup, status: failed}' + description: Number of backup jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.backup.fail_or_cancel_retry_error + exported_name: jobs_backup_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: backup, status: retry_error}' + description: Number of backup jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.backup.protected_age_sec + exported_name: jobs_backup_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: backup}' + description: The age of the oldest PTS record protected by backup jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.backup.protected_record_count + exported_name: 
jobs_backup_protected_record_count + labeled_name: 'jobs.protected_record_count{type: backup}' + description: Number of protected timestamp records held by backup jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.backup.resume_completed + exported_name: jobs_backup_resume_completed + labeled_name: 'jobs.resume{name: backup, status: completed}' + description: Number of backup jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.backup.resume_failed + exported_name: jobs_backup_resume_failed + labeled_name: 'jobs.resume{name: backup, status: failed}' + description: Number of backup jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.backup.resume_retry_error + exported_name: jobs_backup_resume_retry_error + labeled_name: 'jobs.resume{name: backup, status: retry_error}' + description: Number of backup jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.changefeed.currently_idle + exported_name: jobs_changefeed_currently_idle + labeled_name: 'jobs{type: changefeed, status: currently_idle}' + description: Number of changefeed jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.changefeed.currently_running + exported_name: jobs_changefeed_currently_running + labeled_name: 'jobs{type: changefeed, status: currently_running}' + description: Number of changefeed jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.changefeed.expired_pts_records + exported_name: 
jobs_changefeed_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: changefeed}' + description: Number of expired protected timestamp records owned by changefeed jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.changefeed.fail_or_cancel_completed + exported_name: jobs_changefeed_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: changefeed, status: completed}' + description: Number of changefeed jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.changefeed.fail_or_cancel_failed + exported_name: jobs_changefeed_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: changefeed, status: failed}' + description: Number of changefeed jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.changefeed.fail_or_cancel_retry_error + exported_name: jobs_changefeed_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: changefeed, status: retry_error}' + description: Number of changefeed jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.changefeed.protected_record_count + exported_name: jobs_changefeed_protected_record_count + labeled_name: 'jobs.protected_record_count{type: changefeed}' + description: Number of protected timestamp records held by changefeed jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.changefeed.resume_completed + exported_name: jobs_changefeed_resume_completed + labeled_name: 'jobs.resume{name: changefeed, status: 
completed}' + description: Number of changefeed jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.changefeed.resume_failed + exported_name: jobs_changefeed_resume_failed + labeled_name: 'jobs.resume{name: changefeed, status: failed}' + description: Number of changefeed jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.changefeed.resume_retry_error + exported_name: jobs_changefeed_resume_retry_error + labeled_name: 'jobs.resume{name: changefeed, status: retry_error}' + description: Number of changefeed jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.claimed_jobs + exported_name: jobs_claimed_jobs + description: number of jobs claimed in job-adopt iterations + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.create_stats.currently_idle + exported_name: jobs_create_stats_currently_idle + labeled_name: 'jobs{type: create_stats, status: currently_idle}' + description: Number of create_stats jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.create_stats.currently_paused + exported_name: jobs_create_stats_currently_paused + labeled_name: 'jobs{name: create_stats, status: currently_paused}' + description: Number of create_stats jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.create_stats.expired_pts_records + exported_name: jobs_create_stats_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: create_stats}' + description: Number of expired 
protected timestamp records owned by create_stats jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.create_stats.fail_or_cancel_completed + exported_name: jobs_create_stats_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: create_stats, status: completed}' + description: Number of create_stats jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.create_stats.fail_or_cancel_failed + exported_name: jobs_create_stats_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: create_stats, status: failed}' + description: Number of create_stats jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.create_stats.fail_or_cancel_retry_error + exported_name: jobs_create_stats_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: create_stats, status: retry_error}' + description: Number of create_stats jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.create_stats.protected_age_sec + exported_name: jobs_create_stats_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: create_stats}' + description: The age of the oldest PTS record protected by create_stats jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.create_stats.protected_record_count + exported_name: jobs_create_stats_protected_record_count + labeled_name: 'jobs.protected_record_count{type: create_stats}' + description: Number of protected timestamp records held by create_stats jobs + 
y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.create_stats.resume_completed + exported_name: jobs_create_stats_resume_completed + labeled_name: 'jobs.resume{name: create_stats, status: completed}' + description: Number of create_stats jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.create_stats.resume_failed + exported_name: jobs_create_stats_resume_failed + labeled_name: 'jobs.resume{name: create_stats, status: failed}' + description: Number of create_stats jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.create_stats.resume_retry_error + exported_name: jobs_create_stats_resume_retry_error + labeled_name: 'jobs.resume{name: create_stats, status: retry_error}' + description: Number of create_stats jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.history_retention.currently_idle + exported_name: jobs_history_retention_currently_idle + labeled_name: 'jobs{type: history_retention, status: currently_idle}' + description: Number of history_retention jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.history_retention.currently_paused + exported_name: jobs_history_retention_currently_paused + labeled_name: 'jobs{name: history_retention, status: currently_paused}' + description: Number of history_retention jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.history_retention.currently_running + exported_name: jobs_history_retention_currently_running + labeled_name: 'jobs{type: 
history_retention, status: currently_running}' + description: Number of history_retention jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.history_retention.expired_pts_records + exported_name: jobs_history_retention_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: history_retention}' + description: Number of expired protected timestamp records owned by history_retention jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.history_retention.fail_or_cancel_completed + exported_name: jobs_history_retention_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: history_retention, status: completed}' + description: Number of history_retention jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.history_retention.fail_or_cancel_failed + exported_name: jobs_history_retention_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: history_retention, status: failed}' + description: Number of history_retention jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.history_retention.fail_or_cancel_retry_error + exported_name: jobs_history_retention_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: history_retention, status: retry_error}' + description: Number of history_retention jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.history_retention.protected_age_sec + exported_name: 
jobs_history_retention_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: history_retention}' + description: The age of the oldest PTS record protected by history_retention jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.history_retention.protected_record_count + exported_name: jobs_history_retention_protected_record_count + labeled_name: 'jobs.protected_record_count{type: history_retention}' + description: Number of protected timestamp records held by history_retention jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.history_retention.resume_completed + exported_name: jobs_history_retention_resume_completed + labeled_name: 'jobs.resume{name: history_retention, status: completed}' + description: Number of history_retention jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.history_retention.resume_failed + exported_name: jobs_history_retention_resume_failed + labeled_name: 'jobs.resume{name: history_retention, status: failed}' + description: Number of history_retention jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.history_retention.resume_retry_error + exported_name: jobs_history_retention_resume_retry_error + labeled_name: 'jobs.resume{name: history_retention, status: retry_error}' + description: Number of history_retention jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.hot_ranges_logger.currently_idle + exported_name: jobs_hot_ranges_logger_currently_idle + labeled_name: 'jobs{type: hot_ranges_logger, status: currently_idle}' + description: Number of hot_ranges_logger jobs 
currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.hot_ranges_logger.currently_paused + exported_name: jobs_hot_ranges_logger_currently_paused + labeled_name: 'jobs{name: hot_ranges_logger, status: currently_paused}' + description: Number of hot_ranges_logger jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.hot_ranges_logger.currently_running + exported_name: jobs_hot_ranges_logger_currently_running + labeled_name: 'jobs{type: hot_ranges_logger, status: currently_running}' + description: Number of hot_ranges_logger jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.hot_ranges_logger.expired_pts_records + exported_name: jobs_hot_ranges_logger_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: hot_ranges_logger}' + description: Number of expired protected timestamp records owned by hot_ranges_logger jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.hot_ranges_logger.fail_or_cancel_completed + exported_name: jobs_hot_ranges_logger_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: hot_ranges_logger, status: completed}' + description: Number of hot_ranges_logger jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.hot_ranges_logger.fail_or_cancel_failed + exported_name: jobs_hot_ranges_logger_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: hot_ranges_logger, status: failed}' + description: Number of hot_ranges_logger jobs which failed with a non-retriable error on their failure or cancelation process + 
y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.hot_ranges_logger.fail_or_cancel_retry_error + exported_name: jobs_hot_ranges_logger_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: hot_ranges_logger, status: retry_error}' + description: Number of hot_ranges_logger jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.hot_ranges_logger.protected_age_sec + exported_name: jobs_hot_ranges_logger_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: hot_ranges_logger}' + description: The age of the oldest PTS record protected by hot_ranges_logger jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.hot_ranges_logger.protected_record_count + exported_name: jobs_hot_ranges_logger_protected_record_count + labeled_name: 'jobs.protected_record_count{type: hot_ranges_logger}' + description: Number of protected timestamp records held by hot_ranges_logger jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.hot_ranges_logger.resume_completed + exported_name: jobs_hot_ranges_logger_resume_completed + labeled_name: 'jobs.resume{name: hot_ranges_logger, status: completed}' + description: Number of hot_ranges_logger jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.hot_ranges_logger.resume_failed + exported_name: jobs_hot_ranges_logger_resume_failed + labeled_name: 'jobs.resume{name: hot_ranges_logger, status: failed}' + description: Number of hot_ranges_logger jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: 
NON_NEGATIVE_DERIVATIVE + - name: jobs.hot_ranges_logger.resume_retry_error + exported_name: jobs_hot_ranges_logger_resume_retry_error + labeled_name: 'jobs.resume{name: hot_ranges_logger, status: retry_error}' + description: Number of hot_ranges_logger jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import.currently_idle + exported_name: jobs_import_currently_idle + labeled_name: 'jobs{type: import, status: currently_idle}' + description: Number of import jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import.currently_paused + exported_name: jobs_import_currently_paused + labeled_name: 'jobs{name: import, status: currently_paused}' + description: Number of import jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import.currently_running + exported_name: jobs_import_currently_running + labeled_name: 'jobs{type: import, status: currently_running}' + description: Number of import jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import.expired_pts_records + exported_name: jobs_import_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: import}' + description: Number of expired protected timestamp records owned by import jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import.fail_or_cancel_completed + exported_name: jobs_import_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: import, status: completed}' + description: Number of import jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: 
COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import.fail_or_cancel_failed + exported_name: jobs_import_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: import, status: failed}' + description: Number of import jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import.fail_or_cancel_retry_error + exported_name: jobs_import_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: import, status: retry_error}' + description: Number of import jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import.protected_age_sec + exported_name: jobs_import_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: import}' + description: The age of the oldest PTS record protected by import jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.import.protected_record_count + exported_name: jobs_import_protected_record_count + labeled_name: 'jobs.protected_record_count{type: import}' + description: Number of protected timestamp records held by import jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import.resume_completed + exported_name: jobs_import_resume_completed + labeled_name: 'jobs.resume{name: import, status: completed}' + description: Number of import jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import.resume_failed + exported_name: jobs_import_resume_failed + labeled_name: 'jobs.resume{name: import, status: failed}' + description: Number of import 
jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import.resume_retry_error + exported_name: jobs_import_resume_retry_error + labeled_name: 'jobs.resume{name: import, status: retry_error}' + description: Number of import jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import_rollback.currently_idle + exported_name: jobs_import_rollback_currently_idle + labeled_name: 'jobs{type: import_rollback, status: currently_idle}' + description: Number of import_rollback jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import_rollback.currently_paused + exported_name: jobs_import_rollback_currently_paused + labeled_name: 'jobs{name: import_rollback, status: currently_paused}' + description: Number of import_rollback jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import_rollback.currently_running + exported_name: jobs_import_rollback_currently_running + labeled_name: 'jobs{type: import_rollback, status: currently_running}' + description: Number of import_rollback jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import_rollback.expired_pts_records + exported_name: jobs_import_rollback_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: import_rollback}' + description: Number of expired protected timestamp records owned by import_rollback jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import_rollback.fail_or_cancel_completed + exported_name: 
jobs_import_rollback_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: import_rollback, status: completed}' + description: Number of import_rollback jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import_rollback.fail_or_cancel_failed + exported_name: jobs_import_rollback_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: import_rollback, status: failed}' + description: Number of import_rollback jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import_rollback.fail_or_cancel_retry_error + exported_name: jobs_import_rollback_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: import_rollback, status: retry_error}' + description: Number of import_rollback jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import_rollback.protected_age_sec + exported_name: jobs_import_rollback_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: import_rollback}' + description: The age of the oldest PTS record protected by import_rollback jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.import_rollback.protected_record_count + exported_name: jobs_import_rollback_protected_record_count + labeled_name: 'jobs.protected_record_count{type: import_rollback}' + description: Number of protected timestamp records held by import_rollback jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import_rollback.resume_completed + exported_name: 
jobs_import_rollback_resume_completed + labeled_name: 'jobs.resume{name: import_rollback, status: completed}' + description: Number of import_rollback jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import_rollback.resume_failed + exported_name: jobs_import_rollback_resume_failed + labeled_name: 'jobs.resume{name: import_rollback, status: failed}' + description: Number of import_rollback jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import_rollback.resume_retry_error + exported_name: jobs_import_rollback_resume_retry_error + labeled_name: 'jobs.resume{name: import_rollback, status: retry_error}' + description: Number of import_rollback jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.key_visualizer.currently_idle + exported_name: jobs_key_visualizer_currently_idle + labeled_name: 'jobs{type: key_visualizer, status: currently_idle}' + description: Number of key_visualizer jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.key_visualizer.currently_paused + exported_name: jobs_key_visualizer_currently_paused + labeled_name: 'jobs{name: key_visualizer, status: currently_paused}' + description: Number of key_visualizer jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.key_visualizer.currently_running + exported_name: jobs_key_visualizer_currently_running + labeled_name: 'jobs{type: key_visualizer, status: currently_running}' + description: Number of key_visualizer jobs currently running in Resume or OnFailOrCancel state + 
y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.key_visualizer.expired_pts_records + exported_name: jobs_key_visualizer_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: key_visualizer}' + description: Number of expired protected timestamp records owned by key_visualizer jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.key_visualizer.fail_or_cancel_completed + exported_name: jobs_key_visualizer_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: key_visualizer, status: completed}' + description: Number of key_visualizer jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.key_visualizer.fail_or_cancel_failed + exported_name: jobs_key_visualizer_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: key_visualizer, status: failed}' + description: Number of key_visualizer jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.key_visualizer.fail_or_cancel_retry_error + exported_name: jobs_key_visualizer_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: key_visualizer, status: retry_error}' + description: Number of key_visualizer jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.key_visualizer.protected_age_sec + exported_name: jobs_key_visualizer_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: key_visualizer}' + description: The age of the oldest PTS record protected by key_visualizer jobs + y_axis_label: seconds + 
type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.key_visualizer.protected_record_count + exported_name: jobs_key_visualizer_protected_record_count + labeled_name: 'jobs.protected_record_count{type: key_visualizer}' + description: Number of protected timestamp records held by key_visualizer jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.key_visualizer.resume_completed + exported_name: jobs_key_visualizer_resume_completed + labeled_name: 'jobs.resume{name: key_visualizer, status: completed}' + description: Number of key_visualizer jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.key_visualizer.resume_failed + exported_name: jobs_key_visualizer_resume_failed + labeled_name: 'jobs.resume{name: key_visualizer, status: failed}' + description: Number of key_visualizer jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.key_visualizer.resume_retry_error + exported_name: jobs_key_visualizer_resume_retry_error + labeled_name: 'jobs.resume{name: key_visualizer, status: retry_error}' + description: Number of key_visualizer jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.logical_replication.currently_idle + exported_name: jobs_logical_replication_currently_idle + labeled_name: 'jobs{type: logical_replication, status: currently_idle}' + description: Number of logical_replication jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.logical_replication.currently_paused + exported_name: jobs_logical_replication_currently_paused + 
labeled_name: 'jobs{name: logical_replication, status: currently_paused}' + description: Number of logical_replication jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.logical_replication.currently_running + exported_name: jobs_logical_replication_currently_running + labeled_name: 'jobs{type: logical_replication, status: currently_running}' + description: Number of logical_replication jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.logical_replication.expired_pts_records + exported_name: jobs_logical_replication_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: logical_replication}' + description: Number of expired protected timestamp records owned by logical_replication jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.logical_replication.fail_or_cancel_completed + exported_name: jobs_logical_replication_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: logical_replication, status: completed}' + description: Number of logical_replication jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.logical_replication.fail_or_cancel_failed + exported_name: jobs_logical_replication_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: logical_replication, status: failed}' + description: Number of logical_replication jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.logical_replication.fail_or_cancel_retry_error + exported_name: 
jobs_logical_replication_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: logical_replication, status: retry_error}' + description: Number of logical_replication jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.logical_replication.protected_age_sec + exported_name: jobs_logical_replication_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: logical_replication}' + description: The age of the oldest PTS record protected by logical_replication jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.logical_replication.protected_record_count + exported_name: jobs_logical_replication_protected_record_count + labeled_name: 'jobs.protected_record_count{type: logical_replication}' + description: Number of protected timestamp records held by logical_replication jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.logical_replication.resume_completed + exported_name: jobs_logical_replication_resume_completed + labeled_name: 'jobs.resume{name: logical_replication, status: completed}' + description: Number of logical_replication jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.logical_replication.resume_failed + exported_name: jobs_logical_replication_resume_failed + labeled_name: 'jobs.resume{name: logical_replication, status: failed}' + description: Number of logical_replication jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.logical_replication.resume_retry_error + exported_name: jobs_logical_replication_resume_retry_error + labeled_name: 
'jobs.resume{name: logical_replication, status: retry_error}' + description: Number of logical_replication jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.metrics.task_failed + exported_name: jobs_metrics_task_failed + description: Number of metrics poller tasks that failed + y_axis_label: errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.migration.currently_idle + exported_name: jobs_migration_currently_idle + labeled_name: 'jobs{type: migration, status: currently_idle}' + description: Number of migration jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.migration.currently_paused + exported_name: jobs_migration_currently_paused + labeled_name: 'jobs{name: migration, status: currently_paused}' + description: Number of migration jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.migration.currently_running + exported_name: jobs_migration_currently_running + labeled_name: 'jobs{type: migration, status: currently_running}' + description: Number of migration jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.migration.expired_pts_records + exported_name: jobs_migration_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: migration}' + description: Number of expired protected timestamp records owned by migration jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.migration.fail_or_cancel_completed + exported_name: jobs_migration_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: migration, 
status: completed}' + description: Number of migration jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.migration.fail_or_cancel_failed + exported_name: jobs_migration_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: migration, status: failed}' + description: Number of migration jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.migration.fail_or_cancel_retry_error + exported_name: jobs_migration_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: migration, status: retry_error}' + description: Number of migration jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.migration.protected_age_sec + exported_name: jobs_migration_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: migration}' + description: The age of the oldest PTS record protected by migration jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.migration.protected_record_count + exported_name: jobs_migration_protected_record_count + labeled_name: 'jobs.protected_record_count{type: migration}' + description: Number of protected timestamp records held by migration jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.migration.resume_completed + exported_name: jobs_migration_resume_completed + labeled_name: 'jobs.resume{name: migration, status: completed}' + description: Number of migration jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: 
AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.migration.resume_failed + exported_name: jobs_migration_resume_failed + labeled_name: 'jobs.resume{name: migration, status: failed}' + description: Number of migration jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.migration.resume_retry_error + exported_name: jobs_migration_resume_retry_error + labeled_name: 'jobs.resume{name: migration, status: retry_error}' + description: Number of migration jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.mvcc_statistics_update.currently_idle + exported_name: jobs_mvcc_statistics_update_currently_idle + labeled_name: 'jobs{type: mvcc_statistics_update, status: currently_idle}' + description: Number of mvcc_statistics_update jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.mvcc_statistics_update.currently_paused + exported_name: jobs_mvcc_statistics_update_currently_paused + labeled_name: 'jobs{name: mvcc_statistics_update, status: currently_paused}' + description: Number of mvcc_statistics_update jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.mvcc_statistics_update.currently_running + exported_name: jobs_mvcc_statistics_update_currently_running + labeled_name: 'jobs{type: mvcc_statistics_update, status: currently_running}' + description: Number of mvcc_statistics_update jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.mvcc_statistics_update.expired_pts_records + exported_name: jobs_mvcc_statistics_update_expired_pts_records + 
labeled_name: 'jobs.expired_pts_records{type: mvcc_statistics_update}' + description: Number of expired protected timestamp records owned by mvcc_statistics_update jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.mvcc_statistics_update.fail_or_cancel_completed + exported_name: jobs_mvcc_statistics_update_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: mvcc_statistics_update, status: completed}' + description: Number of mvcc_statistics_update jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.mvcc_statistics_update.fail_or_cancel_failed + exported_name: jobs_mvcc_statistics_update_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: mvcc_statistics_update, status: failed}' + description: Number of mvcc_statistics_update jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.mvcc_statistics_update.fail_or_cancel_retry_error + exported_name: jobs_mvcc_statistics_update_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: mvcc_statistics_update, status: retry_error}' + description: Number of mvcc_statistics_update jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.mvcc_statistics_update.protected_age_sec + exported_name: jobs_mvcc_statistics_update_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: mvcc_statistics_update}' + description: The age of the oldest PTS record protected by mvcc_statistics_update jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + 
derivative: NONE + - name: jobs.mvcc_statistics_update.protected_record_count + exported_name: jobs_mvcc_statistics_update_protected_record_count + labeled_name: 'jobs.protected_record_count{type: mvcc_statistics_update}' + description: Number of protected timestamp records held by mvcc_statistics_update jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.mvcc_statistics_update.resume_completed + exported_name: jobs_mvcc_statistics_update_resume_completed + labeled_name: 'jobs.resume{name: mvcc_statistics_update, status: completed}' + description: Number of mvcc_statistics_update jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.mvcc_statistics_update.resume_failed + exported_name: jobs_mvcc_statistics_update_resume_failed + labeled_name: 'jobs.resume{name: mvcc_statistics_update, status: failed}' + description: Number of mvcc_statistics_update jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.mvcc_statistics_update.resume_retry_error + exported_name: jobs_mvcc_statistics_update_resume_retry_error + labeled_name: 'jobs.resume{name: mvcc_statistics_update, status: retry_error}' + description: Number of mvcc_statistics_update jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.new_schema_change.currently_idle + exported_name: jobs_new_schema_change_currently_idle + labeled_name: 'jobs{type: new_schema_change, status: currently_idle}' + description: Number of new_schema_change jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.new_schema_change.currently_paused 
+ exported_name: jobs_new_schema_change_currently_paused + labeled_name: 'jobs{name: new_schema_change, status: currently_paused}' + description: Number of new_schema_change jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.new_schema_change.currently_running + exported_name: jobs_new_schema_change_currently_running + labeled_name: 'jobs{type: new_schema_change, status: currently_running}' + description: Number of new_schema_change jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.new_schema_change.expired_pts_records + exported_name: jobs_new_schema_change_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: new_schema_change}' + description: Number of expired protected timestamp records owned by new_schema_change jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.new_schema_change.fail_or_cancel_completed + exported_name: jobs_new_schema_change_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: new_schema_change, status: completed}' + description: Number of new_schema_change jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.new_schema_change.fail_or_cancel_failed + exported_name: jobs_new_schema_change_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: new_schema_change, status: failed}' + description: Number of new_schema_change jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.new_schema_change.fail_or_cancel_retry_error + exported_name: 
jobs_new_schema_change_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: new_schema_change, status: retry_error}' + description: Number of new_schema_change jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.new_schema_change.protected_age_sec + exported_name: jobs_new_schema_change_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: new_schema_change}' + description: The age of the oldest PTS record protected by new_schema_change jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.new_schema_change.protected_record_count + exported_name: jobs_new_schema_change_protected_record_count + labeled_name: 'jobs.protected_record_count{type: new_schema_change}' + description: Number of protected timestamp records held by new_schema_change jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.new_schema_change.resume_completed + exported_name: jobs_new_schema_change_resume_completed + labeled_name: 'jobs.resume{name: new_schema_change, status: completed}' + description: Number of new_schema_change jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.new_schema_change.resume_failed + exported_name: jobs_new_schema_change_resume_failed + labeled_name: 'jobs.resume{name: new_schema_change, status: failed}' + description: Number of new_schema_change jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.new_schema_change.resume_retry_error + exported_name: jobs_new_schema_change_resume_retry_error + labeled_name: 'jobs.resume{name: new_schema_change, status: 
retry_error}' + description: Number of new_schema_change jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.poll_jobs_stats.currently_idle + exported_name: jobs_poll_jobs_stats_currently_idle + labeled_name: 'jobs{type: poll_jobs_stats, status: currently_idle}' + description: Number of poll_jobs_stats jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.poll_jobs_stats.currently_paused + exported_name: jobs_poll_jobs_stats_currently_paused + labeled_name: 'jobs{name: poll_jobs_stats, status: currently_paused}' + description: Number of poll_jobs_stats jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.poll_jobs_stats.currently_running + exported_name: jobs_poll_jobs_stats_currently_running + labeled_name: 'jobs{type: poll_jobs_stats, status: currently_running}' + description: Number of poll_jobs_stats jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.poll_jobs_stats.expired_pts_records + exported_name: jobs_poll_jobs_stats_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: poll_jobs_stats}' + description: Number of expired protected timestamp records owned by poll_jobs_stats jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.poll_jobs_stats.fail_or_cancel_completed + exported_name: jobs_poll_jobs_stats_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: poll_jobs_stats, status: completed}' + description: Number of poll_jobs_stats jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT 
+ aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.poll_jobs_stats.fail_or_cancel_failed + exported_name: jobs_poll_jobs_stats_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: poll_jobs_stats, status: failed}' + description: Number of poll_jobs_stats jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.poll_jobs_stats.fail_or_cancel_retry_error + exported_name: jobs_poll_jobs_stats_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: poll_jobs_stats, status: retry_error}' + description: Number of poll_jobs_stats jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.poll_jobs_stats.protected_age_sec + exported_name: jobs_poll_jobs_stats_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: poll_jobs_stats}' + description: The age of the oldest PTS record protected by poll_jobs_stats jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.poll_jobs_stats.protected_record_count + exported_name: jobs_poll_jobs_stats_protected_record_count + labeled_name: 'jobs.protected_record_count{type: poll_jobs_stats}' + description: Number of protected timestamp records held by poll_jobs_stats jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.poll_jobs_stats.resume_completed + exported_name: jobs_poll_jobs_stats_resume_completed + labeled_name: 'jobs.resume{name: poll_jobs_stats, status: completed}' + description: Number of poll_jobs_stats jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
jobs.poll_jobs_stats.resume_failed + exported_name: jobs_poll_jobs_stats_resume_failed + labeled_name: 'jobs.resume{name: poll_jobs_stats, status: failed}' + description: Number of poll_jobs_stats jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.poll_jobs_stats.resume_retry_error + exported_name: jobs_poll_jobs_stats_resume_retry_error + labeled_name: 'jobs.resume{name: poll_jobs_stats, status: retry_error}' + description: Number of poll_jobs_stats jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_ingestion.currently_idle + exported_name: jobs_replication_stream_ingestion_currently_idle + labeled_name: 'jobs{type: replication_stream_ingestion, status: currently_idle}' + description: Number of replication_stream_ingestion jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_ingestion.currently_paused + exported_name: jobs_replication_stream_ingestion_currently_paused + labeled_name: 'jobs{name: replication_stream_ingestion, status: currently_paused}' + description: Number of replication_stream_ingestion jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_ingestion.currently_running + exported_name: jobs_replication_stream_ingestion_currently_running + labeled_name: 'jobs{type: replication_stream_ingestion, status: currently_running}' + description: Number of replication_stream_ingestion jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_ingestion.expired_pts_records + 
exported_name: jobs_replication_stream_ingestion_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: replication_stream_ingestion}' + description: Number of expired protected timestamp records owned by replication_stream_ingestion jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_ingestion.fail_or_cancel_completed + exported_name: jobs_replication_stream_ingestion_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: replication_stream_ingestion, status: completed}' + description: Number of replication_stream_ingestion jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_ingestion.fail_or_cancel_failed + exported_name: jobs_replication_stream_ingestion_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: replication_stream_ingestion, status: failed}' + description: Number of replication_stream_ingestion jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_ingestion.fail_or_cancel_retry_error + exported_name: jobs_replication_stream_ingestion_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: replication_stream_ingestion, status: retry_error}' + description: Number of replication_stream_ingestion jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_ingestion.protected_age_sec + exported_name: jobs_replication_stream_ingestion_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: 
replication_stream_ingestion}' + description: The age of the oldest PTS record protected by replication_stream_ingestion jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_ingestion.protected_record_count + exported_name: jobs_replication_stream_ingestion_protected_record_count + labeled_name: 'jobs.protected_record_count{type: replication_stream_ingestion}' + description: Number of protected timestamp records held by replication_stream_ingestion jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_ingestion.resume_completed + exported_name: jobs_replication_stream_ingestion_resume_completed + labeled_name: 'jobs.resume{name: replication_stream_ingestion, status: completed}' + description: Number of replication_stream_ingestion jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_ingestion.resume_failed + exported_name: jobs_replication_stream_ingestion_resume_failed + labeled_name: 'jobs.resume{name: replication_stream_ingestion, status: failed}' + description: Number of replication_stream_ingestion jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_ingestion.resume_retry_error + exported_name: jobs_replication_stream_ingestion_resume_retry_error + labeled_name: 'jobs.resume{name: replication_stream_ingestion, status: retry_error}' + description: Number of replication_stream_ingestion jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_producer.currently_idle + exported_name: 
jobs_replication_stream_producer_currently_idle + labeled_name: 'jobs{type: replication_stream_producer, status: currently_idle}' + description: Number of replication_stream_producer jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_producer.currently_paused + exported_name: jobs_replication_stream_producer_currently_paused + labeled_name: 'jobs{name: replication_stream_producer, status: currently_paused}' + description: Number of replication_stream_producer jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_producer.currently_running + exported_name: jobs_replication_stream_producer_currently_running + labeled_name: 'jobs{type: replication_stream_producer, status: currently_running}' + description: Number of replication_stream_producer jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_producer.expired_pts_records + exported_name: jobs_replication_stream_producer_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: replication_stream_producer}' + description: Number of expired protected timestamp records owned by replication_stream_producer jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_producer.fail_or_cancel_completed + exported_name: jobs_replication_stream_producer_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: replication_stream_producer, status: completed}' + description: Number of replication_stream_producer jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: 
NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_producer.fail_or_cancel_failed + exported_name: jobs_replication_stream_producer_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: replication_stream_producer, status: failed}' + description: Number of replication_stream_producer jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_producer.fail_or_cancel_retry_error + exported_name: jobs_replication_stream_producer_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: replication_stream_producer, status: retry_error}' + description: Number of replication_stream_producer jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_producer.protected_age_sec + exported_name: jobs_replication_stream_producer_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: replication_stream_producer}' + description: The age of the oldest PTS record protected by replication_stream_producer jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_producer.protected_record_count + exported_name: jobs_replication_stream_producer_protected_record_count + labeled_name: 'jobs.protected_record_count{type: replication_stream_producer}' + description: Number of protected timestamp records held by replication_stream_producer jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_producer.resume_completed + exported_name: jobs_replication_stream_producer_resume_completed + labeled_name: 'jobs.resume{name: replication_stream_producer, status: completed}' + description: 
Number of replication_stream_producer jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_producer.resume_failed + exported_name: jobs_replication_stream_producer_resume_failed + labeled_name: 'jobs.resume{name: replication_stream_producer, status: failed}' + description: Number of replication_stream_producer jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_producer.resume_retry_error + exported_name: jobs_replication_stream_producer_resume_retry_error + labeled_name: 'jobs.resume{name: replication_stream_producer, status: retry_error}' + description: Number of replication_stream_producer jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.restore.currently_idle + exported_name: jobs_restore_currently_idle + labeled_name: 'jobs{type: restore, status: currently_idle}' + description: Number of restore jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.restore.currently_paused + exported_name: jobs_restore_currently_paused + labeled_name: 'jobs{name: restore, status: currently_paused}' + description: Number of restore jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.restore.currently_running + exported_name: jobs_restore_currently_running + labeled_name: 'jobs{type: restore, status: currently_running}' + description: Number of restore jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: 
jobs.restore.expired_pts_records + exported_name: jobs_restore_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: restore}' + description: Number of expired protected timestamp records owned by restore jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.restore.fail_or_cancel_completed + exported_name: jobs_restore_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: restore, status: completed}' + description: Number of restore jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.restore.fail_or_cancel_failed + exported_name: jobs_restore_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: restore, status: failed}' + description: Number of restore jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.restore.fail_or_cancel_retry_error + exported_name: jobs_restore_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: restore, status: retry_error}' + description: Number of restore jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.restore.protected_age_sec + exported_name: jobs_restore_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: restore}' + description: The age of the oldest PTS record protected by restore jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.restore.protected_record_count + exported_name: jobs_restore_protected_record_count + labeled_name: 'jobs.protected_record_count{type: restore}' + 
description: Number of protected timestamp records held by restore jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.restore.resume_completed + exported_name: jobs_restore_resume_completed + labeled_name: 'jobs.resume{name: restore, status: completed}' + description: Number of restore jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.restore.resume_failed + exported_name: jobs_restore_resume_failed + labeled_name: 'jobs.resume{name: restore, status: failed}' + description: Number of restore jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.restore.resume_retry_error + exported_name: jobs_restore_resume_retry_error + labeled_name: 'jobs.resume{name: restore, status: retry_error}' + description: Number of restore jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.resumed_claimed_jobs + exported_name: jobs_resumed_claimed_jobs + description: number of claimed-jobs resumed in job-adopt iterations + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.row_level_ttl.currently_idle + exported_name: jobs_row_level_ttl_currently_idle + labeled_name: 'jobs{type: row_level_ttl, status: currently_idle}' + description: Number of row_level_ttl jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.row_level_ttl.expired_pts_records + exported_name: jobs_row_level_ttl_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: row_level_ttl}' + description: Number of expired protected timestamp 
records owned by row_level_ttl jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.row_level_ttl.fail_or_cancel_completed + exported_name: jobs_row_level_ttl_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: row_level_ttl, status: completed}' + description: Number of row_level_ttl jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.row_level_ttl.fail_or_cancel_failed + exported_name: jobs_row_level_ttl_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: row_level_ttl, status: failed}' + description: Number of row_level_ttl jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.row_level_ttl.fail_or_cancel_retry_error + exported_name: jobs_row_level_ttl_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: row_level_ttl, status: retry_error}' + description: Number of row_level_ttl jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.row_level_ttl.num_delete_batch_retries + exported_name: jobs_row_level_ttl_num_delete_batch_retries + description: Number of times the row level TTL job had to reduce the delete batch size and retry. 
+ y_axis_label: num_retries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.row_level_ttl.protected_age_sec + exported_name: jobs_row_level_ttl_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: row_level_ttl}' + description: The age of the oldest PTS record protected by row_level_ttl jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.row_level_ttl.protected_record_count + exported_name: jobs_row_level_ttl_protected_record_count + labeled_name: 'jobs.protected_record_count{type: row_level_ttl}' + description: Number of protected timestamp records held by row_level_ttl jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.row_level_ttl.resume_retry_error + exported_name: jobs_row_level_ttl_resume_retry_error + labeled_name: 'jobs.resume{name: row_level_ttl, status: retry_error}' + description: Number of row_level_ttl jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.running_non_idle + exported_name: jobs_running_non_idle + description: number of running jobs that are not idle + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change.currently_idle + exported_name: jobs_schema_change_currently_idle + labeled_name: 'jobs{type: schema_change, status: currently_idle}' + description: Number of schema_change jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change.currently_paused + exported_name: jobs_schema_change_currently_paused + labeled_name: 'jobs{name: schema_change, status: currently_paused}' + description: Number of schema_change jobs currently considered Paused + y_axis_label: jobs + type: GAUGE 
+ unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change.currently_running + exported_name: jobs_schema_change_currently_running + labeled_name: 'jobs{type: schema_change, status: currently_running}' + description: Number of schema_change jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change.expired_pts_records + exported_name: jobs_schema_change_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: schema_change}' + description: Number of expired protected timestamp records owned by schema_change jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change.fail_or_cancel_completed + exported_name: jobs_schema_change_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: schema_change, status: completed}' + description: Number of schema_change jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change.fail_or_cancel_failed + exported_name: jobs_schema_change_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: schema_change, status: failed}' + description: Number of schema_change jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change.fail_or_cancel_retry_error + exported_name: jobs_schema_change_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: schema_change, status: retry_error}' + description: Number of schema_change jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + 
derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change.protected_age_sec + exported_name: jobs_schema_change_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: schema_change}' + description: The age of the oldest PTS record protected by schema_change jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.schema_change.protected_record_count + exported_name: jobs_schema_change_protected_record_count + labeled_name: 'jobs.protected_record_count{type: schema_change}' + description: Number of protected timestamp records held by schema_change jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change.resume_completed + exported_name: jobs_schema_change_resume_completed + labeled_name: 'jobs.resume{name: schema_change, status: completed}' + description: Number of schema_change jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change.resume_failed + exported_name: jobs_schema_change_resume_failed + labeled_name: 'jobs.resume{name: schema_change, status: failed}' + description: Number of schema_change jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change.resume_retry_error + exported_name: jobs_schema_change_resume_retry_error + labeled_name: 'jobs.resume{name: schema_change, status: retry_error}' + description: Number of schema_change jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change_gc.currently_idle + exported_name: jobs_schema_change_gc_currently_idle + labeled_name: 'jobs{type: schema_change_gc, status: currently_idle}' + description: Number of 
schema_change_gc jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change_gc.currently_paused + exported_name: jobs_schema_change_gc_currently_paused + labeled_name: 'jobs{name: schema_change_gc, status: currently_paused}' + description: Number of schema_change_gc jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change_gc.currently_running + exported_name: jobs_schema_change_gc_currently_running + labeled_name: 'jobs{type: schema_change_gc, status: currently_running}' + description: Number of schema_change_gc jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change_gc.expired_pts_records + exported_name: jobs_schema_change_gc_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: schema_change_gc}' + description: Number of expired protected timestamp records owned by schema_change_gc jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change_gc.fail_or_cancel_completed + exported_name: jobs_schema_change_gc_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: schema_change_gc, status: completed}' + description: Number of schema_change_gc jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change_gc.fail_or_cancel_failed + exported_name: jobs_schema_change_gc_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: schema_change_gc, status: failed}' + description: Number of schema_change_gc jobs which failed with a non-retriable error on their failure or cancelation process + 
y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change_gc.fail_or_cancel_retry_error + exported_name: jobs_schema_change_gc_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: schema_change_gc, status: retry_error}' + description: Number of schema_change_gc jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change_gc.protected_age_sec + exported_name: jobs_schema_change_gc_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: schema_change_gc}' + description: The age of the oldest PTS record protected by schema_change_gc jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.schema_change_gc.protected_record_count + exported_name: jobs_schema_change_gc_protected_record_count + labeled_name: 'jobs.protected_record_count{type: schema_change_gc}' + description: Number of protected timestamp records held by schema_change_gc jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change_gc.resume_completed + exported_name: jobs_schema_change_gc_resume_completed + labeled_name: 'jobs.resume{name: schema_change_gc, status: completed}' + description: Number of schema_change_gc jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change_gc.resume_failed + exported_name: jobs_schema_change_gc_resume_failed + labeled_name: 'jobs.resume{name: schema_change_gc, status: failed}' + description: Number of schema_change_gc jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
jobs.schema_change_gc.resume_retry_error + exported_name: jobs_schema_change_gc_resume_retry_error + labeled_name: 'jobs.resume{name: schema_change_gc, status: retry_error}' + description: Number of schema_change_gc jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.sql_activity_flush.currently_idle + exported_name: jobs_sql_activity_flush_currently_idle + labeled_name: 'jobs{type: sql_activity_flush, status: currently_idle}' + description: Number of sql_activity_flush jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.sql_activity_flush.currently_paused + exported_name: jobs_sql_activity_flush_currently_paused + labeled_name: 'jobs{name: sql_activity_flush, status: currently_paused}' + description: Number of sql_activity_flush jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.sql_activity_flush.currently_running + exported_name: jobs_sql_activity_flush_currently_running + labeled_name: 'jobs{type: sql_activity_flush, status: currently_running}' + description: Number of sql_activity_flush jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.sql_activity_flush.expired_pts_records + exported_name: jobs_sql_activity_flush_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: sql_activity_flush}' + description: Number of expired protected timestamp records owned by sql_activity_flush jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.sql_activity_flush.fail_or_cancel_completed + exported_name: jobs_sql_activity_flush_fail_or_cancel_completed + labeled_name: 
'jobs.fail_or_cancel{name: sql_activity_flush, status: completed}' + description: Number of sql_activity_flush jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.sql_activity_flush.fail_or_cancel_failed + exported_name: jobs_sql_activity_flush_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: sql_activity_flush, status: failed}' + description: Number of sql_activity_flush jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.sql_activity_flush.fail_or_cancel_retry_error + exported_name: jobs_sql_activity_flush_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: sql_activity_flush, status: retry_error}' + description: Number of sql_activity_flush jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.sql_activity_flush.protected_age_sec + exported_name: jobs_sql_activity_flush_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: sql_activity_flush}' + description: The age of the oldest PTS record protected by sql_activity_flush jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.sql_activity_flush.protected_record_count + exported_name: jobs_sql_activity_flush_protected_record_count + labeled_name: 'jobs.protected_record_count{type: sql_activity_flush}' + description: Number of protected timestamp records held by sql_activity_flush jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.sql_activity_flush.resume_completed + exported_name: 
jobs_sql_activity_flush_resume_completed + labeled_name: 'jobs.resume{name: sql_activity_flush, status: completed}' + description: Number of sql_activity_flush jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.sql_activity_flush.resume_failed + exported_name: jobs_sql_activity_flush_resume_failed + labeled_name: 'jobs.resume{name: sql_activity_flush, status: failed}' + description: Number of sql_activity_flush jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.sql_activity_flush.resume_retry_error + exported_name: jobs_sql_activity_flush_resume_retry_error + labeled_name: 'jobs.resume{name: sql_activity_flush, status: retry_error}' + description: Number of sql_activity_flush jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.standby_read_ts_poller.currently_idle + exported_name: jobs_standby_read_ts_poller_currently_idle + labeled_name: 'jobs{type: standby_read_ts_poller, status: currently_idle}' + description: Number of standby_read_ts_poller jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.standby_read_ts_poller.currently_paused + exported_name: jobs_standby_read_ts_poller_currently_paused + labeled_name: 'jobs{name: standby_read_ts_poller, status: currently_paused}' + description: Number of standby_read_ts_poller jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.standby_read_ts_poller.currently_running + exported_name: jobs_standby_read_ts_poller_currently_running + labeled_name: 'jobs{type: standby_read_ts_poller, status: 
currently_running}' + description: Number of standby_read_ts_poller jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.standby_read_ts_poller.expired_pts_records + exported_name: jobs_standby_read_ts_poller_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: standby_read_ts_poller}' + description: Number of expired protected timestamp records owned by standby_read_ts_poller jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.standby_read_ts_poller.fail_or_cancel_completed + exported_name: jobs_standby_read_ts_poller_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: standby_read_ts_poller, status: completed}' + description: Number of standby_read_ts_poller jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.standby_read_ts_poller.fail_or_cancel_failed + exported_name: jobs_standby_read_ts_poller_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: standby_read_ts_poller, status: failed}' + description: Number of standby_read_ts_poller jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.standby_read_ts_poller.fail_or_cancel_retry_error + exported_name: jobs_standby_read_ts_poller_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: standby_read_ts_poller, status: retry_error}' + description: Number of standby_read_ts_poller jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
jobs.standby_read_ts_poller.protected_age_sec + exported_name: jobs_standby_read_ts_poller_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: standby_read_ts_poller}' + description: The age of the oldest PTS record protected by standby_read_ts_poller jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.standby_read_ts_poller.protected_record_count + exported_name: jobs_standby_read_ts_poller_protected_record_count + labeled_name: 'jobs.protected_record_count{type: standby_read_ts_poller}' + description: Number of protected timestamp records held by standby_read_ts_poller jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.standby_read_ts_poller.resume_completed + exported_name: jobs_standby_read_ts_poller_resume_completed + labeled_name: 'jobs.resume{name: standby_read_ts_poller, status: completed}' + description: Number of standby_read_ts_poller jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.standby_read_ts_poller.resume_failed + exported_name: jobs_standby_read_ts_poller_resume_failed + labeled_name: 'jobs.resume{name: standby_read_ts_poller, status: failed}' + description: Number of standby_read_ts_poller jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.standby_read_ts_poller.resume_retry_error + exported_name: jobs_standby_read_ts_poller_resume_retry_error + labeled_name: 'jobs.resume{name: standby_read_ts_poller, status: retry_error}' + description: Number of standby_read_ts_poller jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.typedesc_schema_change.currently_idle + exported_name: 
jobs_typedesc_schema_change_currently_idle + labeled_name: 'jobs{type: typedesc_schema_change, status: currently_idle}' + description: Number of typedesc_schema_change jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.typedesc_schema_change.currently_paused + exported_name: jobs_typedesc_schema_change_currently_paused + labeled_name: 'jobs{name: typedesc_schema_change, status: currently_paused}' + description: Number of typedesc_schema_change jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.typedesc_schema_change.currently_running + exported_name: jobs_typedesc_schema_change_currently_running + labeled_name: 'jobs{type: typedesc_schema_change, status: currently_running}' + description: Number of typedesc_schema_change jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.typedesc_schema_change.expired_pts_records + exported_name: jobs_typedesc_schema_change_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: typedesc_schema_change}' + description: Number of expired protected timestamp records owned by typedesc_schema_change jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.typedesc_schema_change.fail_or_cancel_completed + exported_name: jobs_typedesc_schema_change_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: typedesc_schema_change, status: completed}' + description: Number of typedesc_schema_change jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.typedesc_schema_change.fail_or_cancel_failed + exported_name: 
jobs_typedesc_schema_change_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: typedesc_schema_change, status: failed}' + description: Number of typedesc_schema_change jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.typedesc_schema_change.fail_or_cancel_retry_error + exported_name: jobs_typedesc_schema_change_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: typedesc_schema_change, status: retry_error}' + description: Number of typedesc_schema_change jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.typedesc_schema_change.protected_age_sec + exported_name: jobs_typedesc_schema_change_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: typedesc_schema_change}' + description: The age of the oldest PTS record protected by typedesc_schema_change jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.typedesc_schema_change.protected_record_count + exported_name: jobs_typedesc_schema_change_protected_record_count + labeled_name: 'jobs.protected_record_count{type: typedesc_schema_change}' + description: Number of protected timestamp records held by typedesc_schema_change jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.typedesc_schema_change.resume_completed + exported_name: jobs_typedesc_schema_change_resume_completed + labeled_name: 'jobs.resume{name: typedesc_schema_change, status: completed}' + description: Number of typedesc_schema_change jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - 
name: jobs.typedesc_schema_change.resume_failed + exported_name: jobs_typedesc_schema_change_resume_failed + labeled_name: 'jobs.resume{name: typedesc_schema_change, status: failed}' + description: Number of typedesc_schema_change jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.typedesc_schema_change.resume_retry_error + exported_name: jobs_typedesc_schema_change_resume_retry_error + labeled_name: 'jobs.resume{name: typedesc_schema_change, status: retry_error}' + description: Number of typedesc_schema_change jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.update_table_metadata_cache.currently_idle + exported_name: jobs_update_table_metadata_cache_currently_idle + labeled_name: 'jobs{type: update_table_metadata_cache, status: currently_idle}' + description: Number of update_table_metadata_cache jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.update_table_metadata_cache.currently_paused + exported_name: jobs_update_table_metadata_cache_currently_paused + labeled_name: 'jobs{name: update_table_metadata_cache, status: currently_paused}' + description: Number of update_table_metadata_cache jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.update_table_metadata_cache.currently_running + exported_name: jobs_update_table_metadata_cache_currently_running + labeled_name: 'jobs{type: update_table_metadata_cache, status: currently_running}' + description: Number of update_table_metadata_cache jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: 
jobs.update_table_metadata_cache.expired_pts_records + exported_name: jobs_update_table_metadata_cache_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: update_table_metadata_cache}' + description: Number of expired protected timestamp records owned by update_table_metadata_cache jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.update_table_metadata_cache.fail_or_cancel_completed + exported_name: jobs_update_table_metadata_cache_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: update_table_metadata_cache, status: completed}' + description: Number of update_table_metadata_cache jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.update_table_metadata_cache.fail_or_cancel_failed + exported_name: jobs_update_table_metadata_cache_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: update_table_metadata_cache, status: failed}' + description: Number of update_table_metadata_cache jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.update_table_metadata_cache.fail_or_cancel_retry_error + exported_name: jobs_update_table_metadata_cache_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: update_table_metadata_cache, status: retry_error}' + description: Number of update_table_metadata_cache jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.update_table_metadata_cache.protected_age_sec + exported_name: jobs_update_table_metadata_cache_protected_age_sec + labeled_name: 
'jobs.protected_age_sec{type: update_table_metadata_cache}' + description: The age of the oldest PTS record protected by update_table_metadata_cache jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.update_table_metadata_cache.protected_record_count + exported_name: jobs_update_table_metadata_cache_protected_record_count + labeled_name: 'jobs.protected_record_count{type: update_table_metadata_cache}' + description: Number of protected timestamp records held by update_table_metadata_cache jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.update_table_metadata_cache.resume_completed + exported_name: jobs_update_table_metadata_cache_resume_completed + labeled_name: 'jobs.resume{name: update_table_metadata_cache, status: completed}' + description: Number of update_table_metadata_cache jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.update_table_metadata_cache.resume_failed + exported_name: jobs_update_table_metadata_cache_resume_failed + labeled_name: 'jobs.resume{name: update_table_metadata_cache, status: failed}' + description: Number of update_table_metadata_cache jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.update_table_metadata_cache.resume_retry_error + exported_name: jobs_update_table_metadata_cache_resume_retry_error + labeled_name: 'jobs.resume{name: update_table_metadata_cache, status: retry_error}' + description: Number of update_table_metadata_cache jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.protectedts.reconciliation.errors + exported_name: kv_protectedts_reconciliation_errors + 
description: number of errors encountered during reconciliation runs on this node + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.protectedts.reconciliation.num_runs + exported_name: kv_protectedts_reconciliation_num_runs + description: number of successful reconciliation runs on this node + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.protectedts.reconciliation.records_processed + exported_name: kv_protectedts_reconciliation_records_processed + description: number of records processed without error during reconciliation on this node + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.protectedts.reconciliation.records_removed + exported_name: kv_protectedts_reconciliation_records_removed + description: number of records removed during reconciliation runs on this node + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.streamer.batches.in_progress + exported_name: kv_streamer_batches_in_progress + description: Number of BatchRequests in progress across all KV Streamer operators + y_axis_label: Batches + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.streamer.batches.sent + exported_name: kv_streamer_batches_sent + description: Number of BatchRequests sent across all KV Streamer operators + y_axis_label: Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.streamer.batches.throttled + exported_name: kv_streamer_batches_throttled + description: Number of BatchRequests currently being throttled due to reaching the concurrency limit, across all KV Streamer operators + y_axis_label: Batches + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.streamer.operators.active + 
exported_name: kv_streamer_operators_active + description: Number of KV Streamer operators currently in use + y_axis_label: Operators + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: logical_replication.batch_hist_nanos + exported_name: logical_replication_batch_hist_nanos + description: Time spent per row flushing a batch + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: logical_replication.catchup_ranges + exported_name: logical_replication_catchup_ranges + description: Source side ranges undergoing catch up scans (inaccurate with multiple LDR jobs) + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: logical_replication.catchup_ranges_by_label + exported_name: logical_replication_catchup_ranges_by_label + description: Source side ranges undergoing catch up scans + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: logical_replication.checkpoint_events_ingested + exported_name: logical_replication_checkpoint_events_ingested + description: Checkpoint events ingested by all replication jobs + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.commit_latency + exported_name: logical_replication_commit_latency + description: 'Event commit latency: a difference between event MVCC timestamp and the time it was flushed into disk. 
If we batch events, then the difference between the oldest event in the batch and flush is recorded' + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: logical_replication.events_dlqed + exported_name: logical_replication_events_dlqed + description: Row update events sent to DLQ + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_dlqed_age + exported_name: logical_replication_events_dlqed_age + description: Row update events sent to DLQ due to reaching the maximum time allowed in the retry queue + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_dlqed_by_label + exported_name: logical_replication_events_dlqed_by_label + description: Row update events sent to DLQ by label + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_dlqed_errtype + exported_name: logical_replication_events_dlqed_errtype + description: Row update events sent to DLQ due to an error not considered retryable + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_dlqed_space + exported_name: logical_replication_events_dlqed_space + description: Row update events sent to DLQ due to capacity of the retry queue + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_ingested + exported_name: logical_replication_events_ingested + description: Events ingested by all replication jobs + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_ingested_by_label + exported_name: 
logical_replication_events_ingested_by_label + description: Events ingested by all replication jobs by label + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_initial_failure + exported_name: logical_replication_events_initial_failure + description: Failed attempts to apply an incoming row update + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_initial_success + exported_name: logical_replication_events_initial_success + description: Successful applications of an incoming row update + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_retry_failure + exported_name: logical_replication_events_retry_failure + description: Failed re-attempts to apply a row update + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_retry_success + exported_name: logical_replication_events_retry_success + description: Row update events applied after one or more retries + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.kv.update_too_old + exported_name: logical_replication_kv_update_too_old + description: Total number of updates that were not applied because they were too old + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.kv.value_refreshes + exported_name: logical_replication_kv_value_refreshes + description: Total number of batches that refreshed the previous value + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.logical_bytes + 
exported_name: logical_replication_logical_bytes + description: Logical bytes (sum of keys + values) received by all replication jobs + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.replan_count + exported_name: logical_replication_replan_count + description: Total number of dist sql replanning events + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.replicated_time_by_label + exported_name: logical_replication_replicated_time_by_label + description: Replicated time of the logical replication stream by label + y_axis_label: Seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: logical_replication.replicated_time_seconds + exported_name: logical_replication_replicated_time_seconds + description: The replicated time of the logical replication stream in seconds since the unix epoch. 
+ y_axis_label: Seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: logical_replication.retry_queue_bytes + exported_name: logical_replication_retry_queue_bytes + description: Logical bytes (sum of keys+values) in the retry queue + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: logical_replication.retry_queue_events + exported_name: logical_replication_retry_queue_events + description: Row update events in the retry queue + y_axis_label: Events + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: logical_replication.scanning_ranges + exported_name: logical_replication_scanning_ranges + description: Source side ranges undergoing an initial scan (inaccurate with multiple LDR jobs) + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: logical_replication.scanning_ranges_by_label + exported_name: logical_replication_scanning_ranges_by_label + description: Source side ranges undergoing an initial scan + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: obs.tablemetadata.update_job.duration + exported_name: obs_tablemetadata_update_job_duration + description: Time spent running the update table metadata job. + y_axis_label: Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: obs.tablemetadata.update_job.errors + exported_name: obs_tablemetadata_update_job_errors + description: The total number of errors that have been emitted from the update table metadata job. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: obs.tablemetadata.update_job.runs + exported_name: obs_tablemetadata_update_job_runs + description: The total number of runs of the update table metadata job. 
+ y_axis_label: Executions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: obs.tablemetadata.update_job.table_updates + exported_name: obs_tablemetadata_update_job_table_updates + description: The total number of rows that have been updated in system.table_metadata + y_axis_label: Rows Updated + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: physical_replication.admit_latency + exported_name: physical_replication_admit_latency + description: 'Event admission latency: a difference between event MVCC timestamp and the time it was admitted into ingestion processor' + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: physical_replication.commit_latency + exported_name: physical_replication_commit_latency + description: 'Event commit latency: a difference between event MVCC timestamp and the time it was flushed into disk. If we batch events, then the difference between the oldest event in the batch and flush is recorded' + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: physical_replication.distsql_replan_count + exported_name: physical_replication_distsql_replan_count + description: Total number of dist sql replanning events + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: physical_replication.events_ingested + exported_name: physical_replication_events_ingested + description: Events ingested by all replication jobs + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: physical_replication.failover_progress + exported_name: physical_replication_failover_progress + description: The number of ranges left to revert in order to complete an inflight cutover + y_axis_label: Ranges + type: GAUGE + unit: COUNT + 
aggregation: AVG + derivative: NONE + - name: physical_replication.flush_hist_nanos + exported_name: physical_replication_flush_hist_nanos + description: Time spent flushing messages across all replication streams + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: physical_replication.flushes + exported_name: physical_replication_flushes + description: Total flushes across all replication jobs + y_axis_label: Flushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: physical_replication.logical_bytes + exported_name: physical_replication_logical_bytes + description: Logical bytes (sum of keys + values) ingested by all replication jobs + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: physical_replication.replicated_time_seconds + exported_name: physical_replication_replicated_time_seconds + description: The replicated time of the physical replication stream in seconds since the unix epoch. + y_axis_label: Seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: physical_replication.resolved_events_ingested + exported_name: physical_replication_resolved_events_ingested + description: Resolved events ingested by all replication jobs + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: physical_replication.running + exported_name: physical_replication_running + description: Number of currently running replication streams + y_axis_label: Replication Streams + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: requests.slow.distsender + exported_name: requests_slow_distsender + description: |- + Number of range-bound RPCs currently stuck or retrying for a long time. + + Note that this is not a good signal for KV health. 
The remote side of the + RPCs tracked here may experience contention, so an end user can easily + cause values for this metric to be emitted by leaving a transaction open + for a long time and contending with it using a second transaction. + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: round-trip-latency + exported_name: round_trip_latency + description: | + Distribution of round-trip latencies with other nodes. + + This only reflects successful heartbeats and measures gRPC overhead as well as + possible head-of-line blocking. Elevated values in this metric may hint at + network issues and/or saturation, but they are no proof of them. CPU overload + can similarly elevate this metric. The operator should look towards OS-level + metrics such as packet loss, retransmits, etc, to conclusively diagnose network + issues. Heartbeats are not very frequent (~seconds), so they may not capture + rare or short-lived degradations. + y_axis_label: Round-trip time + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: rpc.client.bytes.egress + exported_name: rpc_client_bytes_egress + description: Counter of TCP bytes sent via gRPC on connections we initiated. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.client.bytes.ingress + exported_name: rpc_client_bytes_ingress + description: Counter of TCP bytes received via gRPC on connections we initiated. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.connection.connected + exported_name: rpc_connection_connected + description: | + Counter of TCP level connected connections. + + This metric is the number of gRPC connections from the TCP level. Unlike rpc.connection.healthy + this metric does not take into account whether the application has been able to heartbeat + over this connection. 
+ y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rpc.connection.inactive + exported_name: rpc_connection_inactive + description: Gauge of current connections in an inactive state and pending deletion; these are not healthy but are not tracked as unhealthy either because there is reason to believe that the connection is no longer relevant, for example if the node has since been seen under a new address + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rpc.server.request.duration.nanos + exported_name: rpc_server_request_duration_nanos + description: Duration of a gRPC request in nanoseconds. + y_axis_label: Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: schedules.BACKUP.last-completed-time-by-virtual_cluster + exported_name: schedules_BACKUP_last_completed_time_by_virtual_cluster + description: The unix timestamp of the most recently completed host scheduled backup by virtual cluster specified as maintaining this metric + y_axis_label: Jobs + type: GAUGE + unit: TIMESTAMP_SEC + aggregation: AVG + derivative: NONE + - name: schedules.BACKUP.protected_age_sec + exported_name: schedules_BACKUP_protected_age_sec + labeled_name: 'schedules.protected_age_sec{name: BACKUP}' + description: The age of the oldest PTS record protected by BACKUP schedules + y_axis_label: Seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: schedules.BACKUP.protected_record_count + exported_name: schedules_BACKUP_protected_record_count + labeled_name: 'schedules.protected_record_count{name: BACKUP}' + description: Number of PTS records held by BACKUP schedules + y_axis_label: Records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: schedules.BACKUP.started + exported_name: schedules_BACKUP_started + labeled_name: 'schedules{name: BACKUP, status: started}' + description: Number of
BACKUP jobs started + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.BACKUP.succeeded + exported_name: schedules_BACKUP_succeeded + labeled_name: 'schedules{name: BACKUP, status: succeeded}' + description: Number of BACKUP jobs succeeded + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.CHANGEFEED.failed + exported_name: schedules_CHANGEFEED_failed + labeled_name: 'schedules{name: CHANGEFEED, status: failed}' + description: Number of CHANGEFEED jobs failed + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.CHANGEFEED.started + exported_name: schedules_CHANGEFEED_started + labeled_name: 'schedules{name: CHANGEFEED, status: started}' + description: Number of CHANGEFEED jobs started + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.CHANGEFEED.succeeded + exported_name: schedules_CHANGEFEED_succeeded + labeled_name: 'schedules{name: CHANGEFEED, status: succeeded}' + description: Number of CHANGEFEED jobs succeeded + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.error + exported_name: schedules_error + description: Number of schedules which did not execute successfully + y_axis_label: Schedules + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: schedules.malformed + exported_name: schedules_malformed + description: Number of malformed schedules + y_axis_label: Schedules + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: schedules.round.jobs-started + exported_name: schedules_round_jobs_started + description: The number of jobs started + y_axis_label: Jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: 
schedules.round.reschedule-skip + exported_name: schedules_round_reschedule_skip + description: The number of schedules rescheduled due to SKIP policy + y_axis_label: Schedules + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: schedules.round.reschedule-wait + exported_name: schedules_round_reschedule_wait + description: The number of schedules rescheduled due to WAIT policy + y_axis_label: Schedules + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: schedules.scheduled-row-level-ttl-executor.started + exported_name: schedules_scheduled_row_level_ttl_executor_started + labeled_name: 'schedules{name: scheduled-row-level-ttl-executor, status: started}' + description: Number of scheduled-row-level-ttl-executor jobs started + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.scheduled-row-level-ttl-executor.succeeded + exported_name: schedules_scheduled_row_level_ttl_executor_succeeded + labeled_name: 'schedules{name: scheduled-row-level-ttl-executor, status: succeeded}' + description: Number of scheduled-row-level-ttl-executor jobs succeeded + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.scheduled-schema-telemetry-executor.failed + exported_name: schedules_scheduled_schema_telemetry_executor_failed + labeled_name: 'schedules{name: scheduled-schema-telemetry-executor, status: failed}' + description: Number of scheduled-schema-telemetry-executor jobs failed + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.scheduled-schema-telemetry-executor.started + exported_name: schedules_scheduled_schema_telemetry_executor_started + labeled_name: 'schedules{name: scheduled-schema-telemetry-executor, status: started}' + description: Number of scheduled-schema-telemetry-executor jobs started + 
y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.scheduled-schema-telemetry-executor.succeeded + exported_name: schedules_scheduled_schema_telemetry_executor_succeeded + labeled_name: 'schedules{name: scheduled-schema-telemetry-executor, status: succeeded}' + description: Number of scheduled-schema-telemetry-executor jobs succeeded + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.scheduled-sql-stats-compaction-executor.failed + exported_name: schedules_scheduled_sql_stats_compaction_executor_failed + labeled_name: 'schedules{name: scheduled-sql-stats-compaction-executor, status: failed}' + description: Number of scheduled-sql-stats-compaction-executor jobs failed + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.scheduled-sql-stats-compaction-executor.started + exported_name: schedules_scheduled_sql_stats_compaction_executor_started + labeled_name: 'schedules{name: scheduled-sql-stats-compaction-executor, status: started}' + description: Number of scheduled-sql-stats-compaction-executor jobs started + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.scheduled-sql-stats-compaction-executor.succeeded + exported_name: schedules_scheduled_sql_stats_compaction_executor_succeeded + labeled_name: 'schedules{name: scheduled-sql-stats-compaction-executor, status: succeeded}' + description: Number of scheduled-sql-stats-compaction-executor jobs succeeded + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: server.http.request.duration.nanos + exported_name: server_http_request_duration_nanos + description: Duration of an HTTP request in nanoseconds. 
+ y_axis_label: Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.bytesin + exported_name: sql_bytesin + description: Number of SQL bytes received + y_axis_label: SQL Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.bytesout + exported_name: sql_bytesout + description: Number of SQL bytes sent + y_axis_label: SQL Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.call_stored_proc.count + exported_name: sql_call_stored_proc_count + description: Number of successfully executed stored procedure calls + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.call_stored_proc.count.internal + exported_name: sql_call_stored_proc_count_internal + description: Number of successfully executed stored procedure calls (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.call_stored_proc.started.count + exported_name: sql_call_stored_proc_started_count + description: Number of invocations of stored procedures via CALL statements + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.call_stored_proc.started.count.internal + exported_name: sql_call_stored_proc_started_count_internal + description: Number of invocations of stored procedures via CALL statements (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.conns_waiting_to_hash + exported_name: sql_conns_waiting_to_hash + description: Number of SQL connection attempts that are being throttled in order to limit password hashing concurrency + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation:
AVG + derivative: NONE + - name: sql.contention.resolver.failed_resolutions + exported_name: sql_contention_resolver_failed_resolutions + description: Number of failed transaction ID resolution attempts + y_axis_label: Failed transaction ID resolution count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.contention.resolver.queue_size + exported_name: sql_contention_resolver_queue_size + description: Length of queued unresolved contention events + y_axis_label: Queue length + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.contention.resolver.retries + exported_name: sql_contention_resolver_retries + description: Number of times transaction ID resolution has been retried + y_axis_label: Retry count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.contention.txn_id_cache.miss + exported_name: sql_contention_txn_id_cache_miss + description: Number of cache misses + y_axis_label: Cache miss + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.contention.txn_id_cache.read + exported_name: sql_contention_txn_id_cache_read + description: Number of cache reads + y_axis_label: Cache read + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.count + exported_name: sql_copy_count + description: Number of COPY SQL statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.count.internal + exported_name: sql_copy_count_internal + description: Number of COPY SQL statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.nonatomic.count + exported_name: sql_copy_nonatomic_count + description: Number of
non-atomic COPY SQL statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.nonatomic.count.internal + exported_name: sql_copy_nonatomic_count_internal + description: Number of non-atomic COPY SQL statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.nonatomic.started.count + exported_name: sql_copy_nonatomic_started_count + description: Number of non-atomic COPY SQL statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.nonatomic.started.count.internal + exported_name: sql_copy_nonatomic_started_count_internal + description: Number of non-atomic COPY SQL statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.started.count + exported_name: sql_copy_started_count + description: Number of COPY SQL statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.started.count.internal + exported_name: sql_copy_started_count_internal + description: Number of COPY SQL statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.crud_query.count + exported_name: sql_crud_query_count + description: Number of SQL SELECT, INSERT, UPDATE, DELETE statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.crud_query.count.internal + exported_name: sql_crud_query_count_internal + 
description: Number of SQL SELECT, INSERT, UPDATE, DELETE statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.crud_query.started.count + exported_name: sql_crud_query_started_count + description: Number of SQL SELECT, INSERT, UPDATE, DELETE statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.crud_query.started.count.internal + exported_name: sql_crud_query_started_count_internal + description: Number of SQL SELECT, INSERT, UPDATE, DELETE statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.ddl.started.count + exported_name: sql_ddl_started_count + description: Number of SQL DDL statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.ddl.started.count.internal + exported_name: sql_ddl_started_count_internal + description: Number of SQL DDL statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.delete.started.count + exported_name: sql_delete_started_count + description: Number of SQL DELETE statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.delete.started.count.internal + exported_name: sql_delete_started_count_internal + description: Number of SQL DELETE statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.disk.distsql.current + exported_name: sql_disk_distsql_current + 
description: Current sql statement disk usage for distsql + y_axis_label: Disk + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.disk.distsql.max + exported_name: sql_disk_distsql_max + description: Disk usage per sql statement for distsql + y_axis_label: Disk + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.disk.distsql.spilled.bytes.read + exported_name: sql_disk_distsql_spilled_bytes_read + description: Number of bytes read from temporary disk storage as a result of spilling + y_axis_label: Disk + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.disk.distsql.spilled.bytes.written + exported_name: sql_disk_distsql_spilled_bytes_written + description: Number of bytes written to temporary disk storage as a result of spilling + y_axis_label: Disk + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.cumulative_contention_nanos + exported_name: sql_distsql_cumulative_contention_nanos + description: Cumulative contention across all queries (in nanoseconds) + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.dist_query_rerun_locally.count + exported_name: sql_distsql_dist_query_rerun_locally_count + description: Total number of cases when distributed query error resulted in a local rerun + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.dist_query_rerun_locally.failure_count + exported_name: sql_distsql_dist_query_rerun_locally_failure_count + description: Total number of cases when the local rerun of a distributed query resulted in an error + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.distributed_exec.count + exported_name: 
sql_distsql_distributed_exec_count + description: Number of invocations of the execution engine executed with full or partial distribution (multiple of which may occur for a single SQL statement) + y_axis_label: DistSQL runs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.exec.latency + exported_name: sql_distsql_exec_latency + description: Latency of DistSQL statement execution + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.distsql.exec.latency.internal + exported_name: sql_distsql_exec_latency_internal + description: Latency of DistSQL statement execution (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.distsql.flows.active + exported_name: sql_distsql_flows_active + description: Number of distributed SQL flows currently active + y_axis_label: Flows + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.distsql.flows.total + exported_name: sql_distsql_flows_total + description: Number of distributed SQL flows executed + y_axis_label: Flows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.queries.active + exported_name: sql_distsql_queries_active + description: Number of invocations of the execution engine currently active (multiple of which may occur for a single SQL statement) + y_axis_label: DistSQL runs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.distsql.queries.spilled + exported_name: sql_distsql_queries_spilled + description: Number of queries that have spilled to disk + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.queries.total + exported_name: sql_distsql_queries_total + description: Number of invocations of the execution engine 
executed (multiple of which may occur for a single SQL statement) + y_axis_label: DistSQL runs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.select.count + exported_name: sql_distsql_select_count + description: Number of SELECT statements planned to be distributed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.select.count.internal + exported_name: sql_distsql_select_count_internal + description: Number of SELECT statements planned to be distributed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.select.distributed_exec.count + exported_name: sql_distsql_select_distributed_exec_count + description: Number of SELECT statements that were distributed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.select.distributed_exec.count.internal + exported_name: sql_distsql_select_distributed_exec_count_internal + description: Number of SELECT statements that were distributed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.service.latency + exported_name: sql_distsql_service_latency + description: Latency of DistSQL request execution + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.distsql.service.latency.internal + exported_name: sql_distsql_service_latency_internal + description: Latency of DistSQL request execution (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.distsql.vec.openfds + exported_name: sql_distsql_vec_openfds + 
description: Current number of open file descriptors used by vectorized external storage + y_axis_label: Files + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.exec.latency + exported_name: sql_exec_latency + description: Latency of SQL statement execution + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.exec.latency.consistent + exported_name: sql_exec_latency_consistent + description: Latency of SQL statement execution of non-historical queries + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.exec.latency.consistent.internal + exported_name: sql_exec_latency_consistent_internal + description: Latency of SQL statement execution of non-historical queries (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.exec.latency.detail + exported_name: sql_exec_latency_detail + description: Latency of SQL statement execution, by statement fingerprint + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.exec.latency.detail.internal + exported_name: sql_exec_latency_detail_internal + description: Latency of SQL statement execution, by statement fingerprint (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.exec.latency.historical + exported_name: sql_exec_latency_historical + description: Latency of SQL statement execution of historical queries + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.exec.latency.historical.internal + exported_name: sql_exec_latency_historical_internal + description: Latency of SQL statement execution of historical queries (internal queries) + y_axis_label: SQL Internal 
Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.exec.latency.internal + exported_name: sql_exec_latency_internal + description: Latency of SQL statement execution (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.feature_flag_denial + exported_name: sql_feature_flag_denial + description: Counter of the number of statements denied by a feature flag + y_axis_label: Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.full_scan_rejected.count + exported_name: sql_guardrails_full_scan_rejected_count + description: Number of full table or index scans that have been rejected because of `disallow_full_table_scans` guardrail + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.full_scan_rejected.count.internal + exported_name: sql_guardrails_full_scan_rejected_count_internal + description: Number of full table or index scans that have been rejected because of `disallow_full_table_scans` guardrail (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.max_row_size_err.count + exported_name: sql_guardrails_max_row_size_err_count + description: Number of rows observed violating sql.guardrails.max_row_size_err + y_axis_label: Rows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.max_row_size_err.count.internal + exported_name: sql_guardrails_max_row_size_err_count_internal + description: Number of rows observed violating sql.guardrails.max_row_size_err (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE 
+ - name: sql.guardrails.max_row_size_log.count + exported_name: sql_guardrails_max_row_size_log_count + description: Number of rows observed violating sql.guardrails.max_row_size_log + y_axis_label: Rows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.max_row_size_log.count.internal + exported_name: sql_guardrails_max_row_size_log_count_internal + description: Number of rows observed violating sql.guardrails.max_row_size_log (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_read_err.count + exported_name: sql_guardrails_transaction_rows_read_err_count + description: Number of transactions errored because of transaction_rows_read_err guardrail + y_axis_label: Errored transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_read_err.count.internal + exported_name: sql_guardrails_transaction_rows_read_err_count_internal + description: Number of transactions errored because of transaction_rows_read_err guardrail (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_read_log.count + exported_name: sql_guardrails_transaction_rows_read_log_count + description: Number of transactions logged because of transaction_rows_read_log guardrail + y_axis_label: Logged transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_read_log.count.internal + exported_name: sql_guardrails_transaction_rows_read_log_count_internal + description: Number of transactions logged because of transaction_rows_read_log guardrail (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + 
unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_written_err.count + exported_name: sql_guardrails_transaction_rows_written_err_count + description: Number of transactions errored because of transaction_rows_written_err guardrail + y_axis_label: Errored transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_written_err.count.internal + exported_name: sql_guardrails_transaction_rows_written_err_count_internal + description: Number of transactions errored because of transaction_rows_written_err guardrail (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_written_log.count + exported_name: sql_guardrails_transaction_rows_written_log_count + description: Number of transactions logged because of transaction_rows_written_log guardrail + y_axis_label: Logged transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_written_log.count.internal + exported_name: sql_guardrails_transaction_rows_written_log_count_internal + description: Number of transactions logged because of transaction_rows_written_log guardrail (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_schema_cache.hits + exported_name: sql_hydrated_schema_cache_hits + description: counter on the number of cache hits + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_schema_cache.misses + exported_name: sql_hydrated_schema_cache_misses + description: counter on the number of cache misses + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG 
+ derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_table_cache.hits + exported_name: sql_hydrated_table_cache_hits + description: counter on the number of cache hits + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_table_cache.misses + exported_name: sql_hydrated_table_cache_misses + description: counter on the number of cache misses + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_type_cache.hits + exported_name: sql_hydrated_type_cache_hits + description: counter on the number of cache hits + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_type_cache.misses + exported_name: sql_hydrated_type_cache_misses + description: counter on the number of cache misses + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_udf_cache.hits + exported_name: sql_hydrated_udf_cache_hits + description: counter on the number of cache hits + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_udf_cache.misses + exported_name: sql_hydrated_udf_cache_misses + description: counter on the number of cache misses + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.insert.started.count + exported_name: sql_insert_started_count + description: Number of SQL INSERT statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.insert.started.count.internal + exported_name: sql_insert_started_count_internal + description: Number of SQL INSERT statements started (internal queries) + y_axis_label: SQL Internal Statements + type: 
COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.insights.anomaly_detection.evictions + exported_name: sql_insights_anomaly_detection_evictions + description: Evictions of fingerprint latency summaries due to memory pressure + y_axis_label: Evictions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.insights.anomaly_detection.fingerprints + exported_name: sql_insights_anomaly_detection_fingerprints + description: Current number of statement fingerprints being monitored for anomaly detection + y_axis_label: Fingerprints + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.insights.anomaly_detection.memory + exported_name: sql_insights_anomaly_detection_memory + description: Current memory used to support anomaly detection + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.leases.active + exported_name: sql_leases_active + description: The number of outstanding SQL schema leases. + y_axis_label: Outstanding leases + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.leases.expired + exported_name: sql_leases_expired + description: The number of outstanding session based SQL schema leases expired. + y_axis_label: Leases expired because of a new version + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.leases.long_wait_for_initial_version + exported_name: sql_leases_long_wait_for_initial_version + description: The number of wait for initial version routines taking more than the lease duration. + y_axis_label: Number of wait for initial version routines executing + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.leases.long_wait_for_no_version + exported_name: sql_leases_long_wait_for_no_version + description: The number of wait for no versions that are taking more than the lease duration. 
+ y_axis_label: Number of wait for long wait for no version routines executing + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.leases.long_wait_for_one_version + exported_name: sql_leases_long_wait_for_one_version + description: The number of wait for one versions that are taking more than the lease duration. + y_axis_label: Number of wait for long wait for one version routines executing + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.leases.long_wait_for_two_version_invariant + exported_name: sql_leases_long_wait_for_two_version_invariant + description: The number of two version invariant waits that are taking more than the lease duration. + y_axis_label: Number of two version invariant wait routines executing + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.leases.waiting_to_expire + exported_name: sql_leases_waiting_to_expire + description: The number of outstanding session based SQL schema leases with expiry. 
+ y_axis_label: Outstanding Leases Waiting to Expire + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.mem.bulk.current + exported_name: sql_mem_bulk_current + description: Current sql statement memory usage for bulk operations + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.bulk.max + exported_name: sql_mem_bulk_max + description: Memory usage per sql statement for bulk operations + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.conns.current + exported_name: sql_mem_conns_current + description: Current sql statement memory usage for conns + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.conns.max + exported_name: sql_mem_conns_max + description: Memory usage per sql statement for conns + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.distsql.current + exported_name: sql_mem_distsql_current + description: Current sql statement memory usage for distsql + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.distsql.max + exported_name: sql_mem_distsql_max + description: Memory usage per sql statement for distsql + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.current + exported_name: sql_mem_internal_current + description: Current sql statement memory usage for internal + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.max + exported_name: sql_mem_internal_max + description: Memory usage per sql statement for internal + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.session.current + exported_name: sql_mem_internal_session_current + 
description: Current sql session memory usage for internal + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.session.max + exported_name: sql_mem_internal_session_max + description: Memory usage per sql session for internal + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.session.prepared.current + exported_name: sql_mem_internal_session_prepared_current + description: Current sql session memory usage by prepared statements for internal + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.session.prepared.max + exported_name: sql_mem_internal_session_prepared_max + description: Memory usage by prepared statements per sql session for internal + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.txn.current + exported_name: sql_mem_internal_txn_current + description: Current sql transaction memory usage for internal + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.txn.max + exported_name: sql_mem_internal_txn_max + description: Memory usage per sql transaction for internal + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.root.max + exported_name: sql_mem_root_max + description: Memory usage per sql statement for root + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.current + exported_name: sql_mem_sql_current + description: Current sql statement memory usage for sql + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.max + exported_name: sql_mem_sql_max + description: Memory usage per sql statement for sql + y_axis_label: Memory + type: HISTOGRAM + unit: 
BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.session.current + exported_name: sql_mem_sql_session_current + description: Current sql session memory usage for sql + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.session.max + exported_name: sql_mem_sql_session_max + description: Memory usage per sql session for sql + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.session.prepared.current + exported_name: sql_mem_sql_session_prepared_current + description: Current sql session memory usage by prepared statements for sql + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.session.prepared.max + exported_name: sql_mem_sql_session_prepared_max + description: Memory usage by prepared statements per sql session for sql + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.txn.current + exported_name: sql_mem_sql_txn_current + description: Current sql transaction memory usage for sql + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.txn.max + exported_name: sql_mem_sql_txn_max + description: Memory usage per sql transaction for sql + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.misc.count + exported_name: sql_misc_count + description: Number of other SQL statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.misc.count.internal + exported_name: sql_misc_count_internal + description: Number of other SQL statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - 
name: sql.misc.started.count + exported_name: sql_misc_started_count + description: Number of other SQL statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.misc.started.count.internal + exported_name: sql_misc_started_count_internal + description: Number of other SQL statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.optimizer.plan_cache.hits + exported_name: sql_optimizer_plan_cache_hits + description: Number of non-prepared statements for which a cached plan was used + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.optimizer.plan_cache.hits.internal + exported_name: sql_optimizer_plan_cache_hits_internal + description: Number of non-prepared statements for which a cached plan was used (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.optimizer.plan_cache.misses + exported_name: sql_optimizer_plan_cache_misses + description: Number of non-prepared statements for which a cached plan was not used + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.optimizer.plan_cache.misses.internal + exported_name: sql_optimizer_plan_cache_misses_internal + description: Number of non-prepared statements for which a cached plan was not used (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.pgwire.pipeline.count + exported_name: sql_pgwire_pipeline_count + description: Number of pgwire commands received by the server that have not yet begun processing + y_axis_label: Commands + type: 
GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.pgwire_cancel.ignored + exported_name: sql_pgwire_cancel_ignored + description: Number of pgwire query cancel requests that were ignored due to rate limiting + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.pgwire_cancel.successful + exported_name: sql_pgwire_cancel_successful + description: Number of pgwire query cancel requests that were successful + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.pgwire_cancel.total + exported_name: sql_pgwire_cancel_total + description: Number of pgwire query cancel requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.pre_serve.bytesin + exported_name: sql_pre_serve_bytesin + description: Number of SQL bytes received prior to routing the connection to the target SQL server + y_axis_label: SQL Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.pre_serve.bytesout + exported_name: sql_pre_serve_bytesout + description: Number of SQL bytes sent prior to routing the connection to the target SQL server + y_axis_label: SQL Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.pre_serve.conn.failures + exported_name: sql_pre_serve_conn_failures + description: Number of SQL connection failures prior to routing the connection to the target SQL server + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.pre_serve.mem.cur + exported_name: sql_pre_serve_mem_cur + description: Current memory usage for SQL connections prior to routing the connection to the target SQL server + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE 
+ - name: sql.pre_serve.mem.max + exported_name: sql_pre_serve_mem_max + description: Memory usage for SQL connections prior to routing the connection to the target SQL server + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.pre_serve.new_conns + exported_name: sql_pre_serve_new_conns + description: Number of SQL connections created prior to routing the connection to the target SQL server + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.query.count + exported_name: sql_query_count + description: Number of SQL operations started including queries, and transaction control statements + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.query.count.internal + exported_name: sql_query_count_internal + description: Number of SQL operations started including queries, and transaction control statements (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.query.started.count + exported_name: sql_query_started_count + description: Number of SQL operations started including queries, and transaction control statements + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.query.started.count.internal + exported_name: sql_query_started_count_internal + description: Number of SQL operations started including queries, and transaction control statements (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.query.unique.count + exported_name: sql_query_unique_count + description: Cardinality estimate of the set of statement fingerprints + y_axis_label: SQL Statements + type: COUNTER + 
unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.query.unique.count.internal + exported_name: sql_query_unique_count_internal + description: Cardinality estimate of the set of statement fingerprints (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.count + exported_name: sql_restart_savepoint_count + description: Number of `SAVEPOINT cockroach_restart` statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.count.internal + exported_name: sql_restart_savepoint_count_internal + description: Number of `SAVEPOINT cockroach_restart` statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.release.count + exported_name: sql_restart_savepoint_release_count + description: Number of `RELEASE SAVEPOINT cockroach_restart` statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.release.count.internal + exported_name: sql_restart_savepoint_release_count_internal + description: Number of `RELEASE SAVEPOINT cockroach_restart` statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.release.started.count + exported_name: sql_restart_savepoint_release_started_count + description: Number of `RELEASE SAVEPOINT cockroach_restart` statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
sql.restart_savepoint.release.started.count.internal + exported_name: sql_restart_savepoint_release_started_count_internal + description: Number of `RELEASE SAVEPOINT cockroach_restart` statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.rollback.count + exported_name: sql_restart_savepoint_rollback_count + description: Number of `ROLLBACK TO SAVEPOINT cockroach_restart` statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.rollback.count.internal + exported_name: sql_restart_savepoint_rollback_count_internal + description: Number of `ROLLBACK TO SAVEPOINT cockroach_restart` statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.rollback.started.count + exported_name: sql_restart_savepoint_rollback_started_count + description: Number of `ROLLBACK TO SAVEPOINT cockroach_restart` statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.rollback.started.count.internal + exported_name: sql_restart_savepoint_rollback_started_count_internal + description: Number of `ROLLBACK TO SAVEPOINT cockroach_restart` statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.started.count + exported_name: sql_restart_savepoint_started_count + description: Number of `SAVEPOINT cockroach_restart` statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: 
NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.started.count.internal + exported_name: sql_restart_savepoint_started_count_internal + description: Number of `SAVEPOINT cockroach_restart` statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.count + exported_name: sql_savepoint_count + description: Number of SQL SAVEPOINT statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.count.internal + exported_name: sql_savepoint_count_internal + description: Number of SQL SAVEPOINT statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.release.count + exported_name: sql_savepoint_release_count + description: Number of `RELEASE SAVEPOINT` statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.release.count.internal + exported_name: sql_savepoint_release_count_internal + description: Number of `RELEASE SAVEPOINT` statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.release.started.count + exported_name: sql_savepoint_release_started_count + description: Number of `RELEASE SAVEPOINT` statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.release.started.count.internal + exported_name: sql_savepoint_release_started_count_internal + description: Number of `RELEASE SAVEPOINT` statements started (internal 
queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.rollback.count + exported_name: sql_savepoint_rollback_count + description: Number of `ROLLBACK TO SAVEPOINT` statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.rollback.count.internal + exported_name: sql_savepoint_rollback_count_internal + description: Number of `ROLLBACK TO SAVEPOINT` statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.rollback.started.count + exported_name: sql_savepoint_rollback_started_count + description: Number of `ROLLBACK TO SAVEPOINT` statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.rollback.started.count.internal + exported_name: sql_savepoint_rollback_started_count_internal + description: Number of `ROLLBACK TO SAVEPOINT` statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.started.count + exported_name: sql_savepoint_started_count + description: Number of SQL SAVEPOINT statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.started.count.internal + exported_name: sql_savepoint_started_count_internal + description: Number of SQL SAVEPOINT statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.schema.invalid_objects + exported_name: 
sql_schema_invalid_objects + description: Gauge of detected invalid objects within the system.descriptor table (measured by querying crdb_internal.invalid_objects) + y_axis_label: Objects + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.schema_changer.object_count + exported_name: sql_schema_changer_object_count + description: Gauge of the number of objects in the cluster + y_axis_label: Objects + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.select.started.count + exported_name: sql_select_started_count + description: Number of SQL SELECT statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.select.started.count.internal + exported_name: sql_select_started_count_internal + description: Number of SQL SELECT statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.service.latency.consistent + exported_name: sql_service_latency_consistent + description: Latency of SQL request execution of non-historical queries + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.service.latency.consistent.internal + exported_name: sql_service_latency_consistent_internal + description: Latency of SQL request execution of non-historical queries (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.service.latency.historical + exported_name: sql_service_latency_historical + description: Latency of SQL request execution of historical queries + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.service.latency.historical.internal + exported_name: sql_service_latency_historical_internal +
description: Latency of SQL request execution of historical queries (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.statement_timeout.count + exported_name: sql_statement_timeout_count + description: Count of statements that failed because they exceeded the statement timeout + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.statement_timeout.count.internal + exported_name: sql_statement_timeout_count_internal + description: Count of statements that failed because they exceeded the statement timeout (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.statements.auto_retry.count + exported_name: sql_statements_auto_retry_count + description: Number of SQL statement automatic retries + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.statements.auto_retry.count.internal + exported_name: sql_statements_auto_retry_count_internal + description: Number of SQL statement automatic retries (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.activity.update.latency + exported_name: sql_stats_activity_update_latency + description: The latency of updates made by the SQL activity updater job. 
Includes failed update attempts + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.stats.activity.updates.failed + exported_name: sql_stats_activity_updates_failed + description: Number of update attempts made by the SQL activity updater job that failed with errors + y_axis_label: failed updates + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.activity.updates.successful + exported_name: sql_stats_activity_updates_successful + description: Number of successful updates made by the SQL activity updater job + y_axis_label: successful updates + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.cleanup.rows_removed + exported_name: sql_stats_cleanup_rows_removed + description: Number of stale statistics rows that are removed + y_axis_label: SQL Stats Cleanup + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.discarded.current + exported_name: sql_stats_discarded_current + description: Number of fingerprint statistics being discarded + y_axis_label: Discarded SQL Stats + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.flush.done_signals.ignored + exported_name: sql_stats_flush_done_signals_ignored + description: Number of times the SQL Stats activity update job ignored the signal sent to it indicating a flush has completed + y_axis_label: flush done signals ignored + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.flush.fingerprint.count + exported_name: sql_stats_flush_fingerprint_count + description: The number of unique statement and transaction fingerprints included in the SQL Stats flush + y_axis_label: statement & transaction fingerprints + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: 
NON_NEGATIVE_DERIVATIVE + - name: sql.stats.flush.latency + exported_name: sql_stats_flush_latency + description: The latency of SQL Stats flushes to persistent storage. Includes failed flush attempts + y_axis_label: nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.stats.flushes.failed + exported_name: sql_stats_flushes_failed + description: Number of attempted SQL Stats flushes that failed with errors + y_axis_label: failed flushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.flushes.successful + exported_name: sql_stats_flushes_successful + description: Number of times SQL Stats are flushed successfully to persistent storage + y_axis_label: successful flushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.mem.current + exported_name: sql_stats_mem_current + description: Current memory usage for fingerprint storage + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.stats.mem.max + exported_name: sql_stats_mem_max + description: Memory usage for fingerprint storage + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.stats.reported.mem.current + exported_name: sql_stats_reported_mem_current + description: Current memory usage for reported fingerprint storage + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.stats.reported.mem.max + exported_name: sql_stats_reported_mem_max + description: Memory usage for reported fingerprint storage + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.stats.txn_stats_collection.duration + exported_name: sql_stats_txn_stats_collection_duration + description: Time taken in nanoseconds to collect transaction stats + y_axis_label: SQL Transaction Stats
Collection Overhead + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.temp_object_cleaner.active_cleaners + exported_name: sql_temp_object_cleaner_active_cleaners + description: number of cleaner tasks currently running on this node + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.temp_object_cleaner.schemas_deletion_error + exported_name: sql_temp_object_cleaner_schemas_deletion_error + description: number of errored schema deletions by the temp object cleaner on this node + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.temp_object_cleaner.schemas_deletion_success + exported_name: sql_temp_object_cleaner_schemas_deletion_success + description: number of successful schema deletions by the temp object cleaner on this node + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.temp_object_cleaner.schemas_to_delete + exported_name: sql_temp_object_cleaner_schemas_to_delete + description: number of schemas to be deleted by the temp object cleaner on this node + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.transaction_timeout.count + exported_name: sql_transaction_timeout_count + description: Count of statements that failed because they exceeded the transaction timeout + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.transaction_timeout.count.internal + exported_name: sql_transaction_timeout_count_internal + description: Count of statements that failed because they exceeded the transaction timeout (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.auto_retry.count + 
exported_name: sql_txn_auto_retry_count + description: Number of SQL transaction automatic retries + y_axis_label: SQL Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.auto_retry.count.internal + exported_name: sql_txn_auto_retry_count_internal + description: Number of SQL transaction automatic retries (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.begin.started.count + exported_name: sql_txn_begin_started_count + description: Number of SQL transaction BEGIN statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.begin.started.count.internal + exported_name: sql_txn_begin_started_count_internal + description: Number of SQL transaction BEGIN statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.commit.started.count + exported_name: sql_txn_commit_started_count + description: Number of SQL transaction COMMIT statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.commit.started.count.internal + exported_name: sql_txn_commit_started_count_internal + description: Number of SQL transaction COMMIT statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.commit_prepared.count + exported_name: sql_txn_commit_prepared_count + description: Number of SQL COMMIT PREPARED statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
sql.txn.commit_prepared.count.internal + exported_name: sql_txn_commit_prepared_count_internal + description: Number of SQL COMMIT PREPARED statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.commit_prepared.started.count + exported_name: sql_txn_commit_prepared_started_count + description: Number of SQL COMMIT PREPARED statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.commit_prepared.started.count.internal + exported_name: sql_txn_commit_prepared_started_count_internal + description: Number of SQL COMMIT PREPARED statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.contended.count + exported_name: sql_txn_contended_count + description: Number of SQL transactions that experienced contention + y_axis_label: Contention + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.contended.count.internal + exported_name: sql_txn_contended_count_internal + description: Number of SQL transactions that experienced contention (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.prepare.count + exported_name: sql_txn_prepare_count + description: Number of SQL PREPARE TRANSACTION statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.prepare.count.internal + exported_name: sql_txn_prepare_count_internal + description: Number of SQL PREPARE TRANSACTION statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements +
type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.prepare.started.count + exported_name: sql_txn_prepare_started_count + description: Number of SQL PREPARE TRANSACTION statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.prepare.started.count.internal + exported_name: sql_txn_prepare_started_count_internal + description: Number of SQL PREPARE TRANSACTION statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.rollback.started.count + exported_name: sql_txn_rollback_started_count + description: Number of SQL transaction ROLLBACK statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.rollback.started.count.internal + exported_name: sql_txn_rollback_started_count_internal + description: Number of SQL transaction ROLLBACK statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.rollback_prepared.count + exported_name: sql_txn_rollback_prepared_count + description: Number of SQL ROLLBACK PREPARED statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.rollback_prepared.count.internal + exported_name: sql_txn_rollback_prepared_count_internal + description: Number of SQL ROLLBACK PREPARED statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.rollback_prepared.started.count + exported_name: 
sql_txn_rollback_prepared_started_count + description: Number of SQL ROLLBACK PREPARED statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.rollback_prepared.started.count.internal + exported_name: sql_txn_rollback_prepared_started_count_internal + description: Number of SQL ROLLBACK PREPARED statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.upgraded_iso_level.count + exported_name: sql_txn_upgraded_iso_level_count + description: Number of times a weak isolation level was automatically upgraded to a stronger one + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.upgraded_iso_level.count.internal + exported_name: sql_txn_upgraded_iso_level_count_internal + description: Number of times a weak isolation level was automatically upgraded to a stronger one (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.update.started.count + exported_name: sql_update_started_count + description: Number of SQL UPDATE statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.update.started.count.internal + exported_name: sql_update_started_count_internal + description: Number of SQL UPDATE statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.vecindex.pending_splits_merges + exported_name: sql_vecindex_pending_splits_merges + description: Total number of vector index splits and merges waiting to be processed + y_axis_label: Pending Splits/Merges + 
type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.vecindex.successful_splits + exported_name: sql_vecindex_successful_splits + description: Total number of vector index partitions split without error + y_axis_label: Splits + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sqlliveness.is_alive.cache_hits + exported_name: sqlliveness_is_alive_cache_hits + description: Number of calls to IsAlive that return from the cache + y_axis_label: Calls + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sqlliveness.is_alive.cache_misses + exported_name: sqlliveness_is_alive_cache_misses + description: Number of calls to IsAlive that do not return from the cache + y_axis_label: Calls + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sqlliveness.sessions_deleted + exported_name: sqlliveness_sessions_deleted + description: Number of expired sessions which have been deleted + y_axis_label: Sessions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sqlliveness.sessions_deletion_runs + exported_name: sqlliveness_sessions_deletion_runs + description: Number of calls to delete sessions which have been performed + y_axis_label: Sessions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sqlliveness.write_failures + exported_name: sqlliveness_write_failures + description: Number of update or insert calls which have failed + y_axis_label: Writes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sqlliveness.write_successes + exported_name: sqlliveness_write_successes + description: Number of update or insert calls successfully performed + y_axis_label: Writes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
tenant.cost_client.blocked_requests + exported_name: tenant_cost_client_blocked_requests + description: Number of requests currently blocked by the rate limiter + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: tenant.sql_usage.cross_region_network_ru + exported_name: tenant_sql_usage_cross_region_network_ru + description: Total number of RUs charged for cross-region network traffic + y_axis_label: Request Units + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.estimated_cpu_seconds + exported_name: tenant_sql_usage_estimated_cpu_seconds + description: Estimated amount of CPU consumed by a virtual cluster + y_axis_label: CPU Seconds + type: COUNTER + unit: SECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.estimated_kv_cpu_seconds + exported_name: tenant_sql_usage_estimated_kv_cpu_seconds + description: Estimated amount of CPU consumed by a virtual cluster, in the KV layer + y_axis_label: CPU Seconds + type: COUNTER + unit: SECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.estimated_replication_bytes + exported_name: tenant_sql_usage_estimated_replication_bytes + description: Total number of estimated bytes for KV replication traffic + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.external_io_egress_bytes + exported_name: tenant_sql_usage_external_io_egress_bytes + description: Total number of bytes written to external services such as cloud storage providers + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.external_io_ingress_bytes + exported_name: tenant_sql_usage_external_io_ingress_bytes + description: Total number of bytes read from external services such as cloud storage providers + 
y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.kv_request_units + exported_name: tenant_sql_usage_kv_request_units + description: RU consumption attributable to KV + y_axis_label: Request Units + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.pgwire_egress_bytes + exported_name: tenant_sql_usage_pgwire_egress_bytes + description: Total number of bytes transferred from a SQL pod to the client + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.provisioned_vcpus + exported_name: tenant_sql_usage_provisioned_vcpus + description: Number of vcpus available to the virtual cluster + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: tenant.sql_usage.read_batches + exported_name: tenant_sql_usage_read_batches + description: Total number of KV read batches + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.read_bytes + exported_name: tenant_sql_usage_read_bytes + description: Total number of bytes read from KV + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.read_requests + exported_name: tenant_sql_usage_read_requests + description: Total number of KV read requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.request_units + exported_name: tenant_sql_usage_request_units + description: RU consumption + y_axis_label: Request Units + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.sql_pods_cpu_seconds + exported_name: tenant_sql_usage_sql_pods_cpu_seconds + description: 
Total amount of CPU used by SQL pods + y_axis_label: CPU Seconds + type: COUNTER + unit: SECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.write_batches + exported_name: tenant_sql_usage_write_batches + description: Total number of KV write batches + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.write_bytes + exported_name: tenant_sql_usage_write_bytes + description: Total number of bytes written to KV + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.write_requests + exported_name: tenant_sql_usage_write_requests + description: Total number of KV write requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.aborts + exported_name: txn_aborts + description: Number of aborted KV transactions + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.commit_waits + exported_name: txn_commit_waits + description: Number of KV transactions that had to commit-wait on commit in order to ensure linearizability. This generally happens to transactions writing to global ranges. 
+ y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.commits + exported_name: txn_commits + description: Number of committed KV transactions (including 1PC) + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.commits1PC + exported_name: txn_commits1PC + description: Number of KV transaction one-phase commits + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.commits_read_only + exported_name: txn_commits_read_only + description: Number of read only KV transaction commits + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.condensed_intent_spans + exported_name: txn_condensed_intent_spans + description: KV transactions that have exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). See also txn.condensed_intent_spans_gauge for a gauge of such transactions currently running. + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.condensed_intent_spans_gauge + exported_name: txn_condensed_intent_spans_gauge + description: KV transactions currently running that have exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). See also txn.condensed_intent_spans for a perpetual counter/rate. + y_axis_label: KV Transactions + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: txn.condensed_intent_spans_rejected + exported_name: txn_condensed_intent_spans_rejected + description: KV transactions that have been aborted because they exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). Rejection is caused by kv.transaction.reject_over_max_intents_budget. 
+ y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.count_limit_on_response + exported_name: txn_count_limit_on_response + description: KV transactions that have exceeded the count limit on a response + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.count_limit_rejected + exported_name: txn_count_limit_rejected + description: KV transactions that have been aborted because they exceeded the max number of writes and locking reads allowed + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.durations + exported_name: txn_durations + description: KV transaction durations + y_axis_label: KV Txn Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: txn.inflight_locks_over_tracking_budget + exported_name: txn_inflight_locks_over_tracking_budget + description: KV transactions whose in-flight writes and locking reads have exceeded the intent tracking memory budget (kv.transaction.max_intents_bytes). 
+ y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.parallelcommits + exported_name: txn_parallelcommits + description: Number of KV transaction parallel commits + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.parallelcommits.auto_retries + exported_name: txn_parallelcommits_auto_retries + description: Number of commit tries after successful failed parallel commit attempts + y_axis_label: Retries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.prepares + exported_name: txn_prepares + description: Number of prepared KV transactions + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.refresh.auto_retries + exported_name: txn_refresh_auto_retries + description: Number of request retries after successful client-side refreshes + y_axis_label: Retries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.refresh.fail + exported_name: txn_refresh_fail + description: Number of failed client-side transaction refreshes + y_axis_label: Refreshes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.refresh.fail_with_condensed_spans + exported_name: txn_refresh_fail_with_condensed_spans + description: Number of failed client-side refreshes for transactions whose read tracking lost fidelity because of condensing. Such a failure could be a false conflict. Failures counted here are also counted in txn.refresh.fail, and the respective transactions are also counted in txn.refresh.memory_limit_exceeded. 
+ y_axis_label: Refreshes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.refresh.memory_limit_exceeded + exported_name: txn_refresh_memory_limit_exceeded + description: Number of transactions that exceed the refresh span bytes limit, causing their read spans to be condensed + y_axis_label: Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.refresh.success + exported_name: txn_refresh_success + description: Number of successful client-side transaction refreshes. A refresh may be preemptive or reactive. A reactive refresh is performed after a request throws an error because a refresh is needed for it to succeed. In these cases, the request will be re-issued as an auto-retry (see txn.refresh.auto_retries) after the refresh succeeds. + y_axis_label: Refreshes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.refresh.success_server_side + exported_name: txn_refresh_success_server_side + description: Number of successful server-side transaction refreshes + y_axis_label: Refreshes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.restarts + exported_name: txn_restarts + description: Number of restarted KV transactions + y_axis_label: KV Transactions + type: HISTOGRAM + unit: COUNT + aggregation: AVG + derivative: NONE + - name: txn.restarts.asyncwritefailure + exported_name: txn_restarts_asyncwritefailure + description: Number of restarts due to async consensus writes that failed to leave intents + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.restarts.commitdeadlineexceeded + exported_name: txn_restarts_commitdeadlineexceeded + description: Number of restarts due to a transaction exceeding its deadline + y_axis_label: Restarted Transactions + type: COUNTER + unit: 
COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.restarts.exclusionviolation + exported_name: txn_restarts_exclusionviolation + description: Number of restarts due to an exclusion violation + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.restarts.readwithinuncertainty + exported_name: txn_restarts_readwithinuncertainty + description: Number of restarts due to reading a new value within the uncertainty interval + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.rollbacks.async.failed + exported_name: txn_rollbacks_async_failed + description: Number of KV transactions that failed to send abort asynchronously which is not always retried + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.rollbacks.failed + exported_name: txn_rollbacks_failed + description: Number of KV transactions that failed to send final abort + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.write_buffering.batches.fully_handled + exported_name: txn_write_buffering_batches_fully_handled + description: Number of KV batches that were fully handled by the write buffer (not sent to KV) + y_axis_label: KV Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.write_buffering.disabled_after_buffering + exported_name: txn_write_buffering_disabled_after_buffering + description: Number of KV transactions that disabled write buffering after buffering some writes but before an EndTxn request + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.write_buffering.memory_limit_exceeded + exported_name: 
txn_write_buffering_memory_limit_exceeded + description: Number of KV transactions that exceeded the write buffering memory limit + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.write_buffering.num_enabled + exported_name: txn_write_buffering_num_enabled + description: Number of KV transactions that enabled buffered writes + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE +- name: SERVER + categories: + - name: HARDWARE + metrics: + - name: sys.cpu.combined.percent-normalized + exported_name: sys_cpu_combined_percent_normalized + description: Current user+system cpu percentage consumed by the CRDB process, normalized 0-1 by number of cores + y_axis_label: CPU Time + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + how_to_use: "This metric gives the CPU utilization percentage by the CockroachDB process. \n\t\tIf it is equal to 1 (or 100%), then the CPU is overloaded. The CockroachDB process should \n\t\tnot be running with over 80% utilization for extended periods of time (hours). This metric \n\t\tis used in the DB Console CPU Percent graph." + essential: true + - name: sys.cpu.host.combined.percent-normalized + exported_name: sys_cpu_host_combined_percent_normalized + description: Current user+system cpu percentage across the whole machine, normalized 0-1 by number of cores + y_axis_label: CPU Time + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + how_to_use: "This metric gives the CPU utilization percentage of the\n\t\tunderlying server, virtual machine, or container hosting the\n\t\tCockroachDB process. It includes CPU usage from both CockroachDB\n\t\tand non-CockroachDB processes. 
It also accounts for time spent\n\t\tprocessing hardware (irq) and software (softirq) interrupts, as\n\t\twell as nice time, which represents low-priority user-mode\n\t\tactivity.\n\n A value of 1 (or 100%) indicates that the CPU is overloaded. Avoid\n running the CockroachDB process in an environment where the CPU\n remains overloaded for extended periods (e.g. multiple hours). This\n metric appears in the DB Console on the Host CPU Percent graph." + essential: true + - name: sys.cpu.sys.percent + exported_name: sys_cpu_sys_percent + description: Current system cpu percentage consumed by the CRDB process + y_axis_label: CPU Time + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + how_to_use: "This metric gives the CPU usage percentage at the system\n\t\t(Linux kernel) level by the CockroachDB process only. This is\n\t\tsimilar to the Linux top command output. The metric value can be\n\t\tmore than 1 (or 100%) on multi-core systems. It is best to combine\n\t\tuser and system metrics." + essential: true + - name: sys.cpu.user.percent + exported_name: sys_cpu_user_percent + description: Current user cpu percentage consumed by the CRDB process + y_axis_label: CPU Time + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + how_to_use: "This metric gives the CPU usage percentage at the user\n\t\tlevel by the CockroachDB process only. This is similar to the Linux\n\t\ttop command output. The metric value can be more than 1 (or 100%)\n\t\ton multi-core systems. It is best to combine user and system\n\t\tmetrics." + essential: true + - name: sys.host.disk.iopsinprogress + exported_name: sys_host_disk_iopsinprogress + description: IO operations currently in progress on this host (as reported by the OS) + y_axis_label: Operations + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric gives the average queue length of the storage device. It characterizes the storage device's performance capability. 
All I/O performance metrics are Linux counters and correspond to the avgqu-sz in the Linux iostat command output. You need to view the device queue graph in the context of the actual read/write IOPS and MBPS metrics that show the actual device utilization. If the device is not keeping up, the queue will grow. Values over 10 are bad. Values around 5 mean the device is working hard trying to keep up. For internal (on chassis) NVMe devices, the queue values are typically 0. For network connected devices, such as AWS EBS volumes, the normal operating range of values is 1 to 2. Spikes in values are OK. They indicate an I/O spike where the device fell behind and then caught up. End users may experience inconsistent response times, but there should be no cluster stability issues. If the queue is greater than 5 for an extended period of time and IOPS or MBPS are low, then the storage is most likely not provisioned per Cockroach Labs guidance. In AWS EBS, it is commonly an EBS type, such as gp2, not suitable as database primary storage. If I/O is low and the queue is low, the most likely scenario is that the CPU is lacking and not driving I/O. One such case is a cluster with nodes with only 2 vcpus which is not supported sizing for production deployments. There are quite a few background processes in the database that take CPU away from the workload, so the workload is just not getting the CPU. Review storage and disk I/O. + essential: true + - name: sys.host.disk.read.bytes + exported_name: sys_host_disk_read_bytes + description: Bytes read from all disks since this process started (as reported by the OS) + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric reports the effective storage device read throughput (MB/s) rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. 
+ essential: true + - name: sys.host.disk.read.count + exported_name: sys_host_disk_read_count + description: Disk read operations across all disks since this process started (as reported by the OS) + y_axis_label: Operations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric reports the effective storage device read IOPS rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. + essential: true + - name: sys.host.disk.write.bytes + exported_name: sys_host_disk_write_bytes + description: Bytes written to all disks since this process started (as reported by the OS) + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric reports the effective storage device write throughput (MB/s) rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. + essential: true + - name: sys.host.disk.write.count + exported_name: sys_host_disk_write_count + description: Disk write operations across all disks since this process started (as reported by the OS) + y_axis_label: Operations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric reports the effective storage device write IOPS rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. 
+ essential: true + - name: sys.host.net.recv.bytes + exported_name: sys_host_net_recv_bytes + description: Bytes received on all network interfaces since this process started (as reported by the OS) + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric gives the node's ingress/egress network transfer rates for flat sections which may indicate insufficiently provisioned networking or high error rates. CockroachDB is using a reliable TCP/IP protocol, so errors result in delivery retries that create a "slow network" effect. + essential: true + - name: sys.host.net.send.bytes + exported_name: sys_host_net_send_bytes + description: Bytes sent on all network interfaces since this process started (as reported by the OS) + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric gives the node's ingress/egress network transfer rates for flat sections which may indicate insufficiently provisioned networking or high error rates. CockroachDB is using a reliable TCP/IP protocol, so errors result in delivery retries that create a "slow network" effect. + essential: true + - name: sys.rss + exported_name: sys_rss + description: Current process RSS + y_axis_label: RSS + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + how_to_use: "This metric gives the amount of RAM used by the\n\t\tCockroachDB process. Persistently low values over an extended\n\t\tperiod of time suggest there is underutilized memory that can be\n\t\tput to work with adjusted settings for --cache or --max_sql_memory\n\t\tor both. Conversely, a high utilization, even if a temporary spike,\n\t\tindicates an increased risk of Out-of-memory (OOM) crash\n\t\t(particularly since the swap is generally disabled)." 
+ essential: true + - name: sys.runnable.goroutines.per.cpu + exported_name: sys_runnable_goroutines_per_cpu + description: Average number of goroutines that are waiting to run, normalized by number of cores + y_axis_label: goroutines + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: If this metric has a value over 30, it indicates a CPU overload. If the condition lasts a short period of time (a few seconds), the database users are likely to experience inconsistent response times. If the condition persists for an extended period of time (tens of seconds, or minutes) the cluster may start developing stability issues. Review CPU planning. + essential: true + - name: sys.uptime + exported_name: sys_uptime + description: Process uptime + y_axis_label: Uptime + type: COUNTER + unit: SECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric measures the length of time, in seconds, that the CockroachDB process has been running. Monitor this metric to detect events such as node restarts, which may require investigation or intervention. + essential: true + - name: UNSET + metrics: + - name: build.timestamp + exported_name: build_timestamp + description: Build information + y_axis_label: Build Time + type: GAUGE + unit: TIMESTAMP_SEC + aggregation: AVG + derivative: NONE + - name: go.scheduler_latency + exported_name: go_scheduler_latency + description: Go scheduling latency + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: log.buffered.messages.dropped + exported_name: log_buffered_messages_dropped + description: Count of log messages that are dropped by buffered log sinks. 
When CRDB attempts to buffer a log message in a buffered log sink whose buffer is already full, it drops the oldest buffered messages to make space for the new message + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: log.fluent.sink.conn.attempts + exported_name: log_fluent_sink_conn_attempts + description: Number of connection attempts experienced by fluent-server logging sinks + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: log.fluent.sink.conn.errors + exported_name: log_fluent_sink_conn_errors + description: Number of connection errors experienced by fluent-server logging sinks + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: log.fluent.sink.write.attempts + exported_name: log_fluent_sink_write_attempts + description: Number of write attempts experienced by fluent-server logging sinks + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: log.fluent.sink.write.errors + exported_name: log_fluent_sink_write_errors + description: Number of write errors experienced by fluent-server logging sinks + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: log.messages.count + exported_name: log_messages_count + description: Count of messages logged on the node since startup. Note that this does not measure the fan-out of single log messages to the various configured logging sinks. 
+ y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.cgo.allocbytes + exported_name: sys_cgo_allocbytes + description: Current bytes of memory allocated by cgo + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.cgo.totalbytes + exported_name: sys_cgo_totalbytes + description: Total bytes of memory allocated by cgo, but not released + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.cgocalls + exported_name: sys_cgocalls + description: Total number of cgo calls + y_axis_label: cgo Calls + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.cpu.now.ns + exported_name: sys_cpu_now_ns + description: The time when CPU measurements were taken, as nanoseconds since epoch + y_axis_label: CPU Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.cpu.sys.ns + exported_name: sys_cpu_sys_ns + description: Total system cpu time consumed by the CRDB process + y_axis_label: CPU Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.cpu.user.ns + exported_name: sys_cpu_user_ns + description: Total user cpu time consumed by the CRDB process + y_axis_label: CPU Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.fd.open + exported_name: sys_fd_open + description: Process open file descriptors + y_axis_label: File Descriptors + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sys.fd.softlimit + exported_name: sys_fd_softlimit + description: Process open FD soft limit + y_axis_label: File Descriptors + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sys.gc.assist.ns + exported_name: sys_gc_assist_ns + description: Estimated total CPU time user 
goroutines spent to assist the GC process + y_axis_label: CPU Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.gc.count + exported_name: sys_gc_count + description: Total number of GC runs + y_axis_label: GC Runs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.gc.pause.ns + exported_name: sys_gc_pause_ns + description: Total GC pause + y_axis_label: GC Pause + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.gc.pause.percent + exported_name: sys_gc_pause_percent + description: Current GC pause percentage + y_axis_label: GC Pause + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + - name: sys.gc.stop.ns + exported_name: sys_gc_stop_ns + description: Estimated GC stop-the-world stopping latencies + y_axis_label: GC Stopping + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sys.go.allocbytes + exported_name: sys_go_allocbytes + description: Current bytes of memory allocated by go + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.go.heap.allocbytes + exported_name: sys_go_heap_allocbytes + description: Cumulative bytes allocated for heap objects. + y_axis_label: Memory + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.go.heap.heapfragmentbytes + exported_name: sys_go_heap_heapfragmentbytes + description: Total heap fragmentation bytes, derived from bytes in in-use spans minus bytes allocated + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.go.heap.heapreleasedbytes + exported_name: sys_go_heap_heapreleasedbytes + description: Total bytes returned to the OS from heap. 
+ y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.go.heap.heapreservedbytes + exported_name: sys_go_heap_heapreservedbytes + description: Total bytes reserved by heap, derived from bytes in idle (unused) spans minus bytes returned to the OS + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.go.pause.other.ns + exported_name: sys_go_pause_other_ns + description: Estimated non-GC-related total pause time + y_axis_label: Non-GC Pause + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sys.go.stack.systembytes + exported_name: sys_go_stack_systembytes + description: Stack memory obtained from the OS. + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.go.stop.other.ns + exported_name: sys_go_stop_other_ns + description: Estimated non-GC-related stop-the-world stopping latencies + y_axis_label: Non-GC Stopping + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sys.go.totalbytes + exported_name: sys_go_totalbytes + description: Total bytes of memory allocated by go, but not released + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.goroutines + exported_name: sys_goroutines + description: Current number of goroutines + y_axis_label: goroutines + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sys.host.disk.io.time + exported_name: sys_host_disk_io_time + description: Time spent reading from or writing to all disks since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.disk.read.time + exported_name: sys_host_disk_read_time + description: Time spent reading from all disks since this process started (as reported by the OS) + y_axis_label: Time + type: 
COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.disk.weightedio.time + exported_name: sys_host_disk_weightedio_time + description: Weighted time spent reading from or writing to all disks since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.disk.write.time + exported_name: sys_host_disk_write_time + description: Time spent writing to all disks since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.net.recv.drop + exported_name: sys_host_net_recv_drop + description: Receiving packets that got dropped on all network interfaces since this process started (as reported by the OS) + y_axis_label: Packets + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.net.recv.err + exported_name: sys_host_net_recv_err + description: Error receiving packets on all network interfaces since this process started (as reported by the OS) + y_axis_label: Packets + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.net.recv.packets + exported_name: sys_host_net_recv_packets + description: Packets received on all network interfaces since this process started (as reported by the OS) + y_axis_label: Packets + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.net.send.drop + exported_name: sys_host_net_send_drop + description: Sending packets that got dropped on all network interfaces since this process started (as reported by the OS) + y_axis_label: Packets + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.net.send.err + exported_name: sys_host_net_send_err + 
description: Error on sending packets on all network interfaces since this process started (as reported by the OS) + y_axis_label: Packets + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.net.send.packets + exported_name: sys_host_net_send_packets + description: Packets sent on all network interfaces since this process started (as reported by the OS) + y_axis_label: Packets + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.totalmem + exported_name: sys_totalmem + description: Total memory (both free and used) + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE +- name: STORAGE + categories: + - name: OVERLOAD + metrics: + - name: admission.wait_durations.kv + exported_name: admission_wait_durations_kv + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: This metric shows if CPU utilization-based admission control feature is working effectively or potentially overaggressive. This is a latency histogram of how much delay was added to the workload due to throttling. If observing over 100ms waits for over 5 seconds while there was excess capacity available, then the admission control is overly aggressive. + essential: true + - name: admission.wait_durations.kv-stores + exported_name: admission_wait_durations_kv_stores + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: This metric shows if I/O utilization-based admission control feature is working effectively or potentially overaggressive. This is a latency histogram of how much delay was added to the workload due to throttling. 
If observing over 100ms waits for over 5 seconds while there was excess capacity available, then the admission control is overly aggressive. + essential: true + - name: REPLICATION + metrics: + - name: leases.transfers.success + exported_name: leases_transfers_success + description: Number of successful lease transfers + y_axis_label: Lease Transfers + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: A high number of lease transfers is not a negative or positive signal, rather it is a reflection of the elastic cluster activities. For example, this metric is high during cluster topology changes. A high value is often the reason for NotLeaseHolderErrors which are normal and expected during rebalancing. Observing this metric may provide a confirmation of the cause of such errors. + essential: true + - name: liveness.heartbeatlatency + exported_name: liveness_heartbeatlatency + description: Node liveness heartbeat latency + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: If this metric exceeds 1 second, it is a sign of cluster instability. + essential: true + - name: liveness.livenodes + exported_name: liveness_livenodes + description: Number of live nodes in the cluster (will be 0 if this node is not itself live) + y_axis_label: Nodes + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This is a critical metric that tracks the live nodes in the cluster. + essential: true + - name: queue.replicate.replacedecommissioningreplica.error + exported_name: queue_replicate_replacedecommissioningreplica_error + description: Number of failed decommissioning replica replacements processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Refer to Decommission the node. 
+ essential: true + - name: range.merges + exported_name: range_merges + description: Number of range merges + y_axis_label: Range Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric indicates how fast a workload is scaling down. Merges are Cockroach's optimization for performance. This metric indicates that there have been deletes in the workload. + essential: true + - name: range.splits + exported_name: range_splits + description: Number of range splits + y_axis_label: Range Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric indicates how fast a workload is scaling up. Spikes can indicate resource hotspots since the split heuristic is based on QPS. To understand whether hotspots are an issue and with which tables and indexes they are occurring, correlate this metric with other metrics such as CPU usage, such as sys.cpu.combined.percent-normalized, or use the Hot Ranges page. + essential: true + - name: ranges + exported_name: ranges + description: Number of ranges + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric provides a measure of the scale of the data size. + essential: true + - name: ranges.unavailable + exported_name: ranges_unavailable + description: Number of ranges with fewer live replicas than needed for quorum + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric is an indicator of replication issues. It shows whether the cluster is unhealthy and can impact workload. If an entire range is unavailable, then it will be unable to process queries. 
+ essential: true + - name: ranges.underreplicated + exported_name: ranges_underreplicated + description: Number of ranges with fewer live replicas than the replication target + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric is an indicator of replication issues. It shows whether the cluster has data that is not conforming to resilience goals. The next step is to determine the corresponding database object, such as the table or index, of these under-replicated ranges and whether the under-replication is temporarily expected. Use the statement SELECT table_name, index_name FROM [SHOW RANGES WITH INDEXES] WHERE range_id = {id of under-replicated range}; + essential: true + - name: rebalancing.cpunanospersecond + exported_name: rebalancing_cpunanospersecond + description: Average CPU nanoseconds spent on processing replica operations in the last 30 minutes. + y_axis_label: Nanoseconds/Sec + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: A high value of this metric could indicate that one of the store's replicas is part of a hot range. + essential: true + - name: rebalancing.lease.transfers + exported_name: rebalancing_lease_transfers + description: Number of lease transfers motivated by store-level load imbalances + y_axis_label: Lease Transfers + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Used to identify when there has been more rebalancing activity triggered by imbalance between stores (of QPS or CPU). If this is high (when the count is rated), it indicates that more rebalancing activity is taking place due to load imbalance between stores. + essential: true + - name: rebalancing.queriespersecond + exported_name: rebalancing_queriespersecond + description: Number of kv-level requests received per second by the store, considering the last 30 minutes, as used in rebalancing decisions. 
+ y_axis_label: Queries/Sec + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric shows hotspots along the queries per second (QPS) dimension. It provides insights into the ongoing rebalancing activities. + essential: true + - name: rebalancing.range.rebalances + exported_name: rebalancing_range_rebalances + description: Number of range rebalance operations motivated by store-level load imbalances + y_axis_label: Range Rebalances + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Used to identify when there has been more rebalancing activity triggered by imbalance between stores (of QPS or CPU). If this is high (when the count is rated), it indicates that more rebalancing activity is taking place due to load imbalance between stores. + essential: true + - name: rebalancing.replicas.cpunanospersecond + exported_name: rebalancing_replicas_cpunanospersecond + description: Histogram of average CPU nanoseconds spent on processing replica operations in the last 30 minutes. + y_axis_label: Nanoseconds/Sec + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: 'A high value of this metric could indicate that one of the store''s replicas is part of a hot range. See also the non-histogram variant: rebalancing.cpunanospersecond.' + essential: true + - name: rebalancing.replicas.queriespersecond + exported_name: rebalancing_replicas_queriespersecond + description: Histogram of average kv-level requests received per second by replicas on the store in the last 30 minutes. + y_axis_label: Queries/Sec + type: HISTOGRAM + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: 'A high value of this metric could indicate that one of the store''s replicas is part of a hot range. See also: rebalancing_replicas_cpunanospersecond.' 
+ essential: true + - name: replicas + exported_name: replicas + description: Number of replicas + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric provides an essential characterization of the data distribution across cluster nodes. + essential: true + - name: replicas.leaseholders + exported_name: replicas_leaseholders + description: Number of lease holders + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric provides an essential characterization of the data processing points across cluster nodes. + essential: true + - name: SQL + metrics: + - name: txnwaitqueue.deadlocks_total + exported_name: txnwaitqueue_deadlocks_total + description: Number of deadlocks detected by the txn wait queue + y_axis_label: Deadlocks + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Alert on this metric if its value is greater than zero, especially if transaction throughput is lower than expected. Applications should be able to detect and recover from deadlock errors. However, transaction performance and throughput can be maximized if the application logic avoids deadlock conditions in the first place, for example, by keeping transactions as short as possible. + essential: true + - name: STORAGE + metrics: + - name: admission.io.overload + exported_name: admission_io_overload + description: 1-normalized float indicating whether IO admission control considers the store as overloaded with respect to compaction out of L0 (considers sub-level and file counts). + y_axis_label: Threshold + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + how_to_use: If the value of this metric exceeds 1, then it indicates overload. You can also look at the metrics `storage.l0-num-files`, `storage.l0-sublevels` or `rocksdb.read-amplification` directly. 
A healthy LSM shape is defined as "read-amp < 20" and "L0-files < 1000", looking at cluster settings `admission.l0_sub_level_count_overload_threshold` and `admission.l0_file_count_overload_threshold` respectively. + essential: true + - name: capacity + exported_name: capacity + description: Total storage capacity + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + how_to_use: 'This metric gives total storage capacity. Measurements should comply with the following rule: CockroachDB storage volumes should not be utilized more than 60% (40% free space).' + essential: true + - name: capacity.available + exported_name: capacity_available + description: Available storage capacity + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + how_to_use: 'This metric gives available storage capacity. Measurements should comply with the following rule: CockroachDB storage volumes should not be utilized more than 60% (40% free space).' + essential: true + - name: capacity.used + exported_name: capacity_used + description: Used storage capacity + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + how_to_use: 'This metric gives used storage capacity. Measurements should comply with the following rule: CockroachDB storage volumes should not be utilized more than 60% (40% free space).' + essential: true + - name: rocksdb.block.cache.hits + exported_name: rocksdb_block_cache_hits + description: Count of block cache hits + y_axis_label: Cache Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric gives hits to block cache which is reserved memory. It is allocated upon the start of a node process by the `--cache` flag and never shrinks. By observing block cache hits and misses, you can fine-tune memory allocations in the node process for the demands of the workload. 
+ essential: true + - name: rocksdb.block.cache.misses + exported_name: rocksdb_block_cache_misses + description: Count of block cache misses + y_axis_label: Cache Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric gives misses to block cache which is reserved memory. It is allocated upon the start of a node process by the `--cache` flag and never shrinks. By observing block cache hits and misses, you can fine-tune memory allocations in the node process for the demands of the workload. + essential: true + - name: rocksdb.compactions + exported_name: rocksdb_compactions + description: Number of table compactions + y_axis_label: Compactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric reports the number of a node's LSM compactions. If the number of compactions remains elevated while the LSM health does not improve, compactions are not keeping up with the workload. If the condition persists for an extended period, the cluster will initially exhibit performance issues that will eventually escalate into stability issues. + essential: true + - name: storage.wal.fsync.latency + exported_name: storage_wal_fsync_latency + description: The write ahead log fsync latency + y_axis_label: Fsync Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: If this value is greater than `100ms`, it is an indication of a disk stall. To mitigate the effects of disk stalls, consider deploying your cluster with WAL failover configured. + essential: true + - name: storage.write-stalls + exported_name: storage_write_stalls + description: Number of instances of intentional write stalls to backpressure incoming writes + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric reports actual disk stall events. 
Ideally, investigate all reports of disk stalls. As a practical guideline, one stall per minute is not likely to have a material impact on workload beyond an occasional increase in response time. However, one stall per second should be viewed as problematic and investigated actively. It is particularly problematic if the rate persists over an extended period of time, and worse, if it is increasing. + essential: true + - name: UNSET + metrics: + - name: abortspanbytes + exported_name: abortspanbytes + description: Number of bytes in the abort span + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: addsstable.applications + exported_name: addsstable_applications + description: Number of SSTable ingestions applied (i.e. applied by Replicas) + y_axis_label: Ingestions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: addsstable.aswrites + exported_name: addsstable_aswrites + description: | + Number of SSTables ingested as normal writes. + + These AddSSTable requests do not count towards the addsstable metrics + 'proposals', 'applications', or 'copies', as they are not ingested as AddSSTable + Raft commands, but rather normal write commands. However, if these requests get + throttled they do count towards 'delay.total' and 'delay.enginebackpressure'. 
+ y_axis_label: Ingestions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: addsstable.copies + exported_name: addsstable_copies + description: number of SSTable ingestions that required copying files during application + y_axis_label: Ingestions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: addsstable.delay.enginebackpressure + exported_name: addsstable_delay_enginebackpressure + description: Amount by which evaluation of AddSSTable requests was delayed by storage-engine backpressure + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: addsstable.delay.total + exported_name: addsstable_delay_total + description: Amount by which evaluation of AddSSTable requests was delayed + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: addsstable.proposals + exported_name: addsstable_proposals + description: Number of SSTable ingestions proposed (i.e. 
sent to Raft by lease holders) + y_axis_label: Ingestions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.elastic-cpu + exported_name: admission_admitted_elastic_cpu + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.elastic-cpu.bulk-normal-pri + exported_name: admission_admitted_elastic_cpu_bulk_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.elastic-cpu.normal-pri + exported_name: admission_admitted_elastic_cpu_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.elastic-stores + exported_name: admission_admitted_elastic_stores + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.elastic-stores.bulk-low-pri + exported_name: admission_admitted_elastic_stores_bulk_low_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.elastic-stores.bulk-normal-pri + exported_name: admission_admitted_elastic_stores_bulk_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv + exported_name: admission_admitted_kv + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
admission.admitted.kv-stores + exported_name: admission_admitted_kv_stores + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv-stores.high-pri + exported_name: admission_admitted_kv_stores_high_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv-stores.locking-normal-pri + exported_name: admission_admitted_kv_stores_locking_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv-stores.normal-pri + exported_name: admission_admitted_kv_stores_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv-stores.user-high-pri + exported_name: admission_admitted_kv_stores_user_high_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv.high-pri + exported_name: admission_admitted_kv_high_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv.locking-normal-pri + exported_name: admission_admitted_kv_locking_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv.normal-pri + exported_name: admission_admitted_kv_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + 
aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.sql-kv-response + exported_name: admission_admitted_sql_kv_response + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.sql-kv-response.locking-normal-pri + exported_name: admission_admitted_sql_kv_response_locking_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.sql-kv-response.normal-pri + exported_name: admission_admitted_sql_kv_response_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.sql-sql-response + exported_name: admission_admitted_sql_sql_response + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.sql-sql-response.locking-normal-pri + exported_name: admission_admitted_sql_sql_response_locking_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.sql-sql-response.normal-pri + exported_name: admission_admitted_sql_sql_response_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.elastic_cpu.acquired_nanos + exported_name: admission_elastic_cpu_acquired_nanos + description: Total CPU nanoseconds acquired by elastic work + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
admission.elastic_cpu.available_nanos + exported_name: admission_elastic_cpu_available_nanos + description: Instantaneous available CPU nanoseconds per second ignoring utilization limit + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.elastic_cpu.max_available_nanos + exported_name: admission_elastic_cpu_max_available_nanos + description: Maximum available CPU nanoseconds per second ignoring utilization limit + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.elastic_cpu.nanos_exhausted_duration + exported_name: admission_elastic_cpu_nanos_exhausted_duration + description: Total duration when elastic CPU nanoseconds were exhausted, in micros + y_axis_label: Microseconds + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.elastic_cpu.over_limit_durations + exported_name: admission_elastic_cpu_over_limit_durations + description: Measurement of how much over the prescribed limit elastic requests ran (not recorded if requests don't run over) + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.elastic_cpu.pre_work_nanos + exported_name: admission_elastic_cpu_pre_work_nanos + description: Total CPU nanoseconds spent doing pre-work, before doing elastic work + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.elastic_cpu.returned_nanos + exported_name: admission_elastic_cpu_returned_nanos + description: Total CPU nanoseconds returned by elastic work + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.elastic_cpu.utilization + exported_name: admission_elastic_cpu_utilization + description: CPU utilization by elastic work + y_axis_label: 
CPU Time + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + - name: admission.elastic_cpu.utilization_limit + exported_name: admission_elastic_cpu_utilization_limit + description: Utilization limit set for the elastic CPU work + y_axis_label: CPU Time + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + - name: admission.errored.elastic-cpu + exported_name: admission_errored_elastic_cpu + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.elastic-cpu.bulk-normal-pri + exported_name: admission_errored_elastic_cpu_bulk_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.elastic-cpu.normal-pri + exported_name: admission_errored_elastic_cpu_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.elastic-stores + exported_name: admission_errored_elastic_stores + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.elastic-stores.bulk-low-pri + exported_name: admission_errored_elastic_stores_bulk_low_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.elastic-stores.bulk-normal-pri + exported_name: admission_errored_elastic_stores_bulk_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: 
NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv + exported_name: admission_errored_kv + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv-stores + exported_name: admission_errored_kv_stores + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv-stores.high-pri + exported_name: admission_errored_kv_stores_high_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv-stores.locking-normal-pri + exported_name: admission_errored_kv_stores_locking_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv-stores.normal-pri + exported_name: admission_errored_kv_stores_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv-stores.user-high-pri + exported_name: admission_errored_kv_stores_user_high_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv.high-pri + exported_name: admission_errored_kv_high_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv.locking-normal-pri + exported_name: 
admission_errored_kv_locking_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv.normal-pri + exported_name: admission_errored_kv_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.sql-kv-response + exported_name: admission_errored_sql_kv_response + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.sql-kv-response.locking-normal-pri + exported_name: admission_errored_sql_kv_response_locking_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.sql-kv-response.normal-pri + exported_name: admission_errored_sql_kv_response_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.sql-sql-response + exported_name: admission_errored_sql_sql_response + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.sql-sql-response.locking-normal-pri + exported_name: admission_errored_sql_sql_response_locking_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.sql-sql-response.normal-pri + exported_name: 
admission_errored_sql_sql_response_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.cpu_load_long_period_duration.kv + exported_name: admission_granter_cpu_load_long_period_duration_kv + description: Total duration when CPULoad was being called with a long period, in micros + y_axis_label: Microseconds + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.cpu_load_short_period_duration.kv + exported_name: admission_granter_cpu_load_short_period_duration_kv + description: Total duration when CPULoad was being called with a short period, in micros + y_axis_label: Microseconds + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.elastic_io_tokens_available.kv + exported_name: admission_granter_elastic_io_tokens_available_kv + description: Number of tokens available + y_axis_label: Tokens + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.granter.elastic_io_tokens_exhausted_duration.kv + exported_name: admission_granter_elastic_io_tokens_exhausted_duration_kv + description: Total duration when Elastic IO tokens were exhausted, in micros + y_axis_label: Microseconds + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.io_tokens_available.kv + exported_name: admission_granter_io_tokens_available_kv + description: Number of tokens available + y_axis_label: Tokens + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.granter.io_tokens_bypassed.kv + exported_name: admission_granter_io_tokens_bypassed_kv + description: Total number of tokens taken by work bypassing admission control (for example, follower writes without flow control) + y_axis_label: Tokens + type: 
COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.io_tokens_exhausted_duration.kv + exported_name: admission_granter_io_tokens_exhausted_duration_kv + description: Total duration when IO tokens were exhausted, in micros + y_axis_label: Microseconds + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.io_tokens_returned.kv + exported_name: admission_granter_io_tokens_returned_kv + description: Total number of tokens returned + y_axis_label: Tokens + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.io_tokens_taken.kv + exported_name: admission_granter_io_tokens_taken_kv + description: Total number of tokens taken + y_axis_label: Tokens + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.slot_adjuster_decrements.kv + exported_name: admission_granter_slot_adjuster_decrements_kv + description: Number of decrements of the total KV slots + y_axis_label: Slots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.slot_adjuster_increments.kv + exported_name: admission_granter_slot_adjuster_increments_kv + description: Number of increments of the total KV slots + y_axis_label: Slots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.slots_exhausted_duration.kv + exported_name: admission_granter_slots_exhausted_duration_kv + description: Total duration when KV slots were exhausted, in micros + y_axis_label: Microseconds + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.total_slots.kv + exported_name: admission_granter_total_slots_kv + description: Total slots for kv work + y_axis_label: Slots + type: GAUGE + unit: COUNT + aggregation: AVG + 
derivative: NONE + - name: admission.granter.used_slots.kv + exported_name: admission_granter_used_slots_kv + description: Used slots + y_axis_label: Slots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.l0_compacted_bytes.kv + exported_name: admission_l0_compacted_bytes_kv + description: Total bytes compacted out of L0 (used to generate IO tokens) + y_axis_label: Tokens + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.l0_tokens_produced.kv + exported_name: admission_l0_tokens_produced_kv + description: Total bytes produced for L0 writes + y_axis_label: Tokens + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.raft.paused_replicas + exported_name: admission_raft_paused_replicas + description: |- + Number of followers (i.e. Replicas) to which replication is currently paused to help them recover from I/O overload. + + Such Replicas will be ignored for the purposes of proposal quota, and will not + receive replication traffic. They are essentially treated as offline for the + purpose of replication. This serves as a crude form of admission control. + + The count is emitted by the leaseholder of each range. + y_axis_label: Followers + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.raft.paused_replicas_dropped_msgs + exported_name: admission_raft_paused_replicas_dropped_msgs + description: |- + Number of messages dropped instead of being sent to paused replicas. + + The messages are dropped to help these replicas to recover from I/O overload. 
+ y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.elastic-cpu + exported_name: admission_requested_elastic_cpu + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.elastic-cpu.bulk-normal-pri + exported_name: admission_requested_elastic_cpu_bulk_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.elastic-cpu.normal-pri + exported_name: admission_requested_elastic_cpu_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.elastic-stores + exported_name: admission_requested_elastic_stores + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.elastic-stores.bulk-low-pri + exported_name: admission_requested_elastic_stores_bulk_low_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.elastic-stores.bulk-normal-pri + exported_name: admission_requested_elastic_stores_bulk_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv + exported_name: admission_requested_kv + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv-stores + exported_name: admission_requested_kv_stores + description: Number of 
requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv-stores.high-pri + exported_name: admission_requested_kv_stores_high_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv-stores.locking-normal-pri + exported_name: admission_requested_kv_stores_locking_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv-stores.normal-pri + exported_name: admission_requested_kv_stores_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv-stores.user-high-pri + exported_name: admission_requested_kv_stores_user_high_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv.high-pri + exported_name: admission_requested_kv_high_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv.locking-normal-pri + exported_name: admission_requested_kv_locking_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv.normal-pri + exported_name: admission_requested_kv_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.sql-kv-response + exported_name: admission_requested_sql_kv_response + 
description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.sql-kv-response.locking-normal-pri + exported_name: admission_requested_sql_kv_response_locking_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.sql-kv-response.normal-pri + exported_name: admission_requested_sql_kv_response_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.sql-sql-response + exported_name: admission_requested_sql_sql_response + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.sql-sql-response.locking-normal-pri + exported_name: admission_requested_sql_sql_response_locking_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.sql-sql-response.normal-pri + exported_name: admission_requested_sql_sql_response_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.scheduler_latency_listener.p99_nanos + exported_name: admission_scheduler_latency_listener_p99_nanos + description: The scheduling latency at p99 as observed by the scheduler latency listener + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.elastic-cpu + exported_name: admission_wait_durations_elastic_cpu + description: Wait time durations for requests that waited + y_axis_label: Wait time 
Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.elastic-cpu.bulk-normal-pri + exported_name: admission_wait_durations_elastic_cpu_bulk_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.elastic-cpu.normal-pri + exported_name: admission_wait_durations_elastic_cpu_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.elastic-stores + exported_name: admission_wait_durations_elastic_stores + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.elastic-stores.bulk-low-pri + exported_name: admission_wait_durations_elastic_stores_bulk_low_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.elastic-stores.bulk-normal-pri + exported_name: admission_wait_durations_elastic_stores_bulk_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.kv-stores.high-pri + exported_name: admission_wait_durations_kv_stores_high_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.kv-stores.locking-normal-pri + exported_name: admission_wait_durations_kv_stores_locking_normal_pri + 
description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.kv-stores.normal-pri + exported_name: admission_wait_durations_kv_stores_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.kv-stores.user-high-pri + exported_name: admission_wait_durations_kv_stores_user_high_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.kv.high-pri + exported_name: admission_wait_durations_kv_high_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.kv.locking-normal-pri + exported_name: admission_wait_durations_kv_locking_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.kv.normal-pri + exported_name: admission_wait_durations_kv_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.snapshot_ingest + exported_name: admission_wait_durations_snapshot_ingest + description: Wait time for snapshot ingest requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.sql-kv-response + exported_name: admission_wait_durations_sql_kv_response + 
description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.sql-kv-response.locking-normal-pri + exported_name: admission_wait_durations_sql_kv_response_locking_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.sql-kv-response.normal-pri + exported_name: admission_wait_durations_sql_kv_response_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.sql-sql-response + exported_name: admission_wait_durations_sql_sql_response + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.sql-sql-response.locking-normal-pri + exported_name: admission_wait_durations_sql_sql_response_locking_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.sql-sql-response.normal-pri + exported_name: admission_wait_durations_sql_sql_response_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.elastic-cpu + exported_name: admission_wait_queue_length_elastic_cpu + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.elastic-cpu.bulk-normal-pri + 
exported_name: admission_wait_queue_length_elastic_cpu_bulk_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.elastic-cpu.normal-pri + exported_name: admission_wait_queue_length_elastic_cpu_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.elastic-stores + exported_name: admission_wait_queue_length_elastic_stores + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.elastic-stores.bulk-low-pri + exported_name: admission_wait_queue_length_elastic_stores_bulk_low_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.elastic-stores.bulk-normal-pri + exported_name: admission_wait_queue_length_elastic_stores_bulk_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv + exported_name: admission_wait_queue_length_kv + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv-stores + exported_name: admission_wait_queue_length_kv_stores + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv-stores.high-pri + exported_name: admission_wait_queue_length_kv_stores_high_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv-stores.locking-normal-pri + 
exported_name: admission_wait_queue_length_kv_stores_locking_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv-stores.normal-pri + exported_name: admission_wait_queue_length_kv_stores_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv-stores.user-high-pri + exported_name: admission_wait_queue_length_kv_stores_user_high_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv.high-pri + exported_name: admission_wait_queue_length_kv_high_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv.locking-normal-pri + exported_name: admission_wait_queue_length_kv_locking_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv.normal-pri + exported_name: admission_wait_queue_length_kv_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.sql-kv-response + exported_name: admission_wait_queue_length_sql_kv_response + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.sql-kv-response.locking-normal-pri + exported_name: admission_wait_queue_length_sql_kv_response_locking_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: 
admission.wait_queue_length.sql-kv-response.normal-pri + exported_name: admission_wait_queue_length_sql_kv_response_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.sql-sql-response + exported_name: admission_wait_queue_length_sql_sql_response + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.sql-sql-response.locking-normal-pri + exported_name: admission_wait_queue_length_sql_sql_response_locking_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.sql-sql-response.normal-pri + exported_name: admission_wait_queue_length_sql_sql_response_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: batch_requests.bytes + exported_name: batch_requests_bytes + description: Total byte count of batch requests processed + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: batch_requests.cross_region.bytes + exported_name: batch_requests_cross_region_bytes + description: "Total byte count of batch requests processed cross region when region\n\t\ttiers are configured" + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: batch_requests.cross_zone.bytes + exported_name: batch_requests_cross_zone_bytes + description: "Total bytes of batch requests processed cross zones within the same\n\t\tregion when zone tiers are configured. If region tiers are not set, it is\n\t\tassumed to be within the same region. 
To ensure accurate monitoring of\n\t\tcross-zone data transfer, region and zone tiers should be consistently\n\t\tconfigured across all nodes." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: batch_responses.bytes + exported_name: batch_responses_bytes + description: Total byte count of batch responses received + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: batch_responses.cross_region.bytes + exported_name: batch_responses_cross_region_bytes + description: "Total byte count of batch responses received cross region when region\n\t\ttiers are configured" + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: batch_responses.cross_zone.bytes + exported_name: batch_responses_cross_zone_bytes + description: "Total bytes of batch responses received cross zones within the same\n\t\tregion when zone tiers are configured. If region tiers are not set, it is\n\t\tassumed to be within the same region. To ensure accurate monitoring of\n\t\tcross-zone data transfer, region and zone tiers should be consistently\n\t\tconfigured across all nodes." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: capacity.reserved + exported_name: capacity_reserved + description: Capacity reserved for snapshots + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: exec.error + exported_name: exec_error + description: |- + Number of batch KV requests that failed to execute on this node. + + This count excludes transaction restart/abort errors. However, it will include + other errors expected during normal operation, such as ConditionFailedError. + This metric is thus not an indicator of KV health. 
+ y_axis_label: Batch KV Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: exec.latency + exported_name: exec_latency + description: | + Latency of batch KV requests (including errors) executed on this node. + + This measures requests already addressed to a single replica, from the moment + at which they arrive at the internal gRPC endpoint to the moment at which the + response (or an error) is returned. + + This latency includes in particular commit waits, conflict resolution and replication, + and end-users can easily produce high measurements via long-running transactions that + conflict with foreground traffic. This metric thus does not provide a good signal for + understanding the health of the KV layer. + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: exec.success + exported_name: exec_success + description: | + Number of batch KV requests executed successfully on this node. + + A request is considered to have executed 'successfully' if it either returns a result + or a transaction restart/abort error. 
+ y_axis_label: Batch KV Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: exportrequest.delay.total + exported_name: exportrequest_delay_total + description: Amount by which evaluation of Export requests was delayed + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: follower_reads.success_count + exported_name: follower_reads_success_count + description: Number of reads successfully processed by any replica + y_axis_label: Read Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gcbytesage + exported_name: gcbytesage + description: Cumulative age of non-live data + y_axis_label: Age + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: gossip.bytes.received + exported_name: gossip_bytes_received + description: Number of received gossip bytes + y_axis_label: Gossip Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gossip.bytes.sent + exported_name: gossip_bytes_sent + description: Number of sent gossip bytes + y_axis_label: Gossip Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gossip.callbacks.pending + exported_name: gossip_callbacks_pending + description: Number of gossip callbacks waiting to be processed + y_axis_label: Callbacks + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: gossip.callbacks.pending_duration + exported_name: gossip_callbacks_pending_duration + description: Duration of gossip callback queueing to be processed + y_axis_label: Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: gossip.callbacks.processed + exported_name: gossip_callbacks_processed + description: Number of gossip callbacks processed + y_axis_label: Callbacks + type: COUNTER + unit: COUNT + 
aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gossip.callbacks.processing_duration + exported_name: gossip_callbacks_processing_duration + description: Duration of gossip callback processing + y_axis_label: Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: gossip.connections.incoming + exported_name: gossip_connections_incoming + description: Number of active incoming gossip connections + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: gossip.connections.outgoing + exported_name: gossip_connections_outgoing + description: Number of active outgoing gossip connections + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: gossip.connections.refused + exported_name: gossip_connections_refused + description: Number of refused incoming gossip connections + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gossip.infos.received + exported_name: gossip_infos_received + description: Number of received gossip Info objects + y_axis_label: Infos + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gossip.infos.sent + exported_name: gossip_infos_sent + description: Number of sent gossip Info objects + y_axis_label: Infos + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gossip.messages.received + exported_name: gossip_messages_received + description: Number of received gossip messages + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gossip.messages.sent + exported_name: gossip_messages_sent + description: Number of sent gossip messages + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: intentage + 
exported_name: intentage + description: Cumulative age of locks + y_axis_label: Age + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: intentbytes + exported_name: intentbytes + description: Number of bytes in intent KV pairs + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: intentcount + exported_name: intentcount + description: Count of intent keys + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: intentresolver.async.throttled + exported_name: intentresolver_async_throttled + description: Number of intent resolution attempts not run asynchronously due to throttling + y_axis_label: Intent Resolutions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: intentresolver.finalized_txns.failed + exported_name: intentresolver_finalized_txns_failed + description: Number of finalized transaction cleanup failures. Transaction cleanup refers to the process of resolving all of a transactions intents and then garbage collecting its transaction record. + y_axis_label: Intent Resolutions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: intentresolver.intents.failed + exported_name: intentresolver_intents_failed + description: Number of intent resolution failures. The unit of measurement is a single intent, so if a batch of intent resolution requests fails, the metric will be incremented for each request in the batch. 
+ y_axis_label: Intent Resolutions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: intents.abort-attempts + exported_name: intents_abort_attempts + description: Count of (point or range) non-poisoning intent abort evaluation attempts + y_axis_label: Operations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: intents.poison-attempts + exported_name: intents_poison_attempts + description: Count of (point or range) poisoning intent abort evaluation attempts + y_axis_label: Operations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: intents.resolve-attempts + exported_name: intents_resolve_attempts + description: Count of (point or range) intent commit evaluation attempts + y_axis_label: Operations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: keybytes + exported_name: keybytes + description: Number of bytes taken up by keys + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: keycount + exported_name: keycount + description: Count of all keys + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.allocator.load_based_lease_transfers.cannot_find_better_candidate + exported_name: kv_allocator_load_based_lease_transfers_cannot_find_better_candidate + description: The number times the allocator determined that the lease was on the best possible replica + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_lease_transfers.delta_not_significant + exported_name: kv_allocator_load_based_lease_transfers_delta_not_significant + description: The number times the allocator determined that the delta between the existing store and the best candidate was not significant + y_axis_label: Attempts + type: COUNTER 
+ unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_lease_transfers.existing_not_overfull + exported_name: kv_allocator_load_based_lease_transfers_existing_not_overfull + description: The number times the allocator determined that the lease was not on an overfull store + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_lease_transfers.follow_the_workload + exported_name: kv_allocator_load_based_lease_transfers_follow_the_workload + description: The number times the allocator determined that the lease should be transferred to another replica for locality. + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_lease_transfers.missing_stats_for_existing_stores + exported_name: kv_allocator_load_based_lease_transfers_missing_stats_for_existing_stores + description: The number times the allocator was missing qps stats for the leaseholder + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_lease_transfers.should_transfer + exported_name: kv_allocator_load_based_lease_transfers_should_transfer + description: The number times the allocator determined that the lease should be transferred to another replica for better load distribution + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_replica_rebalancing.cannot_find_better_candidate + exported_name: kv_allocator_load_based_replica_rebalancing_cannot_find_better_candidate + description: The number times the allocator determined that the range was on the best possible stores + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
kv.allocator.load_based_replica_rebalancing.delta_not_significant + exported_name: kv_allocator_load_based_replica_rebalancing_delta_not_significant + description: The number times the allocator determined that the delta between an existing store and the best replacement candidate was not high enough + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_replica_rebalancing.existing_not_overfull + exported_name: kv_allocator_load_based_replica_rebalancing_existing_not_overfull + description: The number times the allocator determined that none of the range's replicas were on overfull stores + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_replica_rebalancing.missing_stats_for_existing_store + exported_name: kv_allocator_load_based_replica_rebalancing_missing_stats_for_existing_store + description: The number times the allocator was missing the qps stats for the existing store + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_replica_rebalancing.should_transfer + exported_name: kv_allocator_load_based_replica_rebalancing_should_transfer + description: The number times the allocator determined that the replica should be rebalanced to another store for better load distribution + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.closed_timestamp.max_behind_nanos + exported_name: kv_closed_timestamp_max_behind_nanos + description: Largest latency between realtime and replica max closed timestamp + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lag_by_cluster_setting + exported_name: kv_closed_timestamp_policy_lag_by_cluster_setting + 
description: Number of ranges with LAG_BY_CLUSTER_SETTING closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_equal_or_greater_than_300ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_equal_or_greater_than_300ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_EQUAL_OR_GREATER_THAN_300MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_100ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_100ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_100MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_120ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_120ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_120MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_140ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_140ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_140MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_160ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_160ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_160MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE 
+ unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_180ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_180ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_180MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_200ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_200ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_200MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_20ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_20ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_20MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_220ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_220ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_220MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_240ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_240ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_240MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_260ms + exported_name: 
kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_260ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_260MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_280ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_280ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_280MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_300ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_300ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_300MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_40ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_40ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_40MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_60ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_60ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_60MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_80ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_80ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_80MS closed 
timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_with_no_latency_info + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_with_no_latency_info + description: Number of ranges with LEAD_FOR_GLOBAL_READS_WITH_NO_LATENCY_INFO closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy_change + exported_name: kv_closed_timestamp_policy_change + description: Number of times closed timestamp policy change occurred on ranges + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.closed_timestamp.policy_latency_info_missing + exported_name: kv_closed_timestamp_policy_latency_info_missing + description: Number of times closed timestamp policy refresh had to use hardcoded network RTT due to missing node latency info for one or more replicas + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.concurrency.avg_lock_hold_duration_nanos + exported_name: kv_concurrency_avg_lock_hold_duration_nanos + description: Average lock hold duration across locks currently held in lock tables. 
Does not include replicated locks (intents) that are not held in memory + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.concurrency.avg_lock_wait_duration_nanos + exported_name: kv_concurrency_avg_lock_wait_duration_nanos + description: Average lock wait duration across requests currently waiting in lock wait-queues + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.concurrency.latch_conflict_wait_durations + exported_name: kv_concurrency_latch_conflict_wait_durations + description: Durations in nanoseconds spent on latch acquisition waiting for conflicts with other latches + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.concurrency.lock_wait_queue_waiters + exported_name: kv_concurrency_lock_wait_queue_waiters + description: Number of requests actively waiting in a lock wait-queue + y_axis_label: Lock-Queue Waiters + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.concurrency.locks + exported_name: kv_concurrency_locks + description: Number of active locks held in lock tables. Does not include replicated locks (intents) that are not held in memory + y_axis_label: Locks + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.concurrency.locks_with_wait_queues + exported_name: kv_concurrency_locks_with_wait_queues + description: Number of active locks held in lock tables with active wait-queues + y_axis_label: Locks + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.concurrency.max_lock_hold_duration_nanos + exported_name: kv_concurrency_max_lock_hold_duration_nanos + description: Maximum length of time any lock in a lock table is held. 
Does not include replicated locks (intents) that are not held in memory + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.concurrency.max_lock_wait_duration_nanos + exported_name: kv_concurrency_max_lock_wait_duration_nanos + description: Maximum lock wait duration across requests currently waiting in lock wait-queues + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.concurrency.max_lock_wait_queue_waiters_for_lock + exported_name: kv_concurrency_max_lock_wait_queue_waiters_for_lock + description: Maximum number of requests actively waiting in any single lock wait-queue + y_axis_label: Lock-Queue Waiters + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.loadsplitter.cleardirection + exported_name: kv_loadsplitter_cleardirection + description: Load-based splitter observed an access direction greater than 80% left or right in the samples. + y_axis_label: Occurrences + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.loadsplitter.nosplitkey + exported_name: kv_loadsplitter_nosplitkey + description: Load-based splitter could not find a split key. + y_axis_label: Occurrences + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.loadsplitter.popularkey + exported_name: kv_loadsplitter_popularkey + description: Load-based splitter could not find a split key and the most popular sampled split key occurs in >= 25% of the samples. 
+ y_axis_label: Occurrences + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.prober.planning_attempts + exported_name: kv_prober_planning_attempts + description: Number of attempts at planning out probes made; in order to probe KV we need to plan out which ranges to probe; + y_axis_label: Runs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.prober.planning_failures + exported_name: kv_prober_planning_failures + description: Number of attempts at planning out probes that failed; in order to probe KV we need to plan out which ranges to probe; if planning fails, then kvprober is not able to send probes to all ranges; consider alerting on this metric as a result + y_axis_label: Runs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.prober.read.attempts + exported_name: kv_prober_read_attempts + description: Number of attempts made to read probe KV, regardless of outcome + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.prober.read.failures + exported_name: kv_prober_read_failures + description: Number of attempts made to read probe KV that failed, whether due to error or timeout + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.prober.read.latency + exported_name: kv_prober_read_latency + description: Latency of successful KV read probes + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.prober.write.attempts + exported_name: kv_prober_write_attempts + description: Number of attempts made to write probe KV, regardless of outcome + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.prober.write.failures + exported_name: kv_prober_write_failures 
+ description: Number of attempts made to write probe KV that failed, whether due to error or timeout + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.prober.write.latency + exported_name: kv_prober_write_latency + description: Latency of successful KV write probes + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.prober.write.quarantine.oldest_duration + exported_name: kv_prober_write_quarantine_oldest_duration + description: The duration that the oldest range in the write quarantine pool has remained + y_axis_label: Seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.budget_allocation_blocked + exported_name: kv_rangefeed_budget_allocation_blocked + description: Number of times RangeFeed waited for budget availability + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.rangefeed.budget_allocation_failed + exported_name: kv_rangefeed_budget_allocation_failed + description: Number of times RangeFeed failed because memory budget was exceeded + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.rangefeed.buffered_registrations + exported_name: kv_rangefeed_buffered_registrations + description: Number of active RangeFeed buffered registrations + y_axis_label: Registrations + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.buffered_sender.queue_size + exported_name: kv_rangefeed_buffered_sender_queue_size + description: Number of entries in the buffered sender queue + y_axis_label: Pending Events + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.catchup_scan_nanos + exported_name: kv_rangefeed_catchup_scan_nanos + description: Time spent in RangeFeed catchup scan + 
y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.rangefeed.closed_timestamp.slow_ranges + exported_name: kv_rangefeed_closed_timestamp_slow_ranges + description: Number of ranges that have a closed timestamp lagging by more than 5x target lag. Periodically re-calculated + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.closed_timestamp.slow_ranges.cancelled + exported_name: kv_rangefeed_closed_timestamp_slow_ranges_cancelled + description: Number of rangefeeds that were cancelled due to a chronically lagging closed timestamp + y_axis_label: Cancellation Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.rangefeed.closed_timestamp_max_behind_nanos + exported_name: kv_rangefeed_closed_timestamp_max_behind_nanos + description: Largest latency between realtime and replica max closed timestamp for replicas that have active rangefeeds on them + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.mem_shared + exported_name: kv_rangefeed_mem_shared + description: Memory usage by rangefeeds + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.mem_system + exported_name: kv_rangefeed_mem_system + description: Memory usage by rangefeeds on system ranges + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.mux_stream_send.latency + exported_name: kv_rangefeed_mux_stream_send_latency + description: Latency of sending RangeFeed events to the client + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.mux_stream_send.slow_events + exported_name: kv_rangefeed_mux_stream_send_slow_events + description: Number of RangeFeed events that 
took longer than 10s to send to the client + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.rangefeed.output_loop_unbuffered_registration_nanos + exported_name: kv_rangefeed_output_loop_unbuffered_registration_nanos + description: Duration of the Rangefeed O(range) output loop goroutine. This is only applicable for unbuffered registrations since buffered registrations spawn long-lived goroutines. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.rangefeed.processors_goroutine + exported_name: kv_rangefeed_processors_goroutine + description: Number of active RangeFeed processors using goroutines + y_axis_label: Processors + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.processors_scheduler + exported_name: kv_rangefeed_processors_scheduler + description: Number of active RangeFeed processors using scheduler + y_axis_label: Processors + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.registrations + exported_name: kv_rangefeed_registrations + description: Number of active RangeFeed registrations + y_axis_label: Registrations + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.scheduled_processor.queue_timeout + exported_name: kv_rangefeed_scheduled_processor_queue_timeout + description: Number of times the RangeFeed processor shut down because of a queue send timeout + y_axis_label: Failure Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.rangefeed.scheduler.normal.latency + exported_name: kv_rangefeed_scheduler_normal_latency + description: KV RangeFeed normal scheduler latency + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.scheduler.normal.queue_size + 
exported_name: kv_rangefeed_scheduler_normal_queue_size + description: Number of entries in the KV RangeFeed normal scheduler queue + y_axis_label: Pending Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.scheduler.system.latency + exported_name: kv_rangefeed_scheduler_system_latency + description: KV RangeFeed system scheduler latency + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.scheduler.system.queue_size + exported_name: kv_rangefeed_scheduler_system_queue_size + description: Number of entries in the KV RangeFeed system scheduler queue + y_axis_label: Pending Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.unbuffered_registrations + exported_name: kv_rangefeed_unbuffered_registrations + description: Number of active RangeFeed unbuffered registrations + y_axis_label: Registrations + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.replica_circuit_breaker.num_tripped_events + exported_name: kv_replica_circuit_breaker_num_tripped_events + description: Number of times the per-Replica circuit breakers tripped since process start. + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.replica_circuit_breaker.num_tripped_replicas + exported_name: kv_replica_circuit_breaker_num_tripped_replicas + description: | + Number of Replicas for which the per-Replica circuit breaker is currently tripped. + + A nonzero value indicates range or replica unavailability, and should be investigated. + Replicas in this state will fail-fast all inbound requests. 
+ y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.replica_read_batch_evaluate.dropped_latches_before_eval + exported_name: kv_replica_read_batch_evaluate_dropped_latches_before_eval + description: Number of times read-only batches dropped latches before evaluation. + y_axis_label: Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.replica_read_batch_evaluate.latency + exported_name: kv_replica_read_batch_evaluate_latency + description: |- + Execution duration for evaluating a BatchRequest on the read-only path after latches have been acquired. + + A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately. + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.replica_read_batch_evaluate.without_interleaving_iter + exported_name: kv_replica_read_batch_evaluate_without_interleaving_iter + description: Number of read-only batches evaluated without an intent interleaving iter. + y_axis_label: Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.replica_write_batch_evaluate.latency + exported_name: kv_replica_write_batch_evaluate_latency + description: |- + Execution duration for evaluating a BatchRequest on the read-write path after latches have been acquired. + + A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately. + Note that the measurement does not include the duration for replicating the evaluated command. + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.split.estimated_stats + exported_name: kv_split_estimated_stats + description: Number of splits that computed estimated MVCC stats. 
+ y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.split.total_bytes_estimates + exported_name: kv_split_total_bytes_estimates + description: Number of total bytes difference between the pre-split and post-split MVCC stats. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.tenant_rate_limit.current_blocked + exported_name: kv_tenant_rate_limit_current_blocked + description: Number of requests currently blocked by the rate limiter + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.tenant_rate_limit.num_tenants + exported_name: kv_tenant_rate_limit_num_tenants + description: Number of tenants currently being tracked + y_axis_label: Tenants + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.tenant_rate_limit.read_batches_admitted + exported_name: kv_tenant_rate_limit_read_batches_admitted + description: Number of read batches admitted by the rate limiter + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.tenant_rate_limit.read_bytes_admitted + exported_name: kv_tenant_rate_limit_read_bytes_admitted + description: Number of read bytes admitted by the rate limiter + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.tenant_rate_limit.read_requests_admitted + exported_name: kv_tenant_rate_limit_read_requests_admitted + description: Number of read requests admitted by the rate limiter + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.tenant_rate_limit.write_batches_admitted + exported_name: kv_tenant_rate_limit_write_batches_admitted + description: Number of write batches admitted by the rate limiter + y_axis_label: Requests + type: 
COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.tenant_rate_limit.write_bytes_admitted + exported_name: kv_tenant_rate_limit_write_bytes_admitted + description: Number of write bytes admitted by the rate limiter + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.tenant_rate_limit.write_requests_admitted + exported_name: kv_tenant_rate_limit_write_requests_admitted + description: Number of write requests admitted by the rate limiter + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.eval_wait.elastic.duration + exported_name: kvflowcontrol_eval_wait_elastic_duration + description: Latency histogram for time elastic requests spent waiting for flow tokens to evaluate + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.eval_wait.elastic.requests.admitted + exported_name: kvflowcontrol_eval_wait_elastic_requests_admitted + description: Number of elastic requests admitted by the flow controller + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.eval_wait.elastic.requests.bypassed + exported_name: kvflowcontrol_eval_wait_elastic_requests_bypassed + description: Number of waiting elastic requests that bypassed the flow controller due to the evaluating replica not being the leader + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.eval_wait.elastic.requests.errored + exported_name: kvflowcontrol_eval_wait_elastic_requests_errored + description: Number of elastic requests that errored out while waiting for flow tokens + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
kvflowcontrol.eval_wait.elastic.requests.waiting + exported_name: kvflowcontrol_eval_wait_elastic_requests_waiting + description: Number of elastic requests waiting for flow tokens + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.eval_wait.regular.duration + exported_name: kvflowcontrol_eval_wait_regular_duration + description: Latency histogram for time regular requests spent waiting for flow tokens to evaluate + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.eval_wait.regular.requests.admitted + exported_name: kvflowcontrol_eval_wait_regular_requests_admitted + description: Number of regular requests admitted by the flow controller + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.eval_wait.regular.requests.bypassed + exported_name: kvflowcontrol_eval_wait_regular_requests_bypassed + description: Number of waiting regular requests that bypassed the flow controller due to the evaluating replica not being the leader + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.eval_wait.regular.requests.errored + exported_name: kvflowcontrol_eval_wait_regular_requests_errored + description: Number of regular requests that errored out while waiting for flow tokens + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.eval_wait.regular.requests.waiting + exported_name: kvflowcontrol_eval_wait_regular_requests_waiting + description: Number of regular requests waiting for flow tokens + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.range_controller.count + exported_name: kvflowcontrol_range_controller_count + description: Gauge 
of range flow controllers currently open, this should align with the number of leaders + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.send_queue.bytes + exported_name: kvflowcontrol_send_queue_bytes + description: Byte size of all raft entries queued for sending to followers, waiting on available elastic send tokens + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.send_queue.count + exported_name: kvflowcontrol_send_queue_count + description: Count of all raft entries queued for sending to followers, waiting on available elastic send tokens + y_axis_label: Bytes + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.send_queue.prevent.count + exported_name: kvflowcontrol_send_queue_prevent_count + description: Counter of replication streams that were prevented from forming a send queue + y_axis_label: Preventions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.send_queue.scheduled.deducted_bytes + exported_name: kvflowcontrol_send_queue_scheduled_deducted_bytes + description: Gauge of elastic send token bytes already deducted by replication streams waiting on the scheduler + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.send_queue.scheduled.force_flush + exported_name: kvflowcontrol_send_queue_scheduled_force_flush + description: Gauge of replication streams scheduled to force flush their send queue + y_axis_label: Scheduled force flushes + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.eval.elastic.blocked_count + exported_name: kvflowcontrol_streams_eval_elastic_blocked_count + description: Number of eval replication streams with no flow tokens available for elastic requests + y_axis_label: Count + type: GAUGE + unit: COUNT + 
aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.eval.elastic.total_count + exported_name: kvflowcontrol_streams_eval_elastic_total_count + description: Total number of eval replication streams for elastic requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.eval.regular.blocked_count + exported_name: kvflowcontrol_streams_eval_regular_blocked_count + description: Number of eval replication streams with no flow tokens available for regular requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.eval.regular.total_count + exported_name: kvflowcontrol_streams_eval_regular_total_count + description: Total number of eval replication streams for regular requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.send.elastic.blocked_count + exported_name: kvflowcontrol_streams_send_elastic_blocked_count + description: Number of send replication streams with no flow tokens available for elastic requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.send.elastic.total_count + exported_name: kvflowcontrol_streams_send_elastic_total_count + description: Total number of send replication streams for elastic requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.send.regular.blocked_count + exported_name: kvflowcontrol_streams_send_regular_blocked_count + description: Number of send replication streams with no flow tokens available for regular requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.send.regular.total_count + exported_name: kvflowcontrol_streams_send_regular_total_count + description: Total number of send 
replication streams for regular requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.tokens.eval.elastic.available + exported_name: kvflowcontrol_tokens_eval_elastic_available + description: Flow eval tokens available for elastic requests, across all replication streams + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.tokens.eval.elastic.deducted + exported_name: kvflowcontrol_tokens_eval_elastic_deducted + description: Flow eval tokens deducted by elastic requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.eval.elastic.returned + exported_name: kvflowcontrol_tokens_eval_elastic_returned + description: Flow eval tokens returned by elastic requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.eval.elastic.returned.disconnect + exported_name: kvflowcontrol_tokens_eval_elastic_returned_disconnect + description: Flow eval tokens returned early by elastic requests due to disconnects, across all replication streams; this is a subset of returned tokens + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.eval.elastic.unaccounted + exported_name: kvflowcontrol_tokens_eval_elastic_unaccounted + description: Flow eval tokens returned by elastic requests that were unaccounted for, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.eval.regular.available + exported_name: kvflowcontrol_tokens_eval_regular_available + description: Flow eval tokens available for regular requests, across all replication streams 
+ y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.tokens.eval.regular.deducted + exported_name: kvflowcontrol_tokens_eval_regular_deducted + description: Flow eval tokens deducted by regular requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.eval.regular.returned + exported_name: kvflowcontrol_tokens_eval_regular_returned + description: Flow eval tokens returned by regular requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.eval.regular.returned.disconnect + exported_name: kvflowcontrol_tokens_eval_regular_returned_disconnect + description: Flow eval tokens returned early by regular due disconnects, across all replication stream, this is a subset of returned tokens + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.eval.regular.unaccounted + exported_name: kvflowcontrol_tokens_eval_regular_unaccounted + description: Flow eval tokens returned by regular requests that were unaccounted for, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.elastic.available + exported_name: kvflowcontrol_tokens_send_elastic_available + description: Flow send tokens available for elastic requests, across all replication streams + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.tokens.send.elastic.deducted + exported_name: kvflowcontrol_tokens_send_elastic_deducted + description: Flow send tokens deducted by elastic requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + 
unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.elastic.deducted.force_flush_send_queue + exported_name: kvflowcontrol_tokens_send_elastic_deducted_force_flush_send_queue + description: Flow send tokens deducted by elastic requests, across all replication streams due to force flushing the stream's send queue + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.elastic.deducted.prevent_send_queue + exported_name: kvflowcontrol_tokens_send_elastic_deducted_prevent_send_queue + description: Flow send tokens deducted by elastic requests, across all replication streams to prevent forming a send queue + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.elastic.returned + exported_name: kvflowcontrol_tokens_send_elastic_returned + description: Flow send tokens returned by elastic requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.elastic.returned.disconnect + exported_name: kvflowcontrol_tokens_send_elastic_returned_disconnect + description: Flow send tokens returned early by elastic due disconnects, across all replication stream, this is a subset of returned tokens + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.elastic.unaccounted + exported_name: kvflowcontrol_tokens_send_elastic_unaccounted + description: Flow send tokens returned by elastic requests that were unaccounted for, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.regular.available + exported_name: 
kvflowcontrol_tokens_send_regular_available + description: Flow send tokens available for regular requests, across all replication streams + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.tokens.send.regular.deducted + exported_name: kvflowcontrol_tokens_send_regular_deducted + description: Flow send tokens deducted by regular requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.regular.deducted.prevent_send_queue + exported_name: kvflowcontrol_tokens_send_regular_deducted_prevent_send_queue + description: Flow send tokens deducted by regular requests, across all replication streams to prevent forming a send queue + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.regular.returned + exported_name: kvflowcontrol_tokens_send_regular_returned + description: Flow send tokens returned by regular requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.regular.returned.disconnect + exported_name: kvflowcontrol_tokens_send_regular_returned_disconnect + description: Flow send tokens returned early by regular due disconnects, across all replication stream, this is a subset of returned tokens + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.regular.unaccounted + exported_name: kvflowcontrol_tokens_send_regular_unaccounted + description: Flow send tokens returned by regular requests that were unaccounted for, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: leases.epoch + 
exported_name: leases_epoch + description: Number of replica leaseholders using epoch-based leases + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: leases.error + exported_name: leases_error + description: Number of failed lease requests + y_axis_label: Lease Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: leases.expiration + exported_name: leases_expiration + description: Number of replica leaseholders using expiration-based leases + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: leases.leader + exported_name: leases_leader + description: Number of replica leaseholders using leader leases + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: leases.liveness + exported_name: leases_liveness + description: Number of replica leaseholders for the liveness range(s) + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: leases.preferences.less-preferred + exported_name: leases_preferences_less_preferred + description: Number of replica leaseholders which satisfy a lease preference which is not the most preferred + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: leases.preferences.violating + exported_name: leases_preferences_violating + description: Number of replica leaseholders which violate lease preferences + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: leases.requests.latency + exported_name: leases_requests_latency + description: Lease request latency (all types and outcomes, coalesced) + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: leases.success + exported_name: leases_success + description: Number of successful lease requests + y_axis_label: 
Lease Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: leases.transfers.error + exported_name: leases_transfers_error + description: Number of failed lease transfers + y_axis_label: Lease Transfers + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: leases.transfers.locks_written + exported_name: leases_transfers_locks_written + description: Number of locks written to storage during lease transfers + y_axis_label: Locks Written + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: livebytes + exported_name: livebytes + description: Number of bytes of live data (keys plus values) + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: livecount + exported_name: livecount + description: Count of live keys + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: liveness.epochincrements + exported_name: liveness_epochincrements + description: Number of times this node has incremented its liveness epoch + y_axis_label: Epochs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: liveness.heartbeatfailures + exported_name: liveness_heartbeatfailures + description: Number of failed node liveness heartbeats from this node + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: liveness.heartbeatsinflight + exported_name: liveness_heartbeatsinflight + description: Number of in-flight liveness heartbeats from this node + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: liveness.heartbeatsuccesses + exported_name: liveness_heartbeatsuccesses + description: Number of successful node liveness heartbeats from this node + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + 
derivative: NON_NEGATIVE_DERIVATIVE + - name: lockbytes + exported_name: lockbytes + description: Number of bytes taken up by replicated lock key-values (shared and exclusive strength, not intent strength) + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: lockcount + exported_name: lockcount + description: Count of replicated locks (shared, exclusive, and intent strength) + y_axis_label: Locks + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: node-id + exported_name: node_id + description: node ID with labels for advertised RPC and HTTP addresses + y_axis_label: Node ID + type: GAUGE + unit: CONST + aggregation: AVG + derivative: NONE + - name: queue.consistency.pending + exported_name: queue_consistency_pending + description: Number of pending replicas in the consistency checker queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.consistency.process.failure + exported_name: queue_consistency_process_failure + description: Number of replicas which failed processing in the consistency checker queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.consistency.process.success + exported_name: queue_consistency_process_success + description: Number of replicas successfully processed by the consistency checker queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.consistency.processingnanos + exported_name: queue_consistency_processingnanos + description: Nanoseconds spent processing replicas in the consistency checker queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.abortspanconsidered + exported_name: queue_gc_info_abortspanconsidered + description: Number of AbortSpan 
entries old enough to be considered for removal + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.abortspangcnum + exported_name: queue_gc_info_abortspangcnum + description: Number of AbortSpan entries fit for removal + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.abortspanscanned + exported_name: queue_gc_info_abortspanscanned + description: Number of transactions present in the AbortSpan scanned from the engine + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.clearrangefailed + exported_name: queue_gc_info_clearrangefailed + description: Number of failed ClearRange operations during GC + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.clearrangesuccess + exported_name: queue_gc_info_clearrangesuccess + description: Number of successful ClearRange operations during GC + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.enqueuehighpriority + exported_name: queue_gc_info_enqueuehighpriority + description: Number of replicas enqueued for GC with high priority + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.intentsconsidered + exported_name: queue_gc_info_intentsconsidered + description: Number of 'old' intents + y_axis_label: Intents + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.intenttxns + exported_name: queue_gc_info_intenttxns + description: Number of associated distinct transactions + y_axis_label: Txns + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: 
NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.numkeysaffected + exported_name: queue_gc_info_numkeysaffected + description: Number of keys with GC'able data + y_axis_label: Keys + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.numrangekeysaffected + exported_name: queue_gc_info_numrangekeysaffected + description: Number of range keys GC'able + y_axis_label: Range Keys + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.pushtxn + exported_name: queue_gc_info_pushtxn + description: Number of attempted pushes + y_axis_label: Pushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.resolvefailed + exported_name: queue_gc_info_resolvefailed + description: Number of cleanup intent failures during GC + y_axis_label: Intent Resolutions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.resolvesuccess + exported_name: queue_gc_info_resolvesuccess + description: Number of successful intent resolutions + y_axis_label: Intent Resolutions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.resolvetotal + exported_name: queue_gc_info_resolvetotal + description: Number of attempted intent resolutions + y_axis_label: Intent Resolutions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.transactionresolvefailed + exported_name: queue_gc_info_transactionresolvefailed + description: Number of intent cleanup failures for local transactions during GC + y_axis_label: Intent Resolutions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.transactionspangcaborted + exported_name: queue_gc_info_transactionspangcaborted + description: Number of GC'able entries corresponding to 
aborted txns + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.transactionspangccommitted + exported_name: queue_gc_info_transactionspangccommitted + description: Number of GC'able entries corresponding to committed txns + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.transactionspangcpending + exported_name: queue_gc_info_transactionspangcpending + description: Number of GC'able entries corresponding to pending txns + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.transactionspangcprepared + exported_name: queue_gc_info_transactionspangcprepared + description: Number of GC'able entries corresponding to prepared txns + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.transactionspangcstaging + exported_name: queue_gc_info_transactionspangcstaging + description: Number of GC'able entries corresponding to staging txns + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.transactionspanscanned + exported_name: queue_gc_info_transactionspanscanned + description: Number of entries in transaction spans scanned from the engine + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.pending + exported_name: queue_gc_pending + description: Number of pending replicas in the MVCC GC queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.gc.process.failure + exported_name: queue_gc_process_failure + description: Number of replicas which failed processing in the MVCC GC queue + y_axis_label: Replicas + type: 
COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.process.success + exported_name: queue_gc_process_success + description: Number of replicas successfully processed by the MVCC GC queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.processingnanos + exported_name: queue_gc_processingnanos + description: Nanoseconds spent processing replicas in the MVCC GC queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.lease.pending + exported_name: queue_lease_pending + description: Number of pending replicas in the replica lease queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.lease.process.failure + exported_name: queue_lease_process_failure + description: Number of replicas which failed processing in the replica lease queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.lease.process.success + exported_name: queue_lease_process_success + description: Number of replicas successfully processed by the replica lease queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.lease.processingnanos + exported_name: queue_lease_processingnanos + description: Nanoseconds spent processing replicas in the replica lease queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.lease.purgatory + exported_name: queue_lease_purgatory + description: Number of replicas in the lease queue's purgatory, awaiting lease transfer operations + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.merge.pending + 
exported_name: queue_merge_pending + description: Number of pending replicas in the merge queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.merge.process.failure + exported_name: queue_merge_process_failure + description: Number of replicas which failed processing in the merge queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.merge.process.success + exported_name: queue_merge_process_success + description: Number of replicas successfully processed by the merge queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.merge.processingnanos + exported_name: queue_merge_processingnanos + description: Nanoseconds spent processing replicas in the merge queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.merge.purgatory + exported_name: queue_merge_purgatory + description: Number of replicas in the merge queue's purgatory, waiting to become mergeable + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.raftlog.pending + exported_name: queue_raftlog_pending + description: Number of pending replicas in the Raft log queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.raftlog.process.failure + exported_name: queue_raftlog_process_failure + description: Number of replicas which failed processing in the Raft log queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.raftlog.process.success + exported_name: queue_raftlog_process_success + description: Number of replicas successfully processed by the Raft log queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + 
aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.raftlog.processingnanos + exported_name: queue_raftlog_processingnanos + description: Nanoseconds spent processing replicas in the Raft log queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.raftsnapshot.pending + exported_name: queue_raftsnapshot_pending + description: Number of pending replicas in the Raft repair queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.raftsnapshot.process.failure + exported_name: queue_raftsnapshot_process_failure + description: Number of replicas which failed processing in the Raft repair queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.raftsnapshot.process.success + exported_name: queue_raftsnapshot_process_success + description: Number of replicas successfully processed by the Raft repair queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.raftsnapshot.processingnanos + exported_name: queue_raftsnapshot_processingnanos + description: Nanoseconds spent processing replicas in the Raft repair queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicagc.pending + exported_name: queue_replicagc_pending + description: Number of pending replicas in the replica GC queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.replicagc.process.failure + exported_name: queue_replicagc_process_failure + description: Number of replicas which failed processing in the replica GC queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
queue.replicagc.process.success + exported_name: queue_replicagc_process_success + description: Number of replicas successfully processed by the replica GC queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicagc.processingnanos + exported_name: queue_replicagc_processingnanos + description: Nanoseconds spent processing replicas in the replica GC queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicagc.removereplica + exported_name: queue_replicagc_removereplica + description: Number of replica removals attempted by the replica GC queue + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.addnonvoterreplica + exported_name: queue_replicate_addnonvoterreplica + description: Number of non-voter replica additions attempted by the replicate queue + y_axis_label: Replica Additions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.addreplica + exported_name: queue_replicate_addreplica + description: Number of replica additions attempted by the replicate queue + y_axis_label: Replica Additions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.addreplica.error + exported_name: queue_replicate_addreplica_error + description: Number of failed replica additions processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.addreplica.success + exported_name: queue_replicate_addreplica_success + description: Number of successful replica additions processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: 
NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.addvoterreplica + exported_name: queue_replicate_addvoterreplica + description: Number of voter replica additions attempted by the replicate queue + y_axis_label: Replica Additions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.nonvoterpromotions + exported_name: queue_replicate_nonvoterpromotions + description: Number of non-voters promoted to voters by the replicate queue + y_axis_label: Promotions of Non Voters to Voters + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.pending + exported_name: queue_replicate_pending + description: Number of pending replicas in the replicate queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.replicate.process.failure + exported_name: queue_replicate_process_failure + description: Number of replicas which failed processing in the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.process.success + exported_name: queue_replicate_process_success + description: Number of replicas successfully processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.processingnanos + exported_name: queue_replicate_processingnanos + description: Nanoseconds spent processing replicas in the replicate queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.purgatory + exported_name: queue_replicate_purgatory + description: Number of replicas in the replicate queue's purgatory, awaiting allocation options + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: 
queue.replicate.rebalancenonvoterreplica + exported_name: queue_replicate_rebalancenonvoterreplica + description: Number of non-voter replica rebalancer-initiated additions attempted by the replicate queue + y_axis_label: Replica Additions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.rebalancereplica + exported_name: queue_replicate_rebalancereplica + description: Number of replica rebalancer-initiated additions attempted by the replicate queue + y_axis_label: Replica Additions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.rebalancevoterreplica + exported_name: queue_replicate_rebalancevoterreplica + description: Number of voter replica rebalancer-initiated additions attempted by the replicate queue + y_axis_label: Replica Additions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedeadnonvoterreplica + exported_name: queue_replicate_removedeadnonvoterreplica + description: Number of dead non-voter replica removals attempted by the replicate queue (typically in response to a node outage) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedeadreplica + exported_name: queue_replicate_removedeadreplica + description: Number of dead replica removals attempted by the replicate queue (typically in response to a node outage) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedeadreplica.error + exported_name: queue_replicate_removedeadreplica_error + description: Number of failed dead replica removals processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
queue.replicate.removedeadreplica.success + exported_name: queue_replicate_removedeadreplica_success + description: Number of successful dead replica removals processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedeadvoterreplica + exported_name: queue_replicate_removedeadvoterreplica + description: Number of dead voter replica removals attempted by the replicate queue (typically in response to a node outage) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedecommissioningnonvoterreplica + exported_name: queue_replicate_removedecommissioningnonvoterreplica + description: Number of decommissioning non-voter replica removals attempted by the replicate queue (typically in response to a node outage) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedecommissioningreplica + exported_name: queue_replicate_removedecommissioningreplica + description: Number of decommissioning replica removals attempted by the replicate queue (typically in response to a node outage) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedecommissioningreplica.error + exported_name: queue_replicate_removedecommissioningreplica_error + description: Number of failed decommissioning replica removals processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedecommissioningreplica.success + exported_name: queue_replicate_removedecommissioningreplica_success + description: Number of successful decommissioning replica removals processed by the replicate queue + 
y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedecommissioningvoterreplica + exported_name: queue_replicate_removedecommissioningvoterreplica + description: Number of decommissioning voter replica removals attempted by the replicate queue (typically in response to a node outage) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removelearnerreplica + exported_name: queue_replicate_removelearnerreplica + description: Number of learner replica removals attempted by the replicate queue (typically due to internal race conditions) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removenonvoterreplica + exported_name: queue_replicate_removenonvoterreplica + description: Number of non-voter replica removals attempted by the replicate queue (typically in response to a rebalancer-initiated addition) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removereplica + exported_name: queue_replicate_removereplica + description: Number of replica removals attempted by the replicate queue (typically in response to a rebalancer-initiated addition) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removereplica.error + exported_name: queue_replicate_removereplica_error + description: Number of failed replica removals processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removereplica.success + exported_name: queue_replicate_removereplica_success + description: Number of successful replica removals 
processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removevoterreplica + exported_name: queue_replicate_removevoterreplica + description: Number of voter replica removals attempted by the replicate queue (typically in response to a rebalancer-initiated addition) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.replacedeadreplica.error + exported_name: queue_replicate_replacedeadreplica_error + description: Number of failed dead replica replacements processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.replacedeadreplica.success + exported_name: queue_replicate_replacedeadreplica_success + description: Number of successful dead replica replacements processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.replacedecommissioningreplica.success + exported_name: queue_replicate_replacedecommissioningreplica_success + description: Number of successful decommissioning replica replacements processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.transferlease + exported_name: queue_replicate_transferlease + description: Number of range lease transfers attempted by the replicate queue + y_axis_label: Lease Transfers + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.voterdemotions + exported_name: queue_replicate_voterdemotions + description: Number of voters demoted to non-voters by the replicate queue + y_axis_label: Demotions of Voters to Non Voters + type: 
COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.split.load_based + exported_name: queue_split_load_based + description: Number of range splits due to a range being greater than the configured max range load + y_axis_label: Range Splits + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.split.pending + exported_name: queue_split_pending + description: Number of pending replicas in the split queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.split.process.failure + exported_name: queue_split_process_failure + description: Number of replicas which failed processing in the split queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.split.process.success + exported_name: queue_split_process_success + description: Number of replicas successfully processed by the split queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.split.processingnanos + exported_name: queue_split_processingnanos + description: Nanoseconds spent processing replicas in the split queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.split.purgatory + exported_name: queue_split_purgatory + description: Number of replicas in the split queue's purgatory, waiting to become splittable + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.split.size_based + exported_name: queue_split_size_based + description: Number of range splits due to a range being greater than the configured max range size + y_axis_label: Range Splits + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.split.span_config_based 
+ exported_name: queue_split_span_config_based + description: Number of range splits due to span configuration + y_axis_label: Range Splits + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.tsmaintenance.pending + exported_name: queue_tsmaintenance_pending + description: Number of pending replicas in the time series maintenance queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.tsmaintenance.process.failure + exported_name: queue_tsmaintenance_process_failure + description: Number of replicas which failed processing in the time series maintenance queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.tsmaintenance.process.success + exported_name: queue_tsmaintenance_process_success + description: Number of replicas successfully processed by the time series maintenance queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.tsmaintenance.processingnanos + exported_name: queue_tsmaintenance_processingnanos + description: Nanoseconds spent processing replicas in the time series maintenance queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.commands.pending + exported_name: raft_commands_pending + description: |- + Number of Raft commands proposed and pending. + + The number of Raft commands that the leaseholders are tracking as in-flight. + These commands will be periodically reproposed until they are applied or until + they fail, either unequivocally or ambiguously. + y_axis_label: Commands + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.commands.proposed + exported_name: raft_commands_proposed + description: |- + Number of Raft commands proposed. 
+ + The number of proposals and all kinds of reproposals made by leaseholders. This + metric approximates the number of commands submitted through Raft. + y_axis_label: Commands + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.commands.reproposed.new-lai + exported_name: raft_commands_reproposed_new_lai + description: |- + Number of Raft commands re-proposed with a newer LAI. + + The number of Raft commands that leaseholders re-proposed with a modified LAI. + Such re-proposals happen for commands that are committed to Raft out of intended + order, and hence can not be applied as is. + y_axis_label: Commands + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.commands.reproposed.unchanged + exported_name: raft_commands_reproposed_unchanged + description: |- + Number of Raft commands re-proposed without modification. + + The number of Raft commands that leaseholders re-proposed without modification. + Such re-proposals happen for commands that are not committed/applied within a + timeout, and have a high chance of being dropped. + y_axis_label: Commands + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.commandsapplied + exported_name: raft_commandsapplied + description: |- + Number of Raft commands applied. + + This measurement is taken on the Raft apply loops of all Replicas (leaders and + followers alike), meaning that it does not measure the number of Raft commands + *proposed* (in the hypothetical extreme case, all Replicas may apply all commands + through snapshots, thus not increasing this metric at all). + Instead, it is a proxy for how much work is being done advancing the Replica + state machines on this node. 
+ y_axis_label: Commands + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.dropped + exported_name: raft_dropped + description: Number of Raft proposals dropped (this counts individual raftpb.Entry, not raftpb.MsgProp) + y_axis_label: Proposals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.dropped_leader + exported_name: raft_dropped_leader + description: Number of Raft proposals dropped by a Replica that believes itself to be the leader; each update also increments `raft.dropped` (this counts individual raftpb.Entry, not raftpb.MsgProp) + y_axis_label: Proposals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.entrycache.accesses + exported_name: raft_entrycache_accesses + description: Number of cache lookups in the Raft entry cache + y_axis_label: Accesses + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.entrycache.bytes + exported_name: raft_entrycache_bytes + description: Aggregate size of all Raft entries in the Raft entry cache + y_axis_label: Entry Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: raft.entrycache.hits + exported_name: raft_entrycache_hits + description: Number of successful cache lookups in the Raft entry cache + y_axis_label: Hits + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.entrycache.read_bytes + exported_name: raft_entrycache_read_bytes + description: Counter of bytes in entries returned from the Raft entry cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.entrycache.size + exported_name: raft_entrycache_size + description: Number of Raft entries in the Raft entry cache + y_axis_label: Entry Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE 
+ - name: raft.flows.entered.state_probe + exported_name: raft_flows_entered_state_probe + description: The number of leader->peer flows transitioned to StateProbe + y_axis_label: Flows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.flows.entered.state_replicate + exported_name: raft_flows_entered_state_replicate + description: The number of leader->peer flows transitioned to StateReplicate + y_axis_label: Flows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.flows.entered.state_snapshot + exported_name: raft_flows_entered_state_snapshot + description: The number of leader->peer flows transitioned to StateSnapshot + y_axis_label: Flows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.flows.state_probe + exported_name: raft_flows_state_probe + description: Number of leader->peer flows in StateProbe + y_axis_label: Flows + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.flows.state_replicate + exported_name: raft_flows_state_replicate + description: Number of leader->peer flows in StateReplicate + y_axis_label: Flows + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.flows.state_snapshot + exported_name: raft_flows_state_snapshot + description: Number of leader->peer flows in StateSnapshot + y_axis_label: Flows + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.fortification.skipped_no_support + exported_name: raft_fortification_skipped_no_support + description: The number of fortification requests that were skipped (not sent) due to lack of store liveness support + y_axis_label: Skipped Fortifications + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.fortification_resp.accepted + exported_name: raft_fortification_resp_accepted + description: The number of accepted 
fortification responses. Calculated on the raft leader + y_axis_label: Accepted Fortification Responses + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.fortification_resp.rejected + exported_name: raft_fortification_resp_rejected + description: The number of rejected fortification responses. Calculated on the raft leader + y_axis_label: Rejected Fortification Responses + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.heartbeats.pending + exported_name: raft_heartbeats_pending + description: Number of pending heartbeats and responses waiting to be coalesced + y_axis_label: Messages + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.loaded_entries.bytes + exported_name: raft_loaded_entries_bytes + description: Bytes allocated by raft Storage.Entries calls that are still kept in memory + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: raft.loaded_entries.reserved.bytes + exported_name: raft_loaded_entries_reserved_bytes + description: Bytes allocated by raft Storage.Entries calls that are still kept in memory + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: raft.process.applycommitted.latency + exported_name: raft_process_applycommitted_latency + description: |- + Latency histogram for applying all committed Raft commands in a Raft ready. + + This measures the end-to-end latency of applying all commands in a Raft ready. Note that + this closes over possibly multiple measurements of the 'raft.process.commandcommit.latency' + metric, which receives datapoints for each sub-batch processed in the process. 
+ y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: raft.process.commandcommit.latency + exported_name: raft_process_commandcommit_latency + description: | + Latency histogram for applying a batch of Raft commands to the state machine. + + This metric is misnamed: it measures the latency for *applying* a batch of + committed Raft commands to a Replica state machine. This requires only + non-durable I/O (except for replication configuration changes). + + Note that a "batch" in this context is really a sub-batch of the batch received + for application during raft ready handling. The + 'raft.process.applycommitted.latency' histogram is likely more suitable in most + cases, as it measures the total latency across all sub-batches (i.e. the sum of + commandcommit.latency for a complete batch). + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: raft.process.handleready.latency + exported_name: raft_process_handleready_latency + description: | + Latency histogram for handling a Raft ready. + + This measures the end-to-end-latency of the Raft state advancement loop, including: + - snapshot application + - SST ingestion + - durably appending to the Raft log (i.e. includes fsync) + - entry application (incl. replicated side effects, notably log truncation) + + These include work measured in 'raft.process.commandcommit.latency' and + 'raft.process.applycommitted.latency'. However, matching percentiles of these + metrics may be *higher* than handleready, since not every handleready cycle + leads to an update of the others. For example, under tpcc-100 on a single node, + the handleready count is approximately twice the logcommit count (and logcommit + count tracks closely with applycommitted count). + + High percentile outliers can be caused by individual large Raft commands or + storage layer blips. Lower percentile (e.g. 
50th) increases are often driven by + CPU exhaustion or storage layer slowdowns. + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: raft.process.logcommit.latency + exported_name: raft_process_logcommit_latency + description: | + Latency histogram for committing Raft log entries to stable storage + + This measures the latency of durably committing a group of newly received Raft + entries as well as the HardState entry to disk. This excludes any data + processing, i.e. we measure purely the commit latency of the resulting Engine + write. Homogeneous bands of p50-p99 latencies (in the presence of regular Raft + traffic), make it likely that the storage layer is healthy. Spikes in the + latency bands can either hint at the presence of large sets of Raft entries + being received, or at performance issues at the storage layer. + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: raft.process.tickingnanos + exported_name: raft_process_tickingnanos + description: Nanoseconds spent in store.processRaft() processing replica.Tick() + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.process.workingnanos + exported_name: raft_process_workingnanos + description: | + Nanoseconds spent in store.processRaft() working. + + This is the sum of the measurements passed to the raft.process.handleready.latency + histogram. 
+ y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.quota_pool.percent_used + exported_name: raft_quota_pool_percent_used + description: Histogram of proposal quota pool utilization (0-100) per leaseholder per metrics interval + y_axis_label: Percent + type: HISTOGRAM + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.rcvd.app + exported_name: raft_rcvd_app + description: Number of MsgApp messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.appresp + exported_name: raft_rcvd_appresp + description: Number of MsgAppResp messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.bytes + exported_name: raft_rcvd_bytes + description: "Number of bytes in Raft messages received by this store. Note\n\t\tthat this does not include raft snapshot received." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.cross_region.bytes + exported_name: raft_rcvd_cross_region_bytes + description: "Number of bytes received by this store for cross region Raft messages\n\t\twhen region tiers are configured. Note that this does not include raft\n\t\tsnapshot received." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.cross_zone.bytes + exported_name: raft_rcvd_cross_zone_bytes + description: "Number of bytes received by this store for cross zone, same region\n\t\tRaft messages when zone tiers are configured. If region tiers are not set,\n\t\tit is assumed to be within the same region. 
To ensure accurate monitoring of\n\t\tcross-zone data transfer, region and zone tiers should be consistently\n\t\tconfigured across all nodes. Note that this does not include raft snapshot\n\t\treceived." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.defortifyleader + exported_name: raft_rcvd_defortifyleader + description: Number of MsgDeFortifyLeader messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.dropped + exported_name: raft_rcvd_dropped + description: Number of incoming Raft messages dropped (due to queue length or size) + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.dropped_bytes + exported_name: raft_rcvd_dropped_bytes + description: Bytes of dropped incoming Raft messages + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.fortifyleader + exported_name: raft_rcvd_fortifyleader + description: Number of MsgFortifyLeader messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.fortifyleaderresp + exported_name: raft_rcvd_fortifyleaderresp + description: Number of MsgFortifyLeaderResp messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.heartbeat + exported_name: raft_rcvd_heartbeat + description: Number of (coalesced, if enabled) MsgHeartbeat messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.heartbeatresp + exported_name: raft_rcvd_heartbeatresp + description: Number of (coalesced, 
if enabled) MsgHeartbeatResp messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.prevote + exported_name: raft_rcvd_prevote + description: Number of MsgPreVote messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.prevoteresp + exported_name: raft_rcvd_prevoteresp + description: Number of MsgPreVoteResp messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.prop + exported_name: raft_rcvd_prop + description: Number of MsgProp messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.queued_bytes + exported_name: raft_rcvd_queued_bytes + description: Number of bytes in messages currently waiting for raft processing + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: raft.rcvd.snap + exported_name: raft_rcvd_snap + description: Number of MsgSnap messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.stepped_bytes + exported_name: raft_rcvd_stepped_bytes + description: | + Number of bytes in messages processed by Raft. + + Messages reflected here have been handed to Raft (via RawNode.Step). This does not imply that the + messages are no longer held in memory or that IO has been performed. Raft delegates IO activity to + Raft ready handling, which occurs asynchronously. 
Since handing messages to Raft serializes with + Raft ready handling and since the size of an entry is dominated by the contained pebble WriteBatch, + on average the rate at which this metric increases is a good proxy for the rate at which Raft ready + handling consumes writes. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.timeoutnow + exported_name: raft_rcvd_timeoutnow + description: Number of MsgTimeoutNow messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.transferleader + exported_name: raft_rcvd_transferleader + description: Number of MsgTransferLeader messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.vote + exported_name: raft_rcvd_vote + description: Number of MsgVote messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.voteresp + exported_name: raft_rcvd_voteresp + description: Number of MsgVoteResp messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.replication.latency + exported_name: raft_replication_latency + description: |- + The duration elapsed between having evaluated a BatchRequest and it being + reflected in the proposer's state machine (i.e. having applied fully). + + This encompasses time spent in the quota pool, in replication (including + reproposals), and application, but notably *not* sequencing latency (i.e. + contention and latch acquisition). + + No measurement is recorded for read-only commands as well as read-write commands + which end up not writing (such as a DeleteRange on an empty span). 
Commands that + result in 'above-replication' errors (i.e. txn retries, etc) are similarly + excluded. Errors that arise while waiting for the in-flight replication result + or result from application of the command are included. + + Note also that usually, clients are signalled at beginning of application, but + the recorded measurement captures the entirety of log application. + + The duration is always measured on the proposer, even if the Raft leader and + leaseholder are not colocated, or the request is proposed from a follower. + + Commands that use async consensus will still cause a measurement that reflects + the actual replication latency, despite returning early to the client. + y_axis_label: Latency + type: HISTOGRAM + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.scheduler.latency + exported_name: raft_scheduler_latency + description: | + Queueing durations for ranges waiting to be processed by the Raft scheduler. + + This histogram measures the delay from when a range is registered with the scheduler + for processing to when it is actually processed. This does not include the duration + of processing. + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: raft.sent.bytes + exported_name: raft_sent_bytes + description: "Number of bytes in Raft messages sent by this store. Note that\n\t\tthis does not include raft snapshot sent." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.sent.cross_region.bytes + exported_name: raft_sent_cross_region_bytes + description: "Number of bytes sent by this store for cross region Raft messages\n\t\twhen region\ttiers are configured. Note that this does not include raft\n\t\tsnapshot sent." 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.sent.cross_zone.bytes + exported_name: raft_sent_cross_zone_bytes + description: "Number of bytes sent by this store for cross zone, same region Raft\n\t\tmessages when zone tiers are configured. If region tiers are not set, it is\n\t\tassumed to be within the same region. To ensure accurate monitoring of\n\t\tcross-zone data transfer, region and zone tiers should be consistently\n\t\tconfigured across all nodes. Note that this does not include raft snapshot\n\t\tsent." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.storage.error + exported_name: raft_storage_error + description: Number of Raft storage errors + y_axis_label: Error Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.storage.read_bytes + exported_name: raft_storage_read_bytes + description: | + Counter of raftpb.Entry.Size() read from pebble for raft log entries. + + These are the bytes returned from the (raft.Storage).Entries method that were not + returned via the raft entry cache. This metric plus the raft.entrycache.read_bytes + metric represent the total bytes returned from the Entries method. + + Since pebble might serve these entries from the block cache, only a fraction of this + throughput might manifest in disk metrics. + + Entries tracked in this metric incur an unmarshalling-related CPU and memory + overhead that would not be incurred would the entries be served from the raft + entry cache. + + The bytes returned here do not correspond 1:1 to bytes read from pebble. This + metric measures the in-memory size of the raftpb.Entry, whereas we read its + encoded representation from pebble. As there is no compression involved, these + will generally be comparable. 
+ + A common reason for elevated measurements on this metric is that a store is + falling behind on raft log application. The raft entry cache generally tracks + entries that were recently appended, so if log application falls behind the + cache will already have moved on to newer entries. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.ticks + exported_name: raft_ticks + description: Number of Raft ticks queued + y_axis_label: Ticks + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.timeoutcampaign + exported_name: raft_timeoutcampaign + description: Number of Raft replicas campaigning after missed heartbeats from leader + y_axis_label: Elections called after timeout + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.transport.flow-token-dispatches-dropped + exported_name: raft_transport_flow_token_dispatches_dropped + description: Number of flow token dispatches dropped by the Raft Transport + y_axis_label: Dispatches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.transport.rcvd + exported_name: raft_transport_rcvd + description: Number of Raft messages received by the Raft Transport + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.transport.reverse-rcvd + exported_name: raft_transport_reverse_rcvd + description: |- + Messages received from the reverse direction of a stream. + + These messages should be rare. They are mostly informational, and are not actual + responses to Raft messages. Responses are received over another stream. 
+ y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.transport.reverse-sent + exported_name: raft_transport_reverse_sent + description: |- + Messages sent in the reverse direction of a stream. + + These messages should be rare. They are mostly informational, and are not actual + responses to Raft messages. Responses are sent over another stream. + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.transport.send-queue-bytes + exported_name: raft_transport_send_queue_bytes + description: |- + The total byte size of pending outgoing messages in the queue. + + The queue is composed of multiple bounded channels associated with different + peers. A size higher than the average baseline could indicate issues streaming + messages to at least one peer. Use this metric together with send-queue-size, to + have a fuller picture. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: raft.transport.send-queue-size + exported_name: raft_transport_send_queue_size + description: |- + Number of pending outgoing messages in the Raft Transport queue. + + The queue is composed of multiple bounded channels associated with different + peers. The overall size of tens of thousands could indicate issues streaming + messages to at least one peer. Use this metric in conjunction with + send-queue-bytes. 
+ y_axis_label: Messages + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.transport.sends-dropped + exported_name: raft_transport_sends_dropped + description: Number of Raft message sends dropped by the Raft Transport + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.transport.sent + exported_name: raft_transport_sent + description: Number of Raft messages sent by the Raft Transport + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raftlog.behind + exported_name: raftlog_behind + description: |- + Number of Raft log entries followers on other stores are behind. + + This gauge provides a view of the aggregate number of log entries the Raft leaders + on this node think the followers are behind. Since a raft leader may not always + have a good estimate for this information for all of its followers, and since + followers are expected to be behind (when they are not required as part of a + quorum) *and* the aggregate thus scales like the count of such followers, it is + difficult to meaningfully interpret this metric. + y_axis_label: Log Entries + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raftlog.size.max + exported_name: raftlog_size_max + description: Approximate size of the largest Raft log on the store. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: raftlog.size.total + exported_name: raftlog_size_total + description: Approximate size of all Raft logs on the store. 
+ y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: raftlog.truncated + exported_name: raftlog_truncated + description: Number of Raft log entries truncated + y_axis_label: Log Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.adds + exported_name: range_adds + description: Number of range additions + y_axis_label: Range Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.raftleaderremovals + exported_name: range_raftleaderremovals + description: Number of times the current Raft leader was removed from a range + y_axis_label: Raft leader removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.raftleadertransfers + exported_name: range_raftleadertransfers + description: Number of raft leader transfers + y_axis_label: Leader Transfers + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.recoveries + exported_name: range_recoveries + description: |- + Count of offline loss of quorum recovery operations performed on ranges. + + This count increments for every range recovered in offline loss of quorum + recovery operation. Metric is updated when node on which survivor replica + is located starts following the recovery. 
+ y_axis_label: Quorum Recoveries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.removes + exported_name: range_removes + description: Number of range removals + y_axis_label: Range Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.applied-initial + exported_name: range_snapshots_applied_initial + description: Number of snapshots applied for initial upreplication + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.applied-non-voter + exported_name: range_snapshots_applied_non_voter + description: Number of snapshots applied by non-voter replicas + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.applied-voter + exported_name: range_snapshots_applied_voter + description: Number of snapshots applied by voter replicas + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.cross-region.rcvd-bytes + exported_name: range_snapshots_cross_region_rcvd_bytes + description: Number of snapshot bytes received cross region by this store when region tiers are configured + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.cross-region.sent-bytes + exported_name: range_snapshots_cross_region_sent_bytes + description: Number of snapshot bytes sent cross region by this store when region tiers are configured + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.cross-zone.rcvd-bytes + exported_name: range_snapshots_cross_zone_rcvd_bytes + description: "Number of snapshot bytes received cross zone within the same region\n\t\tby this store when zone 
tiers are configured. If region tiers are not set,\n\t\tit is assumed to be within the same region. To ensure accurate monitoring of\n\t\tcross-zone data transfer, region and zone tiers should be consistently\n\t\tconfigured across all nodes." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.cross-zone.sent-bytes + exported_name: range_snapshots_cross_zone_sent_bytes + description: "Number of snapshot bytes sent cross zone within the same region by\n\t\tthis store when zone tiers are configured. If region tiers are not set, it\n\t\tis assumed to be within the same region. To ensure accurate monitoring of\n\t\tcross-zone data transfer, region and zone tiers should be consistently\n\t\tconfigured across all nodes." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.delegate.failures + exported_name: range_snapshots_delegate_failures + description: | + Number of snapshots that were delegated to a different node and + resulted in failure on that delegate. There are numerous reasons a failure can + occur on a delegate such as timeout, the delegate Raft log being too far behind + or the delegate being too busy to send. + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.delegate.in-progress + exported_name: range_snapshots_delegate_in_progress + description: Number of delegated snapshots that are currently in-flight. + y_axis_label: Snapshots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: range.snapshots.delegate.sent-bytes + exported_name: range_snapshots_delegate_sent_bytes + description: | + Bytes sent using a delegate. + + The number of bytes sent as a result of a delegate snapshot request + that was originated from a different node. 
This metric is useful in + evaluating the network savings of not sending cross region traffic. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.delegate.successes + exported_name: range_snapshots_delegate_successes + description: | + Number of snapshots that were delegated to a different node and + resulted in success on that delegate. This does not count self delegated snapshots. + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.generated + exported_name: range_snapshots_generated + description: Number of generated snapshots + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.rcvd-bytes + exported_name: range_snapshots_rcvd_bytes + description: Number of snapshot bytes received + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.rebalancing.rcvd-bytes + exported_name: range_snapshots_rebalancing_rcvd_bytes + description: Number of rebalancing snapshot bytes received + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.rebalancing.sent-bytes + exported_name: range_snapshots_rebalancing_sent_bytes + description: Number of rebalancing snapshot bytes sent + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.recovery.rcvd-bytes + exported_name: range_snapshots_recovery_rcvd_bytes + description: Number of raft recovery snapshot bytes received + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.recovery.sent-bytes + exported_name: range_snapshots_recovery_sent_bytes + description: Number of 
raft recovery snapshot bytes sent + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.recv-failed + exported_name: range_snapshots_recv_failed + description: Number of range snapshot initialization messages that errored out on the recipient, typically before any data is transferred + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.recv-in-progress + exported_name: range_snapshots_recv_in_progress + description: Number of non-empty snapshots being received + y_axis_label: Snapshots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: range.snapshots.recv-queue + exported_name: range_snapshots_recv_queue + description: Number of snapshots queued to receive + y_axis_label: Snapshots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: range.snapshots.recv-queue-bytes + exported_name: range_snapshots_recv_queue_bytes + description: Total size of all snapshots in the snapshot receive queue + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: range.snapshots.recv-total-in-progress + exported_name: range_snapshots_recv_total_in_progress + description: Number of total snapshots being received + y_axis_label: Snapshots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: range.snapshots.recv-unusable + exported_name: range_snapshots_recv_unusable + description: Number of range snapshots that were fully transmitted but determined to be unnecessary or unusable + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.send-in-progress + exported_name: range_snapshots_send_in_progress + description: Number of non-empty snapshots being sent + y_axis_label: Snapshots + type: GAUGE + unit: COUNT + aggregation: AVG +
derivative: NONE + - name: range.snapshots.send-queue + exported_name: range_snapshots_send_queue + description: Number of snapshots queued to send + y_axis_label: Snapshots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: range.snapshots.send-queue-bytes + exported_name: range_snapshots_send_queue_bytes + description: Total size of all snapshots in the snapshot send queue + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: range.snapshots.send-total-in-progress + exported_name: range_snapshots_send_total_in_progress + description: Number of total snapshots being sent + y_axis_label: Snapshots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: range.snapshots.sent-bytes + exported_name: range_snapshots_sent_bytes + description: Number of snapshot bytes sent + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.unknown.rcvd-bytes + exported_name: range_snapshots_unknown_rcvd_bytes + description: Number of unknown snapshot bytes received + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.unknown.sent-bytes + exported_name: range_snapshots_unknown_sent_bytes + description: Number of unknown snapshot bytes sent + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.upreplication.rcvd-bytes + exported_name: range_snapshots_upreplication_rcvd_bytes + description: Number of upreplication snapshot bytes received + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.upreplication.sent-bytes + exported_name: range_snapshots_upreplication_sent_bytes + description: Number of upreplication snapshot bytes sent + y_axis_label: Bytes + type: COUNTER + unit: BYTES + 
aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rangekeybytes + exported_name: rangekeybytes + description: Number of bytes taken up by range keys (e.g. MVCC range tombstones) + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rangekeycount + exported_name: rangekeycount + description: Count of all range keys (e.g. MVCC range tombstones) + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: ranges.decommissioning + exported_name: ranges_decommissioning + description: Number of ranges with at least one replica on a decommissioning node + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: ranges.overreplicated + exported_name: ranges_overreplicated + description: Number of ranges with more live replicas than the replication target + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rangevalbytes + exported_name: rangevalbytes + description: Number of bytes taken up by range key values (e.g. MVCC range tombstones) + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rangevalcount + exported_name: rangevalcount + description: Count of all range key values (e.g. MVCC range tombstones) + y_axis_label: MVCC Values + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rebalancing.readbytespersecond + exported_name: rebalancing_readbytespersecond + description: Number of bytes read recently per second, considering the last 30 minutes. + y_axis_label: Bytes/Sec + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rebalancing.readspersecond + exported_name: rebalancing_readspersecond + description: Number of keys read recently per second, considering the last 30 minutes.
+ y_axis_label: Keys/Sec + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rebalancing.requestspersecond + exported_name: rebalancing_requestspersecond + description: Number of requests received recently per second, considering the last 30 minutes. + y_axis_label: Requests/Sec + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rebalancing.state.imbalanced_overfull_options_exhausted + exported_name: rebalancing_state_imbalanced_overfull_options_exhausted + description: Number of occurrences where this store was overfull but failed to shed load after exhausting available rebalance options + y_axis_label: Overfull Options Exhausted + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rebalancing.writebytespersecond + exported_name: rebalancing_writebytespersecond + description: Number of bytes written recently per second, considering the last 30 minutes. + y_axis_label: Bytes/Sec + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rebalancing.writespersecond + exported_name: rebalancing_writespersecond + description: Number of keys written (i.e. applied by raft) per second to the store, considering the last 30 minutes. + y_axis_label: Keys/Sec + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.asleep + exported_name: replicas_asleep + description: Number of asleep replicas. Similarly to quiesced replicas, asleep replicas do not tick in Raft. 
+ y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.cpunanospersecond + exported_name: replicas_cpunanospersecond + description: Nanoseconds of CPU time in Replica request processing including evaluation but not replication + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: replicas.leaders + exported_name: replicas_leaders + description: Number of raft leaders + y_axis_label: Raft Leaders + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.leaders_invalid_lease + exported_name: replicas_leaders_invalid_lease + description: Number of replicas that are Raft leaders whose lease is invalid + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.leaders_not_fortified + exported_name: replicas_leaders_not_fortified + description: Number of replicas that are not fortified Raft leaders + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.leaders_not_leaseholders + exported_name: replicas_leaders_not_leaseholders + description: Number of replicas that are Raft leaders whose range lease is held by another store + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.quiescent + exported_name: replicas_quiescent + description: Number of quiesced replicas + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.reserved + exported_name: replicas_reserved + description: Number of replicas reserved for snapshots + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.uninitialized + exported_name: replicas_uninitialized + description: Number of uninitialized replicas, this does not include uninitialized replicas that can lie dormant in a 
persistent state. + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: requests.backpressure.split + exported_name: requests_backpressure_split + description: | + Number of backpressured writes waiting on a Range split. + + A Range will backpressure (roughly) non-system traffic when the range is above + the configured size until the range splits. When the rate of this metric is + nonzero over extended periods of time, it should be investigated why splits are + not occurring. + y_axis_label: Writes + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: requests.slow.latch + exported_name: requests_slow_latch + description: | + Number of requests that have been stuck for a long time acquiring latches. + + Latches moderate access to the KV keyspace for the purpose of evaluating and + replicating commands. A slow latch acquisition attempt is often caused by + another request holding and not releasing its latches in a timely manner. This + in turn can either be caused by a long delay in evaluation (for example, under + severe system overload) or by delays at the replication layer. + + This gauge registering a nonzero value usually indicates a serious problem and + should be investigated. + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: requests.slow.lease + exported_name: requests_slow_lease + description: | + Number of requests that have been stuck for a long time acquiring a lease. + + This gauge registering a nonzero value usually indicates range or replica + unavailability, and should be investigated. In the common case, we also + expect to see 'requests.slow.raft' to register a nonzero value, indicating + that the lease requests are not getting a timely response from the replication + layer. 
+ y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: requests.slow.raft + exported_name: requests_slow_raft + description: | + Number of requests that have been stuck for a long time in the replication layer. + + An (evaluated) request has to pass through the replication layer, notably the + quota pool and raft. If it fails to do so within a highly permissive duration, + the gauge is incremented (and decremented again once the request is either + applied or returns an error). + + A nonzero value indicates range or replica unavailability, and should be investigated. + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rocksdb.block.cache.usage + exported_name: rocksdb_block_cache_usage + description: Bytes used by the block cache + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rocksdb.bloom.filter.prefix.checked + exported_name: rocksdb_bloom_filter_prefix_checked + description: Number of times the bloom filter was checked + y_axis_label: Bloom Filter Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rocksdb.bloom.filter.prefix.useful + exported_name: rocksdb_bloom_filter_prefix_useful + description: Number of times the bloom filter helped avoid iterator creation + y_axis_label: Bloom Filter Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rocksdb.compacted-bytes-read + exported_name: rocksdb_compacted_bytes_read + description: Bytes read during compaction + y_axis_label: Bytes Read + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rocksdb.compacted-bytes-written + exported_name: rocksdb_compacted_bytes_written + description: Bytes written during compaction + y_axis_label: Bytes Written + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + 
- name: rocksdb.encryption.algorithm + exported_name: rocksdb_encryption_algorithm + description: Algorithm in use for encryption-at-rest, see storage/enginepb/key_registry.proto + y_axis_label: Encryption At Rest + type: GAUGE + unit: CONST + aggregation: AVG + derivative: NONE + - name: rocksdb.estimated-pending-compaction + exported_name: rocksdb_estimated_pending_compaction + description: Estimated pending compaction bytes + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rocksdb.flushed-bytes + exported_name: rocksdb_flushed_bytes + description: Bytes written during flush + y_axis_label: Bytes Written + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rocksdb.flushes + exported_name: rocksdb_flushes + description: Number of table flushes + y_axis_label: Flushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rocksdb.ingested-bytes + exported_name: rocksdb_ingested_bytes + description: Bytes ingested + y_axis_label: Bytes Ingested + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rocksdb.memtable.total-size + exported_name: rocksdb_memtable_total_size + description: Current size of memtable in bytes + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rocksdb.num-sstables + exported_name: rocksdb_num_sstables + description: Number of storage engine SSTables + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rocksdb.read-amplification + exported_name: rocksdb_read_amplification + description: Number of disk reads per query + y_axis_label: Disk Reads per Query + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rocksdb.table-readers-mem-estimate + exported_name: rocksdb_table_readers_mem_estimate + description: Memory used by index and filter blocks + 
y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rpc.batches.recv + exported_name: rpc_batches_recv + description: Number of batches processed + y_axis_label: Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.addsstable.recv + exported_name: rpc_method_addsstable_recv + description: Number of AddSSTable requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.adminchangereplicas.recv + exported_name: rpc_method_adminchangereplicas_recv + description: Number of AdminChangeReplicas requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.adminmerge.recv + exported_name: rpc_method_adminmerge_recv + description: Number of AdminMerge requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.adminrelocaterange.recv + exported_name: rpc_method_adminrelocaterange_recv + description: Number of AdminRelocateRange requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.adminscatter.recv + exported_name: rpc_method_adminscatter_recv + description: Number of AdminScatter requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.adminsplit.recv + exported_name: rpc_method_adminsplit_recv + description: Number of AdminSplit requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.admintransferlease.recv + exported_name: rpc_method_admintransferlease_recv + description: Number of AdminTransferLease requests processed + y_axis_label: 
RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.adminunsplit.recv + exported_name: rpc_method_adminunsplit_recv + description: Number of AdminUnsplit requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.barrier.recv + exported_name: rpc_method_barrier_recv + description: Number of Barrier requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.checkconsistency.recv + exported_name: rpc_method_checkconsistency_recv + description: Number of CheckConsistency requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.clearrange.recv + exported_name: rpc_method_clearrange_recv + description: Number of ClearRange requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.computechecksum.recv + exported_name: rpc_method_computechecksum_recv + description: Number of ComputeChecksum requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.conditionalput.recv + exported_name: rpc_method_conditionalput_recv + description: Number of ConditionalPut requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.delete.recv + exported_name: rpc_method_delete_recv + description: Number of Delete requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.deleterange.recv + exported_name: rpc_method_deleterange_recv + description: Number of DeleteRange requests processed + y_axis_label: RPCs + type: 
COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.endtxn.recv + exported_name: rpc_method_endtxn_recv + description: Number of EndTxn requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.excise.recv + exported_name: rpc_method_excise_recv + description: Number of Excise requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.export.recv + exported_name: rpc_method_export_recv + description: Number of Export requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.gc.recv + exported_name: rpc_method_gc_recv + description: Number of GC requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.get.recv + exported_name: rpc_method_get_recv + description: Number of Get requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.heartbeattxn.recv + exported_name: rpc_method_heartbeattxn_recv + description: Number of HeartbeatTxn requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.increment.recv + exported_name: rpc_method_increment_recv + description: Number of Increment requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.isspanempty.recv + exported_name: rpc_method_isspanempty_recv + description: Number of IsSpanEmpty requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.leaseinfo.recv + 
exported_name: rpc_method_leaseinfo_recv + description: Number of LeaseInfo requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.linkexternalsstable.recv + exported_name: rpc_method_linkexternalsstable_recv + description: Number of LinkExternalSSTable requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.merge.recv + exported_name: rpc_method_merge_recv + description: Number of Merge requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.migrate.recv + exported_name: rpc_method_migrate_recv + description: Number of Migrate requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.probe.recv + exported_name: rpc_method_probe_recv + description: Number of Probe requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.pushtxn.recv + exported_name: rpc_method_pushtxn_recv + description: Number of PushTxn requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.put.recv + exported_name: rpc_method_put_recv + description: Number of Put requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.queryintent.recv + exported_name: rpc_method_queryintent_recv + description: Number of QueryIntent requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.querylocks.recv + exported_name: rpc_method_querylocks_recv + description: Number of QueryLocks requests 
processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.queryresolvedtimestamp.recv + exported_name: rpc_method_queryresolvedtimestamp_recv + description: Number of QueryResolvedTimestamp requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.querytxn.recv + exported_name: rpc_method_querytxn_recv + description: Number of QueryTxn requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.rangestats.recv + exported_name: rpc_method_rangestats_recv + description: Number of RangeStats requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.recomputestats.recv + exported_name: rpc_method_recomputestats_recv + description: Number of RecomputeStats requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.recovertxn.recv + exported_name: rpc_method_recovertxn_recv + description: Number of RecoverTxn requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.refresh.recv + exported_name: rpc_method_refresh_recv + description: Number of Refresh requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.refreshrange.recv + exported_name: rpc_method_refreshrange_recv + description: Number of RefreshRange requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.requestlease.recv + exported_name: rpc_method_requestlease_recv + description: Number of RequestLease requests 
processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.resolveintent.recv + exported_name: rpc_method_resolveintent_recv + description: Number of ResolveIntent requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.resolveintentrange.recv + exported_name: rpc_method_resolveintentrange_recv + description: Number of ResolveIntentRange requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.reversescan.recv + exported_name: rpc_method_reversescan_recv + description: Number of ReverseScan requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.revertrange.recv + exported_name: rpc_method_revertrange_recv + description: Number of RevertRange requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.scan.recv + exported_name: rpc_method_scan_recv + description: Number of Scan requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.subsume.recv + exported_name: rpc_method_subsume_recv + description: Number of Subsume requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.transferlease.recv + exported_name: rpc_method_transferlease_recv + description: Number of TransferLease requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.truncatelog.recv + exported_name: rpc_method_truncatelog_recv + description: Number of TruncateLog requests processed + 
y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.writebatch.recv + exported_name: rpc_method_writebatch_recv + description: Number of WriteBatch requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.streams.mux_rangefeed.active + exported_name: rpc_streams_mux_rangefeed_active + description: Number of currently running MuxRangeFeed streams + y_axis_label: Streams + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rpc.streams.mux_rangefeed.recv + exported_name: rpc_streams_mux_rangefeed_recv + description: Total number of MuxRangeFeed streams + y_axis_label: Streams + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: spanconfig.kvsubscriber.oldest_protected_record_nanos + exported_name: spanconfig_kvsubscriber_oldest_protected_record_nanos + description: Difference between the current time and the oldest protected timestamp (sudden drops indicate a record being released; an ever increasing number indicates that the oldest record is around and preventing GC if > configured GC TTL) + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: spanconfig.kvsubscriber.protected_record_count + exported_name: spanconfig_kvsubscriber_protected_record_count + description: Number of protected timestamp records, as seen by KV + y_axis_label: Records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: spanconfig.kvsubscriber.update_behind_nanos + exported_name: spanconfig_kvsubscriber_update_behind_nanos + description: Difference between the current time and when the KVSubscriber received its last update (an ever increasing number indicates that we're no longer receiving updates) + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: 
NONE + - name: storage.batch-commit.commit-wait.duration + exported_name: storage_batch_commit_commit_wait_duration + description: Cumulative time spent waiting for WAL sync, for batch commit. See storage.AggregatedBatchCommitStats for details. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.batch-commit.count + exported_name: storage_batch_commit_count + description: Count of batch commits. See storage.AggregatedBatchCommitStats for details. + y_axis_label: Commit Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.batch-commit.duration + exported_name: storage_batch_commit_duration + description: Cumulative time spent in batch commit. See storage.AggregatedBatchCommitStats for details. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.batch-commit.l0-stall.duration + exported_name: storage_batch_commit_l0_stall_duration + description: Cumulative time spent in a write stall due to high read amplification in L0, for batch commit. See storage.AggregatedBatchCommitStats for details. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.batch-commit.mem-stall.duration + exported_name: storage_batch_commit_mem_stall_duration + description: Cumulative time spent in a write stall due to too many memtables, for batch commit. See storage.AggregatedBatchCommitStats for details. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.batch-commit.sem-wait.duration + exported_name: storage_batch_commit_sem_wait_duration + description: Cumulative time spent in semaphore wait, for batch commit. See storage.AggregatedBatchCommitStats for details. 
+ y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.batch-commit.wal-queue-wait.duration + exported_name: storage_batch_commit_wal_queue_wait_duration + description: Cumulative time spent waiting for memory blocks in the WAL queue, for batch commit. See storage.AggregatedBatchCommitStats for details. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.batch-commit.wal-rotation.duration + exported_name: storage_batch_commit_wal_rotation_duration + description: Cumulative time spent waiting for WAL rotation, for batch commit. See storage.AggregatedBatchCommitStats for details. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.block-load.active + exported_name: storage_block_load_active + description: The number of sstable block loads currently in progress + y_axis_label: Block loads + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.block-load.queued + exported_name: storage_block_load_queued + description: 'The cumulative number of SSTable block loads that were delayed because too many loads were active (see also: `storage.block_load.node_max_active`)' + y_axis_label: Block loads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.checkpoints + exported_name: storage_checkpoints + description: |- + The number of checkpoint directories found in storage. + + This is the number of directories found in the auxiliary/checkpoints directory. + Each represents an immutable point-in-time storage engine checkpoint. They are + cheap (consisting mostly of hard links), but over time they effectively become a + full copy of the old state, which increases their relative cost. Checkpoints + must be deleted once acted upon (e.g. 
copied elsewhere or investigated). + + A likely cause of having a checkpoint is that one of the ranges in this store + had inconsistent data among its replicas. Such checkpoint directories are + located in auxiliary/checkpoints/rN_at_M, where N is the range ID, and M is the + Raft applied index at which this checkpoint was taken. + y_axis_label: Directories + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.compactions.cancelled.bytes + exported_name: storage_compactions_cancelled_bytes + description: Cumulative volume of data written to sstables during compactions that were ultimately cancelled due to a conflicting operation. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.compactions.cancelled.count + exported_name: storage_compactions_cancelled_count + description: Cumulative count of compactions that were cancelled before they completed due to a conflicting operation. + y_axis_label: Compactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.compactions.duration + exported_name: storage_compactions_duration + description: |- + Cumulative sum of all compaction durations. + + The rate of this value provides the effective compaction concurrency of a store, + which can be useful to determine whether the maximum compaction concurrency is + fully utilized. + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.compactions.keys.pinned.bytes + exported_name: storage_compactions_keys_pinned_bytes + description: | + Cumulative size of storage engine KVs written to sstables during flushes and compactions due to open LSM snapshots. + + Various subsystems of CockroachDB take LSM snapshots to maintain a consistent view + of the database over an extended duration. 
In order to maintain the consistent view, + flushes and compactions within the storage engine must preserve keys that otherwise + would have been dropped. This increases write amplification, and introduces keys + that must be skipped during iteration. This metric records the cumulative number of + bytes preserved during flushes and compactions over the lifetime of the process. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.compactions.keys.pinned.count + exported_name: storage_compactions_keys_pinned_count + description: | + Cumulative count of storage engine KVs written to sstables during flushes and compactions due to open LSM snapshots. + + Various subsystems of CockroachDB take LSM snapshots to maintain a consistent view + of the database over an extended duration. In order to maintain the consistent view, + flushes and compactions within the storage engine must preserve keys that otherwise + would have been dropped. This increases write amplification, and introduces keys + that must be skipped during iteration. This metric records the cumulative count of + KVs preserved during flushes and compactions over the lifetime of the process. 
+ y_axis_label: Keys + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk-slow + exported_name: storage_disk_slow + description: Number of instances of disk operations taking longer than 10s + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk-stalled + exported_name: storage_disk_stalled + description: Number of instances of disk operations taking longer than 20s + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.io.time + exported_name: storage_disk_io_time + description: Time spent reading from or writing to the store's disk since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.iopsinprogress + exported_name: storage_disk_iopsinprogress + description: IO operations currently in progress on the store's disk (as reported by the OS) + y_axis_label: Operations + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.disk.read-max.bytespersecond + exported_name: storage_disk_read_max_bytespersecond + description: Maximum rate at which bytes were read from disk (as reported by the OS) + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.disk.read.bytes + exported_name: storage_disk_read_bytes + description: Bytes read from the store's disk since this process started (as reported by the OS) + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.read.count + exported_name: storage_disk_read_count + description: Disk read operations on the store's disk since this process started (as reported by the OS) + y_axis_label: Operations + type: COUNTER + unit: COUNT + 
aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.read.time + exported_name: storage_disk_read_time + description: Time spent reading from the store's disk since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.weightedio.time + exported_name: storage_disk_weightedio_time + description: Weighted time spent reading from or writing to the store's disk since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.write-max.bytespersecond + exported_name: storage_disk_write_max_bytespersecond + description: Maximum rate at which bytes were written to disk (as reported by the OS) + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.disk.write.bytes + exported_name: storage_disk_write_bytes + description: Bytes written to the store's disk since this process started (as reported by the OS) + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.write.count + exported_name: storage_disk_write_count + description: Disk write operations on the store's disk since this process started (as reported by the OS) + y_axis_label: Operations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.write.time + exported_name: storage_disk_write_time + description: Time spent writing to the store's disks since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.flush.ingest.count + exported_name: storage_flush_ingest_count + description: Flushes performing an ingest (flushable ingestions) + 
y_axis_label: Flushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.flush.ingest.table.bytes + exported_name: storage_flush_ingest_table_bytes + description: Bytes ingested via flushes (flushable ingestions) + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.flush.ingest.table.count + exported_name: storage_flush_ingest_table_count + description: Tables ingested via flushes (flushable ingestions) + y_axis_label: Tables + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.flush.utilization + exported_name: storage_flush_utilization + description: The percentage of time the storage engine is actively flushing memtables to disk. + y_axis_label: Flush Utilization + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + - name: storage.ingest.count + exported_name: storage_ingest_count + description: Number of successful ingestions performed + y_axis_label: Events + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.initial_stats_complete + exported_name: storage_initial_stats_complete + description: Set to 1 when initial table stats collection is complete. + y_axis_label: Boolean + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.iterator.block-load.bytes + exported_name: storage_iterator_block_load_bytes + description: Bytes loaded by storage engine iterators (possibly cached). See storage.AggregatedIteratorStats for details. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.block-load.cached-bytes + exported_name: storage_iterator_block_load_cached_bytes + description: Bytes loaded by storage engine iterators from the block cache. See storage.AggregatedIteratorStats for details. 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.block-load.read-duration + exported_name: storage_iterator_block_load_read_duration + description: Cumulative time storage engine iterators spent loading blocks from durable storage. See storage.AggregatedIteratorStats for details. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-backup.block-load.bytes + exported_name: storage_iterator_category_backup_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-backup.block-load.cached-bytes + exported_name: storage_iterator_category_backup_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-backup.block-load.latency-sum + exported_name: storage_iterator_category_backup_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-batch-eval.block-load.bytes + exported_name: storage_iterator_category_batch_eval_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-batch-eval.block-load.cached-bytes + exported_name: storage_iterator_category_batch_eval_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-batch-eval.block-load.latency-sum + exported_name: storage_iterator_category_batch_eval_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-crdb-unknown.block-load.bytes + exported_name: storage_iterator_category_crdb_unknown_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-crdb-unknown.block-load.cached-bytes + exported_name: storage_iterator_category_crdb_unknown_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-crdb-unknown.block-load.latency-sum + exported_name: storage_iterator_category_crdb_unknown_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-intent-resolution.block-load.bytes + exported_name: storage_iterator_category_intent_resolution_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-intent-resolution.block-load.cached-bytes + exported_name: storage_iterator_category_intent_resolution_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-intent-resolution.block-load.latency-sum + exported_name: storage_iterator_category_intent_resolution_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-mvcc-gc.block-load.bytes + exported_name: storage_iterator_category_mvcc_gc_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-mvcc-gc.block-load.cached-bytes + exported_name: storage_iterator_category_mvcc_gc_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-mvcc-gc.block-load.latency-sum + exported_name: storage_iterator_category_mvcc_gc_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-compaction.block-load.bytes + exported_name: storage_iterator_category_pebble_compaction_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-compaction.block-load.cached-bytes + exported_name: storage_iterator_category_pebble_compaction_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-compaction.block-load.latency-sum + exported_name: storage_iterator_category_pebble_compaction_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-get.block-load.bytes + exported_name: storage_iterator_category_pebble_get_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-get.block-load.cached-bytes + exported_name: storage_iterator_category_pebble_get_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-get.block-load.latency-sum + exported_name: storage_iterator_category_pebble_get_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-ingest.block-load.bytes + exported_name: storage_iterator_category_pebble_ingest_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-ingest.block-load.cached-bytes + exported_name: storage_iterator_category_pebble_ingest_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-ingest.block-load.latency-sum + exported_name: storage_iterator_category_pebble_ingest_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-range-snap.block-load.bytes + exported_name: storage_iterator_category_range_snap_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-range-snap.block-load.cached-bytes + exported_name: storage_iterator_category_range_snap_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-range-snap.block-load.latency-sum + exported_name: storage_iterator_category_range_snap_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-rangefeed.block-load.bytes + exported_name: storage_iterator_category_rangefeed_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-rangefeed.block-load.cached-bytes + exported_name: storage_iterator_category_rangefeed_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-rangefeed.block-load.latency-sum + exported_name: storage_iterator_category_rangefeed_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-replication.block-load.bytes + exported_name: storage_iterator_category_replication_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-replication.block-load.cached-bytes + exported_name: storage_iterator_category_replication_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-replication.block-load.latency-sum + exported_name: storage_iterator_category_replication_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-scan-background.block-load.bytes + exported_name: storage_iterator_category_scan_background_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-scan-background.block-load.cached-bytes + exported_name: storage_iterator_category_scan_background_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-scan-background.block-load.latency-sum + exported_name: storage_iterator_category_scan_background_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-scan-regular.block-load.bytes + exported_name: storage_iterator_category_scan_regular_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-scan-regular.block-load.cached-bytes + exported_name: storage_iterator_category_scan_regular_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-scan-regular.block-load.latency-sum + exported_name: storage_iterator_category_scan_regular_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-unknown.block-load.bytes + exported_name: storage_iterator_category_unknown_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-unknown.block-load.cached-bytes + exported_name: storage_iterator_category_unknown_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-unknown.block-load.latency-sum + exported_name: storage_iterator_category_unknown_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.external.seeks + exported_name: storage_iterator_external_seeks + description: Cumulative count of seeks performed on storage engine iterators. 
See storage.AggregatedIteratorStats for details. + y_axis_label: Iterator Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.external.steps + exported_name: storage_iterator_external_steps + description: Cumulative count of steps performed on storage engine iterators. See storage.AggregatedIteratorStats for details. + y_axis_label: Iterator Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.internal.seeks + exported_name: storage_iterator_internal_seeks + description: |- + Cumulative count of seeks performed internally within storage engine iterators. + + A value high relative to 'storage.iterator.external.seeks' + is a good indication that there's an accumulation of garbage + internally within the storage engine. + + See storage.AggregatedIteratorStats for details. + y_axis_label: Iterator Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.internal.steps + exported_name: storage_iterator_internal_steps + description: |- + Cumulative count of steps performed internally within storage engine iterators. + + A value high relative to 'storage.iterator.external.steps' + is a good indication that there's an accumulation of garbage + internally within the storage engine. + + See storage.AggregatedIteratorStats for more details. + y_axis_label: Iterator Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.keys.range-key-set.count + exported_name: storage_keys_range_key_set_count + description: Approximate count of RangeKeySet internal keys across the storage engine. 
+ y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.keys.tombstone.count + exported_name: storage_keys_tombstone_count + description: Approximate count of DEL, SINGLEDEL and RANGEDEL internal keys across the storage engine. + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l0-bytes-flushed + exported_name: storage_l0_bytes_flushed + description: Number of bytes flushed (from memtables) into Level 0 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l0-bytes-ingested + exported_name: storage_l0_bytes_ingested + description: Number of bytes ingested directly into Level 0 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l0-level-score + exported_name: storage_l0_level_score + description: Compaction score of level 0 + y_axis_label: Score + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l0-level-size + exported_name: storage_l0_level_size + description: Size of the SSTables in level 0 + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.l0-num-files + exported_name: storage_l0_num_files + description: Number of SSTables in Level 0 + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l0-sublevels + exported_name: storage_l0_sublevels + description: Number of Level 0 sublevels + y_axis_label: Sublevels + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l1-bytes-ingested + exported_name: storage_l1_bytes_ingested + description: Number of bytes ingested directly into Level 1 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l1-level-score + exported_name: 
storage_l1_level_score + description: Compaction score of level 1 + y_axis_label: Score + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l1-level-size + exported_name: storage_l1_level_size + description: Size of the SSTables in level 1 + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.l2-bytes-ingested + exported_name: storage_l2_bytes_ingested + description: Number of bytes ingested directly into Level 2 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l2-level-score + exported_name: storage_l2_level_score + description: Compaction score of level 2 + y_axis_label: Score + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l2-level-size + exported_name: storage_l2_level_size + description: Size of the SSTables in level 2 + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.l3-bytes-ingested + exported_name: storage_l3_bytes_ingested + description: Number of bytes ingested directly into Level 3 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l3-level-score + exported_name: storage_l3_level_score + description: Compaction score of level 3 + y_axis_label: Score + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l3-level-size + exported_name: storage_l3_level_size + description: Size of the SSTables in level 3 + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.l4-bytes-ingested + exported_name: storage_l4_bytes_ingested + description: Number of bytes ingested directly into Level 4 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l4-level-score + exported_name: 
storage_l4_level_score + description: Compaction score of level 4 + y_axis_label: Score + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l4-level-size + exported_name: storage_l4_level_size + description: Size of the SSTables in level 4 + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.l5-bytes-ingested + exported_name: storage_l5_bytes_ingested + description: Number of bytes ingested directly into Level 5 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l5-level-score + exported_name: storage_l5_level_score + description: Compaction score of level 5 + y_axis_label: Score + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l5-level-size + exported_name: storage_l5_level_size + description: Size of the SSTables in level 5 + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.l6-bytes-ingested + exported_name: storage_l6_bytes_ingested + description: Number of bytes ingested directly into Level 6 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l6-level-score + exported_name: storage_l6_level_score + description: Compaction score of level 6 + y_axis_label: Score + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l6-level-size + exported_name: storage_l6_level_size + description: Size of the SSTables in level 6 + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.marked-for-compaction-files + exported_name: storage_marked_for_compaction_files + description: Count of SSTables marked for compaction + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.point_deletions.bytes + exported_name: 
storage_point_deletions_bytes + description: | + Estimated file bytes that will be saved by compacting all point deletions. + + This is dependent on table stats collection, so can be very incomplete until + storage.initial_stats_complete becomes true. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.range_deletions.bytes + exported_name: storage_range_deletions_bytes + description: | + Estimated file bytes that will be saved by compacting all range deletions. + + This is dependent on table stats collection, so can be very incomplete until + storage.initial_stats_complete becomes true. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.secondary-cache.count + exported_name: storage_secondary_cache_count + description: The count of cache blocks in the secondary cache (not sstable blocks) + y_axis_label: Cache items + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.secondary-cache.evictions + exported_name: storage_secondary_cache_evictions + description: The number of times a cache block was evicted from the secondary cache + y_axis_label: Num evictions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.secondary-cache.reads-full-hit + exported_name: storage_secondary_cache_reads_full_hit + description: The number of reads where all data returned was read from the secondary cache + y_axis_label: Num reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.secondary-cache.reads-multi-block + exported_name: storage_secondary_cache_reads_multi_block + description: The number of secondary cache reads that require reading data from 2+ cache blocks + y_axis_label: Num reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.secondary-cache.reads-multi-shard + 
exported_name: storage_secondary_cache_reads_multi_shard + description: The number of secondary cache reads that require reading data from 2+ shards + y_axis_label: Num reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.secondary-cache.reads-no-hit + exported_name: storage_secondary_cache_reads_no_hit + description: The number of reads where no data returned was read from the secondary cache + y_axis_label: Num reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.secondary-cache.reads-partial-hit + exported_name: storage_secondary_cache_reads_partial_hit + description: The number of reads where some data returned was read from the secondary cache + y_axis_label: Num reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.secondary-cache.reads-total + exported_name: storage_secondary_cache_reads_total + description: The number of reads from the secondary cache + y_axis_label: Num reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.secondary-cache.size + exported_name: storage_secondary_cache_size + description: The number of sstable bytes stored in the secondary cache + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.secondary-cache.write-back-failures + exported_name: storage_secondary_cache_write_back_failures + description: The number of times writing a cache block to the secondary cache failed + y_axis_label: Num failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.shared-storage.read + exported_name: storage_shared_storage_read + description: Bytes read from shared storage + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.shared-storage.write 
+ exported_name: storage_shared_storage_write + description: Bytes written to external storage + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.single-delete.ineffectual + exported_name: storage_single_delete_ineffectual + description: Number of SingleDeletes that were ineffectual + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.single-delete.invariant-violation + exported_name: storage_single_delete_invariant_violation + description: Number of SingleDelete invariant violations + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.sstable.compression.none.count + exported_name: storage_sstable_compression_none_count + description: Count of SSTables that are uncompressed. + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.sstable.compression.snappy.count + exported_name: storage_sstable_compression_snappy_count + description: Count of SSTables that have been compressed with the snappy compression algorithm. + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.sstable.compression.unknown.count + exported_name: storage_sstable_compression_unknown_count + description: Count of SSTables that have an unknown compression algorithm. + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.sstable.compression.zstd.count + exported_name: storage_sstable_compression_zstd_count + description: Count of SSTables that have been compressed with the zstd compression algorithm. 
+ y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.sstable.remote.bytes + exported_name: storage_sstable_remote_bytes + description: Bytes in SSTables that are stored off-disk (remotely) in object storage. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.sstable.remote.count + exported_name: storage_sstable_remote_count + description: Count of SSTables that are stored off-disk (remotely) in object storage. + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.sstable.zombie.bytes + exported_name: storage_sstable_zombie_bytes + description: Bytes in SSTables that have been logically deleted, but can't yet be physically deleted because an open iterator may be reading them. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.value_separation.blob_files.count + exported_name: storage_value_separation_blob_files_count + description: The number of blob files that are used to store separated values within the storage engine. + y_axis_label: Files + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.value_separation.blob_files.size + exported_name: storage_value_separation_blob_files_size + description: The size of the physical blob files that are used to store separated values within the storage engine. This sum is the physical post-compression sum of value_bytes.referenced and value_bytes.unreferenced. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.value_separation.value_bytes.referenced + exported_name: storage_value_separation_value_bytes_referenced + description: The size of storage engine value bytes (pre-compression) that are stored separately in blob files and referenced by a live sstable. 
+ y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.value_separation.value_bytes.unreferenced + exported_name: storage_value_separation_value_bytes_unreferenced + description: The size of storage engine value bytes (pre-compression) that are stored separately in blob files and not referenced by any live sstable. These bytes are garbage that could be reclaimed by a compaction. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.wal.bytes_in + exported_name: storage_wal_bytes_in + description: The number of logical bytes the storage engine has written to the WAL + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.wal.bytes_written + exported_name: storage_wal_bytes_written + description: The number of bytes the storage engine has written to the WAL + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.wal.failover.primary.duration + exported_name: storage_wal_failover_primary_duration + description: Cumulative time spent writing to the primary WAL directory. Only populated when WAL failover is configured + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.wal.failover.secondary.duration + exported_name: storage_wal_failover_secondary_duration + description: Cumulative time spent writing to the secondary WAL directory. Only populated when WAL failover is configured + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.wal.failover.switch.count + exported_name: storage_wal_failover_switch_count + description: Count of the number of times WAL writing has switched from primary to secondary and vice versa. 
+ y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.wal.failover.write_and_sync.latency + exported_name: storage_wal_failover_write_and_sync_latency + description: The observed latency for writing and syncing to the write ahead log. Only populated when WAL failover is configured + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: storage.write-amplification + exported_name: storage_write_amplification + description: |- + Running measure of write-amplification. + + Write amplification is measured as the ratio of bytes written to disk relative to the logical + bytes present in sstables, over the life of a store. This metric is a running average + of the write amplification as tracked by Pebble. + y_axis_label: Ratio of bytes written to logical bytes + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.write-stall-nanos + exported_name: storage_write_stall_nanos + description: Total write stall duration in nanos + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.callbacks.processing_duration + exported_name: storeliveness_callbacks_processing_duration + description: Duration of support withdrawal callback processing + y_axis_label: Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: storeliveness.heartbeat.failures + exported_name: storeliveness_heartbeat_failures + description: Number of Store Liveness heartbeats that failed to be sent out by the Store Liveness Support Manager + y_axis_label: Heartbeats + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.heartbeat.successes + exported_name: storeliveness_heartbeat_successes + description: Number of Store Liveness heartbeats sent out by the Store Liveness 
Support Manager + y_axis_label: Heartbeats + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.message_handle.failures + exported_name: storeliveness_message_handle_failures + description: Number of incoming Store Liveness messages that failed to be handled by the Store Liveness Support Manager + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.message_handle.successes + exported_name: storeliveness_message_handle_successes + description: Number of incoming Store Liveness messages handled by the Store Liveness Support Manager + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.support_for.stores + exported_name: storeliveness_support_for_stores + description: Number of stores that the Store Liveness Support Manager has ever provided support for + y_axis_label: Stores + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storeliveness.support_from.stores + exported_name: storeliveness_support_from_stores + description: Number of stores that the Store Liveness Support Manager is requesting support from by sending heartbeats + y_axis_label: Stores + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storeliveness.support_withdraw.failures + exported_name: storeliveness_support_withdraw_failures + description: Number of times the Store Liveness Support Manager has encountered an error while withdrawing support for another store + y_axis_label: Support Withdrawals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.support_withdraw.successes + exported_name: storeliveness_support_withdraw_successes + description: Number of times the Store Liveness Support Manager has successfully withdrawn support for another store + y_axis_label: Support 
Withdrawals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.transport.receive-queue-bytes + exported_name: storeliveness_transport_receive_queue_bytes + description: Total byte size of pending incoming messages from Store Liveness Transport + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storeliveness.transport.receive-queue-size + exported_name: storeliveness_transport_receive_queue_size + description: Number of pending incoming messages from the Store Liveness Transport + y_axis_label: Messages + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storeliveness.transport.receive_dropped + exported_name: storeliveness_transport_receive_dropped + description: Number of Store Liveness messages dropped by the Store Liveness Transport on the receiver side + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.transport.received + exported_name: storeliveness_transport_received + description: Number of Store Liveness messages received by the Store Liveness Transport + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.transport.send-queue-bytes + exported_name: storeliveness_transport_send_queue_bytes + description: Total byte size of pending outgoing messages in all Store Liveness Transport per-store send queues + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storeliveness.transport.send-queue-idle + exported_name: storeliveness_transport_send_queue_idle + description: Number of Store Liveness Transport per-store send queues that have become idle due to no recently-sent messages + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
storeliveness.transport.send-queue-size + exported_name: storeliveness_transport_send_queue_size + description: Number of pending outgoing messages in all Store Liveness Transport per-store send queues + y_axis_label: Messages + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storeliveness.transport.send_dropped + exported_name: storeliveness_transport_send_dropped + description: Number of Store Liveness messages dropped by the Store Liveness Transport on the sender side + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.transport.sent + exported_name: storeliveness_transport_sent + description: Number of Store Liveness messages sent by the Store Liveness Transport + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sysbytes + exported_name: sysbytes + description: Number of bytes in system KV pairs + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: syscount + exported_name: syscount + description: Count of system KV pairs + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: tenant.consumption.cross_region_network_ru + exported_name: tenant_consumption_cross_region_network_ru + description: Total number of RUs charged for cross-region network traffic + y_axis_label: Request Units + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.external_io_egress_bytes + exported_name: tenant_consumption_external_io_egress_bytes + description: Total number of bytes written to external services such as cloud storage providers + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.external_io_ingress_bytes + exported_name: tenant_consumption_external_io_ingress_bytes 
+ description: Total number of bytes read from external services such as cloud storage providers + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.kv_request_units + exported_name: tenant_consumption_kv_request_units + description: RU consumption attributable to KV + y_axis_label: Request Units + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.pgwire_egress_bytes + exported_name: tenant_consumption_pgwire_egress_bytes + description: Total number of bytes transferred from a SQL pod to the client + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.read_batches + exported_name: tenant_consumption_read_batches + description: Total number of KV read batches + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.read_bytes + exported_name: tenant_consumption_read_bytes + description: Total number of bytes read from KV + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.read_requests + exported_name: tenant_consumption_read_requests + description: Total number of KV read requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.request_units + exported_name: tenant_consumption_request_units + description: Total RU consumption + y_axis_label: Request Units + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.sql_pods_cpu_seconds + exported_name: tenant_consumption_sql_pods_cpu_seconds + description: Total amount of CPU used by SQL pods + y_axis_label: CPU Seconds + type: COUNTER + unit: SECONDS + aggregation: AVG + 
derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.write_batches + exported_name: tenant_consumption_write_batches + description: Total number of KV write batches + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.write_bytes + exported_name: tenant_consumption_write_bytes + description: Total number of bytes written to KV + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.write_requests + exported_name: tenant_consumption_write_requests + description: Total number of KV write requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: timeseries.write.bytes + exported_name: timeseries_write_bytes + description: Total size in bytes of metric samples written to disk + y_axis_label: Storage + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: timeseries.write.errors + exported_name: timeseries_write_errors + description: Total errors encountered while attempting to write metrics to disk + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: timeseries.write.samples + exported_name: timeseries_write_samples + description: Total number of metric samples written to disk + y_axis_label: Metric Samples + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: totalbytes + exported_name: totalbytes + description: Total number of bytes taken up by keys and values including non-live data + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: tscache.skl.pages + exported_name: tscache_skl_pages + description: Number of pages in the timestamp cache + y_axis_label: Pages + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: 
NONE + - name: tscache.skl.rotations + exported_name: tscache_skl_rotations + description: Number of page rotations in the timestamp cache + y_axis_label: Page Rotations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.commit_waits.before_commit_trigger + exported_name: txn_commit_waits_before_commit_trigger + description: Number of KV transactions that had to commit-wait on the server before committing because they had a commit trigger + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side.1PC.failure + exported_name: txn_server_side_1PC_failure + description: Number of batches that attempted to commit using 1PC and failed + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side.1PC.success + exported_name: txn_server_side_1PC_success + description: Number of batches that attempted to commit using 1PC and succeeded + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side_retry.read_evaluation.failure + exported_name: txn_server_side_retry_read_evaluation_failure + description: Number of read batches that were not successfully refreshed server side + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side_retry.read_evaluation.success + exported_name: txn_server_side_retry_read_evaluation_success + description: Number of read batches that were successfully refreshed server side + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side_retry.uncertainty_interval_error.failure + exported_name: txn_server_side_retry_uncertainty_interval_error_failure + description: Number of batches 
that ran into uncertainty interval errors that were not successfully refreshed server side + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side_retry.uncertainty_interval_error.success + exported_name: txn_server_side_retry_uncertainty_interval_error_success + description: Number of batches that ran into uncertainty interval errors that were successfully refreshed server side + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side_retry.write_evaluation.failure + exported_name: txn_server_side_retry_write_evaluation_failure + description: Number of write batches that were not successfully refreshed server side + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side_retry.write_evaluation.success + exported_name: txn_server_side_retry_write_evaluation_success + description: Number of write batches that were successfully refreshed server side + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txnrecovery.attempts.pending + exported_name: txnrecovery_attempts_pending + description: Number of transaction recovery attempts currently in-flight + y_axis_label: Recovery Attempts + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: txnrecovery.attempts.total + exported_name: txnrecovery_attempts_total + description: Number of transaction recovery attempts executed + y_axis_label: Recovery Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txnrecovery.failures + exported_name: txnrecovery_failures + description: Number of transaction recovery attempts that failed + y_axis_label: Recovery Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + 
derivative: NON_NEGATIVE_DERIVATIVE + - name: txnrecovery.successes.aborted + exported_name: txnrecovery_successes_aborted + description: Number of transaction recovery attempts that aborted a transaction + y_axis_label: Recovery Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txnrecovery.successes.committed + exported_name: txnrecovery_successes_committed + description: Number of transaction recovery attempts that committed a transaction + y_axis_label: Recovery Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txnrecovery.successes.pending + exported_name: txnrecovery_successes_pending + description: Number of transaction recovery attempts that left a transaction pending + y_axis_label: Recovery Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txnwaitqueue.pushee.waiting + exported_name: txnwaitqueue_pushee_waiting + description: Number of pushees on the txn wait queue + y_axis_label: Waiting Pushees + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: txnwaitqueue.pusher.slow + exported_name: txnwaitqueue_pusher_slow + description: The total number of cases where a pusher waited more than the excessive wait threshold + y_axis_label: Slow Pushers + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: txnwaitqueue.pusher.wait_time + exported_name: txnwaitqueue_pusher_wait_time + description: Histogram of durations spent in queue by pushers + y_axis_label: Pusher wait time + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: txnwaitqueue.pusher.waiting + exported_name: txnwaitqueue_pusher_waiting + description: Number of pushers on the txn wait queue + y_axis_label: Waiting Pushers + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: txnwaitqueue.query.wait_time + exported_name: 
txnwaitqueue_query_wait_time + description: Histogram of durations spent in queue by queries + y_axis_label: Query wait time + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: txnwaitqueue.query.waiting + exported_name: txnwaitqueue_query_waiting + description: Number of transaction status queries waiting for an updated transaction record + y_axis_label: Waiting Queries + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: valbytes + exported_name: valbytes + description: Number of bytes taken up by values + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: valcount + exported_name: valcount + description: Count of all values + y_axis_label: MVCC Values + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE diff --git a/src/current/_includes/cockroachcloud/metrics-tab.md b/src/current/_includes/cockroachcloud/metrics-tab.md index 6e6e584284e..5ce3df2d25c 100644 --- a/src/current/_includes/cockroachcloud/metrics-tab.md +++ b/src/current/_includes/cockroachcloud/metrics-tab.md @@ -1,11 +1,11 @@ {% assign version = site.current_cloud_version | replace: ".", "" %} -{% assign graphs = site.data[version].metrics.metrics | where_exp: "graphs", "graphs.metric_ui_tab contains tab" | map: "metric_ui_graph" | uniq %} +{% assign graphs = site.data[version].metrics.metrics-cloud | where_exp: "graphs", "graphs.metric_ui_tab contains tab" | map: "metric_ui_graph" | uniq %} {% for g in graphs %} {% comment %} Iterate through the graphs. {% endcomment %} ## {{ g }} -{% assign metrics = site.data[version].metrics.metrics | where: "metric_ui_graph", g %} +{% assign metrics = site.data[version].metrics.metrics-cloud | where: "metric_ui_graph", g %} {% comment %} Fetch all metrics for given metric_ui_tab. 
{% endcomment %} diff --git a/src/current/cockroachcloud/metrics-essential.md b/src/current/cockroachcloud/metrics-essential.md index d9077a15bcf..40e14b740a4 100644 --- a/src/current/cockroachcloud/metrics-essential.md +++ b/src/current/cockroachcloud/metrics-essential.md @@ -7,14 +7,14 @@ toc: true These essential CockroachDB metrics let you monitor your CockroachDB {{ site.data.products.standard }} cluster. The metrics are available on graphs on the tabs listed in the **Metrics tabs** column. Where **Custom** is listed, the metric may be graphed in a [**Custom Metrics Chart**]({% link cockroachcloud/custom-metrics-chart-page.md %}). The **Usage** column explains why each metric is important to visualize and how to make both practical and actionable use of the metric in a production deployment. {% assign version = site.current_cloud_version | replace: ".", "" %} -{% assign types = site.data[version].metrics.metrics | map: "metric_type" | uniq %} +{% assign types = site.data[version].metrics.metrics-cloud | map: "metric_type" | uniq %} {% comment %} Fetch the list of all metric types {% endcomment %} {% for t in types %} {% comment %} Iterate through the types. {% endcomment %} {% unless t contains "Request Units" %} {% comment %} Request Units is only for Basic tier. {% endcomment %} ## {{ t }} - {% assign metrics = site.data[version].metrics.metrics | where: "metric_type", t | sort: "metric_id" | where_exp: "metrics", "metrics.deploy_standard == true"%} + {% assign metrics = site.data[version].metrics.metrics-cloud | where: "metric_type", t | sort: "metric_id" | where_exp: "metrics", "metrics.deploy_standard == true"%} {% comment %} Fetch all metrics for that metric_type. {% endcomment %}
diff --git a/src/current/v25.3/cluster-virtualization-metric-scopes.md b/src/current/v25.3/cluster-virtualization-metric-scopes.md index c7493b90e7c..a01c187feca 100644 --- a/src/current/v25.3/cluster-virtualization-metric-scopes.md +++ b/src/current/v25.3/cluster-virtualization-metric-scopes.md @@ -28,1828 +28,68 @@ STORAGE: Scoped to the system virtual cluster SERVER: n/a {% endcomment %} +{% assign version = page.version.version | replace: ".", "" %} +{% comment %}version: {{ version }}{% endcomment %} + ## Metrics scoped to a virtual cluster {% comment %}LAYER=APPLICATION{% endcomment %} -- `auth.cert.conn.latency` -- `auth.gss.conn.latency` -- `auth.jwt.conn.latency` -- `auth.ldap.conn.latency` -- `auth.password.conn.latency` -- `auth.scram.conn.latency` -- `backup.last-failed-time.kms-inaccessible` -- `changefeed.admit_latency` -- `changefeed.aggregator_progress` -- `changefeed.backfill_count` -- `changefeed.backfill_pending_ranges` -- `changefeed.batch_reduction_count` -- `changefeed.buffer_entries_mem.acquired` -- `changefeed.buffer_entries_mem.acquired.aggregator` -- `changefeed.buffer_entries_mem.acquired.rangefeed` -- `changefeed.buffer_entries_mem.released` -- `changefeed.buffer_entries_mem.released.aggregator` -- `changefeed.buffer_entries_mem.released.rangefeed` -- `changefeed.buffer_entries.allocated_mem` -- `changefeed.buffer_entries.allocated_mem.aggregator` -- `changefeed.buffer_entries.allocated_mem.rangefeed` -- `changefeed.buffer_entries.flush` -- `changefeed.buffer_entries.flush.aggregator` -- `changefeed.buffer_entries.flush.rangefeed` -- `changefeed.buffer_entries.in` -- `changefeed.buffer_entries.in.aggregator` -- `changefeed.buffer_entries.in.rangefeed` -- `changefeed.buffer_entries.kv` -- `changefeed.buffer_entries.kv.aggregator` -- `changefeed.buffer_entries.kv.rangefeed` -- `changefeed.buffer_entries.out` -- `changefeed.buffer_entries.out.aggregator` -- `changefeed.buffer_entries.out.rangefeed` -- 
`changefeed.buffer_entries.released` -- `changefeed.buffer_entries.released.aggregator` -- `changefeed.buffer_entries.released.rangefeed` -- `changefeed.buffer_entries.resolved` -- `changefeed.buffer_entries.resolved.aggregator` -- `changefeed.buffer_entries.resolved.rangefeed` -- `changefeed.buffer_pushback_nanos` -- `changefeed.buffer_pushback_nanos.aggregator` -- `changefeed.buffer_pushback_nanos.rangefeed` -- `changefeed.bytes.messages_pushback_nanos` -- `changefeed.checkpoint_hist_nanos` -- `changefeed.checkpoint_progress` -- `changefeed.cloudstorage_buffered_bytes` -- `changefeed.commit_latency` -- `changefeed.emitted_batch_sizes` -- `changefeed.emitted_bytes` -- `changefeed.emitted_messages` -- `changefeed.error_retries` -- `changefeed.failures` -- `changefeed.filtered_messages` -- `changefeed.flush_hist_nanos` -- `changefeed.flush.messages_pushback_nanos` -- `changefeed.flushed_bytes` -- `changefeed.flushes` -- `changefeed.forwarded_resolved_messages` -- `changefeed.frontier_updates` -- `changefeed.internal_retry_message_count` -- `changefeed.kafka_throttling_hist_nanos` -- `changefeed.lagging_ranges` -- `changefeed.max_behind_nanos` -- `changefeed.message_size_hist` -- `changefeed.messages.messages_pushback_nanos` -- `changefeed.network.bytes_in` -- `changefeed.network.bytes_out` -- `changefeed.nprocs_consume_event_nanos` -- `changefeed.nprocs_flush_nanos` -- `changefeed.nprocs_in_flight_count` -- `changefeed.parallel_io_in_flight_keys` -- `changefeed.parallel_io_pending_rows` -- `changefeed.parallel_io_queue_nanos` -- `changefeed.parallel_io_result_queue_nanos` -- `changefeed.queue_time_nanos` -- `changefeed.running` -- `changefeed.schema_registry.registrations` -- `changefeed.schema_registry.retry_count` -- `changefeed.schemafeed.table_history_scans` -- `changefeed.schemafeed.table_metadata_nanos` -- `changefeed.sink_batch_hist_nanos` -- `changefeed.sink_errors` -- `changefeed.sink_io_inflight` -- `changefeed.size_based_flushes` -- 
`changefeed.total_ranges` -- `changefeed.usage.error_count` -- `changefeed.usage.query_duration` -- `changefeed.usage.table_bytes` -- `clock-offset.meannanos` -- `clock-offset.stddevnanos` -- `cloud.conns_opened` -- `cloud.conns_reused` -- `cloud.listing_results` -- `cloud.listings` -- `cloud.open_readers` -- `cloud.open_writers` -- `cloud.read_bytes` -- `cloud.readers_opened` -- `cloud.tls_handshakes` -- `cloud.write_bytes` -- `cloud.writers_opened` -- `cluster.preserve-downgrade-option.last-updated` -- `distsender.batch_requests.cross_region.bytes` -- `distsender.batch_requests.cross_zone.bytes` -- `distsender.batch_requests.replica_addressed.bytes` -- `distsender.batch_responses.cross_region.bytes` -- `distsender.batch_responses.cross_zone.bytes` -- `distsender.batch_responses.replica_addressed.bytes` -- `distsender.batches` -- `distsender.batches.async.sent` -- `distsender.batches.async.throttled` -- `distsender.batches.partial` -- `distsender.circuit_breaker.replicas.count` -- `distsender.circuit_breaker.replicas.probes.failure` -- `distsender.circuit_breaker.replicas.probes.running` -- `distsender.circuit_breaker.replicas.probes.success` -- `distsender.circuit_breaker.replicas.requests.cancelled` -- `distsender.circuit_breaker.replicas.requests.rejected` -- `distsender.circuit_breaker.replicas.tripped` -- `distsender.circuit_breaker.replicas.tripped_events` -- `distsender.errors.inleasetransferbackoffs` -- `distsender.errors.notleaseholder` -- `distsender.rangefeed.catchup_ranges` -- `distsender.rangefeed.error_catchup_ranges` -- `distsender.rangefeed.local_ranges` -- `distsender.rangefeed.restart_ranges` -- `distsender.rangefeed.retry.logical_ops_missing` -- `distsender.rangefeed.retry.manual_range_split` -- `distsender.rangefeed.retry.no_leaseholder` -- `distsender.rangefeed.retry.node_not_found` -- `distsender.rangefeed.retry.raft_snapshot` -- `distsender.rangefeed.retry.range_key_mismatch` -- `distsender.rangefeed.retry.range_merged` -- 
`distsender.rangefeed.retry.range_not_found` -- `distsender.rangefeed.retry.range_split` -- `distsender.rangefeed.retry.rangefeed_closed` -- `distsender.rangefeed.retry.replica_removed` -- `distsender.rangefeed.retry.send` -- `distsender.rangefeed.retry.slow_consumer` -- `distsender.rangefeed.retry.store_not_found` -- `distsender.rangefeed.total_ranges` -- `distsender.rangelookups` -- `distsender.rpc.addsstable.sent` -- `distsender.rpc.adminchangereplicas.sent` -- `distsender.rpc.adminmerge.sent` -- `distsender.rpc.adminrelocaterange.sent` -- `distsender.rpc.adminscatter.sent` -- `distsender.rpc.adminsplit.sent` -- `distsender.rpc.admintransferlease.sent` -- `distsender.rpc.adminunsplit.sent` -- `distsender.rpc.adminverifyprotectedtimestamp.sent` -- `distsender.rpc.barrier.sent` -- `distsender.rpc.checkconsistency.sent` -- `distsender.rpc.clearrange.sent` -- `distsender.rpc.computechecksum.sent` -- `distsender.rpc.conditionalput.sent` -- `distsender.rpc.delete.sent` -- `distsender.rpc.deleterange.sent` -- `distsender.rpc.endtxn.sent` -- `distsender.rpc.err.ambiguousresulterrtype` -- `distsender.rpc.err.batchtimestampbeforegcerrtype` -- `distsender.rpc.err.communicationerrtype` -- `distsender.rpc.err.conditionfailederrtype` -- `distsender.rpc.err.errordetailtype(0)` -- `distsender.rpc.err.errordetailtype(15)` -- `distsender.rpc.err.errordetailtype(19)` -- `distsender.rpc.err.errordetailtype(20)` -- `distsender.rpc.err.errordetailtype(21)` -- `distsender.rpc.err.errordetailtype(23)` -- `distsender.rpc.err.errordetailtype(24)` -- `distsender.rpc.err.errordetailtype(29)` -- `distsender.rpc.err.errordetailtype(30)` -- `distsender.rpc.err.errordetailtype(33)` -- `distsender.rpc.err.indeterminatecommiterrtype` -- `distsender.rpc.err.integeroverflowerrtype` -- `distsender.rpc.err.intentmissingerrtype` -- `distsender.rpc.err.internalerrtype` -- `distsender.rpc.err.invalidleaseerrtype` -- `distsender.rpc.err.leaserejectederrtype` -- `distsender.rpc.err.lockconflicterrtype` 
-- `distsender.rpc.err.mergeinprogresserrtype` -- `distsender.rpc.err.mintimestampboundunsatisfiableerrtype` -- `distsender.rpc.err.mvcchistorymutationerrtype` -- `distsender.rpc.err.nodeunavailableerrtype` -- `distsender.rpc.err.notleaseholdererrtype` -- `distsender.rpc.err.oprequirestxnerrtype` -- `distsender.rpc.err.optimisticevalconflictserrtype` -- `distsender.rpc.err.proxyfailederrtype` -- `distsender.rpc.err.raftgroupdeletederrtype` -- `distsender.rpc.err.rangefeedretryerrtype` -- `distsender.rpc.err.rangekeymismatcherrtype` -- `distsender.rpc.err.rangenotfounderrtype` -- `distsender.rpc.err.readwithinuncertaintyintervalerrtype` -- `distsender.rpc.err.refreshfailederrtype` -- `distsender.rpc.err.replicacorruptionerrtype` -- `distsender.rpc.err.replicatooolderrtype` -- `distsender.rpc.err.replicaunavailableerrtype` -- `distsender.rpc.err.storenotfounderrtype` -- `distsender.rpc.err.transactionabortederrtype` -- `distsender.rpc.err.transactionpusherrtype` -- `distsender.rpc.err.transactionretryerrtype` -- `distsender.rpc.err.transactionretrywithprotorefresherrtype` -- `distsender.rpc.err.transactionstatuserrtype` -- `distsender.rpc.err.txnalreadyencounterederrtype` -- `distsender.rpc.err.unsupportedrequesterrtype` -- `distsender.rpc.err.writeintenterrtype` -- `distsender.rpc.err.writetooolderrtype` -- `distsender.rpc.export.sent` -- `distsender.rpc.gc.sent` -- `distsender.rpc.get.sent` -- `distsender.rpc.heartbeattxn.sent` -- `distsender.rpc.increment.sent` -- `distsender.rpc.initput.sent` -- `distsender.rpc.isspanempty.sent` -- `distsender.rpc.leaseinfo.sent` -- `distsender.rpc.linkexternalsstable.sent` -- `distsender.rpc.merge.sent` -- `distsender.rpc.migrate.sent` -- `distsender.rpc.probe.sent` -- `distsender.rpc.proxy.err` -- `distsender.rpc.proxy.forward.err` -- `distsender.rpc.proxy.forward.sent` -- `distsender.rpc.proxy.sent` -- `distsender.rpc.pushtxn.sent` -- `distsender.rpc.put.sent` -- `distsender.rpc.queryintent.sent` -- 
`distsender.rpc.querylocks.sent` -- `distsender.rpc.queryresolvedtimestamp.sent` -- `distsender.rpc.querytxn.sent` -- `distsender.rpc.rangestats.sent` -- `distsender.rpc.recomputestats.sent` -- `distsender.rpc.recovertxn.sent` -- `distsender.rpc.refresh.sent` -- `distsender.rpc.refreshrange.sent` -- `distsender.rpc.requestlease.sent` -- `distsender.rpc.resolveintent.sent` -- `distsender.rpc.resolveintentrange.sent` -- `distsender.rpc.reversescan.sent` -- `distsender.rpc.revertrange.sent` -- `distsender.rpc.scan.sent` -- `distsender.rpc.sent` -- `distsender.rpc.sent.local` -- `distsender.rpc.sent.nextreplicaerror` -- `distsender.rpc.subsume.sent` -- `distsender.rpc.transferlease.sent` -- `distsender.rpc.truncatelog.sent` -- `distsender.rpc.writebatch.sent` -- `distsender.slow.replicarpcs` -- `jobs.adopt_iterations` -- `jobs.auto_config_env_runner.currently_idle` -- `jobs.auto_config_env_runner.currently_paused` -- `jobs.auto_config_env_runner.currently_running` -- `jobs.auto_config_env_runner.expired_pts_records` -- `jobs.auto_config_env_runner.fail_or_cancel_completed` -- `jobs.auto_config_env_runner.fail_or_cancel_failed` -- `jobs.auto_config_env_runner.fail_or_cancel_retry_error` -- `jobs.auto_config_env_runner.protected_age_sec` -- `jobs.auto_config_env_runner.protected_record_count` -- `jobs.auto_config_env_runner.resume_completed` -- `jobs.auto_config_env_runner.resume_failed` -- `jobs.auto_config_env_runner.resume_retry_error` -- `jobs.auto_config_runner.currently_idle` -- `jobs.auto_config_runner.currently_paused` -- `jobs.auto_config_runner.currently_running` -- `jobs.auto_config_runner.expired_pts_records` -- `jobs.auto_config_runner.fail_or_cancel_completed` -- `jobs.auto_config_runner.fail_or_cancel_failed` -- `jobs.auto_config_runner.fail_or_cancel_retry_error` -- `jobs.auto_config_runner.protected_age_sec` -- `jobs.auto_config_runner.protected_record_count` -- `jobs.auto_config_runner.resume_completed` -- `jobs.auto_config_runner.resume_failed` -- 
`jobs.auto_config_runner.resume_retry_error` -- `jobs.auto_config_task.currently_idle` -- `jobs.auto_config_task.currently_paused` -- `jobs.auto_config_task.currently_running` -- `jobs.auto_config_task.expired_pts_records` -- `jobs.auto_config_task.fail_or_cancel_completed` -- `jobs.auto_config_task.fail_or_cancel_failed` -- `jobs.auto_config_task.fail_or_cancel_retry_error` -- `jobs.auto_config_task.protected_age_sec` -- `jobs.auto_config_task.protected_record_count` -- `jobs.auto_config_task.resume_completed` -- `jobs.auto_config_task.resume_failed` -- `jobs.auto_config_task.resume_retry_error` -- `jobs.auto_create_partial_stats.currently_idle` -- `jobs.auto_create_partial_stats.currently_paused` -- `jobs.auto_create_partial_stats.currently_running` -- `jobs.auto_create_partial_stats.expired_pts_records` -- `jobs.auto_create_partial_stats.fail_or_cancel_completed` -- `jobs.auto_create_partial_stats.fail_or_cancel_failed` -- `jobs.auto_create_partial_stats.fail_or_cancel_retry_error` -- `jobs.auto_create_partial_stats.protected_age_sec` -- `jobs.auto_create_partial_stats.protected_record_count` -- `jobs.auto_create_partial_stats.resume_completed` -- `jobs.auto_create_partial_stats.resume_failed` -- `jobs.auto_create_partial_stats.resume_retry_error` -- `jobs.auto_create_stats.currently_idle` -- `jobs.auto_create_stats.currently_paused` -- `jobs.auto_create_stats.currently_running` -- `jobs.auto_create_stats.expired_pts_records` -- `jobs.auto_create_stats.fail_or_cancel_completed` -- `jobs.auto_create_stats.fail_or_cancel_failed` -- `jobs.auto_create_stats.fail_or_cancel_retry_error` -- `jobs.auto_create_stats.protected_age_sec` -- `jobs.auto_create_stats.protected_record_count` -- `jobs.auto_create_stats.resume_completed` -- `jobs.auto_create_stats.resume_failed` -- `jobs.auto_create_stats.resume_retry_error` -- `jobs.auto_schema_telemetry.currently_idle` -- `jobs.auto_schema_telemetry.currently_paused` -- `jobs.auto_schema_telemetry.currently_running` -- 
`jobs.auto_schema_telemetry.expired_pts_records` -- `jobs.auto_schema_telemetry.fail_or_cancel_completed` -- `jobs.auto_schema_telemetry.fail_or_cancel_failed` -- `jobs.auto_schema_telemetry.fail_or_cancel_retry_error` -- `jobs.auto_schema_telemetry.protected_age_sec` -- `jobs.auto_schema_telemetry.protected_record_count` -- `jobs.auto_schema_telemetry.resume_completed` -- `jobs.auto_schema_telemetry.resume_failed` -- `jobs.auto_schema_telemetry.resume_retry_error` -- `jobs.auto_span_config_reconciliation.currently_idle` -- `jobs.auto_span_config_reconciliation.currently_paused` -- `jobs.auto_span_config_reconciliation.currently_running` -- `jobs.auto_span_config_reconciliation.expired_pts_records` -- `jobs.auto_span_config_reconciliation.fail_or_cancel_completed` -- `jobs.auto_span_config_reconciliation.fail_or_cancel_failed` -- `jobs.auto_span_config_reconciliation.fail_or_cancel_retry_error` -- `jobs.auto_span_config_reconciliation.protected_age_sec` -- `jobs.auto_span_config_reconciliation.protected_record_count` -- `jobs.auto_span_config_reconciliation.resume_completed` -- `jobs.auto_span_config_reconciliation.resume_failed` -- `jobs.auto_span_config_reconciliation.resume_retry_error` -- `jobs.auto_sql_stats_compaction.currently_idle` -- `jobs.auto_sql_stats_compaction.currently_paused` -- `jobs.auto_sql_stats_compaction.currently_running` -- `jobs.auto_sql_stats_compaction.expired_pts_records` -- `jobs.auto_sql_stats_compaction.fail_or_cancel_completed` -- `jobs.auto_sql_stats_compaction.fail_or_cancel_failed` -- `jobs.auto_sql_stats_compaction.fail_or_cancel_retry_error` -- `jobs.auto_sql_stats_compaction.protected_age_sec` -- `jobs.auto_sql_stats_compaction.protected_record_count` -- `jobs.auto_sql_stats_compaction.resume_completed` -- `jobs.auto_sql_stats_compaction.resume_failed` -- `jobs.auto_sql_stats_compaction.resume_retry_error` -- `jobs.auto_update_sql_activity.currently_idle` -- `jobs.auto_update_sql_activity.currently_paused` -- 
`jobs.auto_update_sql_activity.currently_running` -- `jobs.auto_update_sql_activity.expired_pts_records` -- `jobs.auto_update_sql_activity.fail_or_cancel_completed` -- `jobs.auto_update_sql_activity.fail_or_cancel_failed` -- `jobs.auto_update_sql_activity.fail_or_cancel_retry_error` -- `jobs.auto_update_sql_activity.protected_age_sec` -- `jobs.auto_update_sql_activity.protected_record_count` -- `jobs.auto_update_sql_activity.resume_completed` -- `jobs.auto_update_sql_activity.resume_failed` -- `jobs.auto_update_sql_activity.resume_retry_error` -- `jobs.backup.currently_idle` -- `jobs.backup.currently_paused` -- `jobs.backup.currently_running` -- `jobs.backup.expired_pts_records` -- `jobs.backup.fail_or_cancel_completed` -- `jobs.backup.fail_or_cancel_failed` -- `jobs.backup.fail_or_cancel_retry_error` -- `jobs.backup.protected_age_sec` -- `jobs.backup.protected_record_count` -- `jobs.backup.resume_completed` -- `jobs.backup.resume_failed` -- `jobs.backup.resume_retry_error` -- `jobs.changefeed.currently_idle` -- `jobs.changefeed.currently_paused` -- `jobs.changefeed.currently_running` -- `jobs.changefeed.expired_pts_records` -- `jobs.changefeed.fail_or_cancel_completed` -- `jobs.changefeed.fail_or_cancel_failed` -- `jobs.changefeed.fail_or_cancel_retry_error` -- `jobs.changefeed.protected_age_sec` -- `jobs.changefeed.protected_record_count` -- `jobs.changefeed.resume_completed` -- `jobs.changefeed.resume_failed` -- `jobs.changefeed.resume_retry_error` -- `jobs.claimed_jobs` -- `jobs.create_stats.currently_idle` -- `jobs.create_stats.currently_paused` -- `jobs.create_stats.currently_running` -- `jobs.create_stats.expired_pts_records` -- `jobs.create_stats.fail_or_cancel_completed` -- `jobs.create_stats.fail_or_cancel_failed` -- `jobs.create_stats.fail_or_cancel_retry_error` -- `jobs.create_stats.protected_age_sec` -- `jobs.create_stats.protected_record_count` -- `jobs.create_stats.resume_completed` -- `jobs.create_stats.resume_failed` -- 
`jobs.create_stats.resume_retry_error` -- `jobs.history_retention.currently_idle` -- `jobs.history_retention.currently_paused` -- `jobs.history_retention.currently_running` -- `jobs.history_retention.expired_pts_records` -- `jobs.history_retention.fail_or_cancel_completed` -- `jobs.history_retention.fail_or_cancel_failed` -- `jobs.history_retention.fail_or_cancel_retry_error` -- `jobs.history_retention.protected_age_sec` -- `jobs.history_retention.protected_record_count` -- `jobs.history_retention.resume_completed` -- `jobs.history_retention.resume_failed` -- `jobs.history_retention.resume_retry_error` -- `jobs.import_rollback.currently_idle` -- `jobs.import_rollback.currently_paused` -- `jobs.import_rollback.currently_running` -- `jobs.import_rollback.expired_pts_records` -- `jobs.import_rollback.fail_or_cancel_completed` -- `jobs.import_rollback.fail_or_cancel_failed` -- `jobs.import_rollback.fail_or_cancel_retry_error` -- `jobs.import_rollback.protected_age_sec` -- `jobs.import_rollback.protected_record_count` -- `jobs.import_rollback.resume_completed` -- `jobs.import_rollback.resume_failed` -- `jobs.import_rollback.resume_retry_error` -- `jobs.import.currently_idle` -- `jobs.import.currently_paused` -- `jobs.import.currently_running` -- `jobs.import.expired_pts_records` -- `jobs.import.fail_or_cancel_completed` -- `jobs.import.fail_or_cancel_failed` -- `jobs.import.fail_or_cancel_retry_error` -- `jobs.import.protected_age_sec` -- `jobs.import.protected_record_count` -- `jobs.import.resume_completed` -- `jobs.import.resume_failed` -- `jobs.import.resume_retry_error` -- `jobs.key_visualizer.currently_idle` -- `jobs.key_visualizer.currently_paused` -- `jobs.key_visualizer.currently_running` -- `jobs.key_visualizer.expired_pts_records` -- `jobs.key_visualizer.fail_or_cancel_completed` -- `jobs.key_visualizer.fail_or_cancel_failed` -- `jobs.key_visualizer.fail_or_cancel_retry_error` -- `jobs.key_visualizer.protected_age_sec` -- 
`jobs.key_visualizer.protected_record_count` -- `jobs.key_visualizer.resume_completed` -- `jobs.key_visualizer.resume_failed` -- `jobs.key_visualizer.resume_retry_error` -- `jobs.logical_replication.currently_idle` -- `jobs.logical_replication.currently_paused` -- `jobs.logical_replication.currently_running` -- `jobs.logical_replication.expired_pts_records` -- `jobs.logical_replication.fail_or_cancel_completed` -- `jobs.logical_replication.fail_or_cancel_failed` -- `jobs.logical_replication.fail_or_cancel_retry_error` -- `jobs.logical_replication.protected_age_sec` -- `jobs.logical_replication.protected_record_count` -- `jobs.logical_replication.resume_completed` -- `jobs.logical_replication.resume_failed` -- `jobs.logical_replication.resume_retry_error` -- `jobs.metrics.task_failed` -- `jobs.migration.currently_idle` -- `jobs.migration.currently_paused` -- `jobs.migration.currently_running` -- `jobs.migration.expired_pts_records` -- `jobs.migration.fail_or_cancel_completed` -- `jobs.migration.fail_or_cancel_failed` -- `jobs.migration.fail_or_cancel_retry_error` -- `jobs.migration.protected_age_sec` -- `jobs.migration.protected_record_count` -- `jobs.migration.resume_completed` -- `jobs.migration.resume_failed` -- `jobs.migration.resume_retry_error` -- `jobs.mvcc_statistics_update.currently_idle` -- `jobs.mvcc_statistics_update.currently_paused` -- `jobs.mvcc_statistics_update.currently_running` -- `jobs.mvcc_statistics_update.expired_pts_records` -- `jobs.mvcc_statistics_update.fail_or_cancel_completed` -- `jobs.mvcc_statistics_update.fail_or_cancel_failed` -- `jobs.mvcc_statistics_update.fail_or_cancel_retry_error` -- `jobs.mvcc_statistics_update.protected_age_sec` -- `jobs.mvcc_statistics_update.protected_record_count` -- `jobs.mvcc_statistics_update.resume_completed` -- `jobs.mvcc_statistics_update.resume_failed` -- `jobs.mvcc_statistics_update.resume_retry_error` -- `jobs.new_schema_change.currently_idle` -- `jobs.new_schema_change.currently_paused` -- 
`jobs.new_schema_change.currently_running` -- `jobs.new_schema_change.expired_pts_records` -- `jobs.new_schema_change.fail_or_cancel_completed` -- `jobs.new_schema_change.fail_or_cancel_failed` -- `jobs.new_schema_change.fail_or_cancel_retry_error` -- `jobs.new_schema_change.protected_age_sec` -- `jobs.new_schema_change.protected_record_count` -- `jobs.new_schema_change.resume_completed` -- `jobs.new_schema_change.resume_failed` -- `jobs.new_schema_change.resume_retry_error` -- `jobs.poll_jobs_stats.currently_idle` -- `jobs.poll_jobs_stats.currently_paused` -- `jobs.poll_jobs_stats.currently_running` -- `jobs.poll_jobs_stats.expired_pts_records` -- `jobs.poll_jobs_stats.fail_or_cancel_completed` -- `jobs.poll_jobs_stats.fail_or_cancel_failed` -- `jobs.poll_jobs_stats.fail_or_cancel_retry_error` -- `jobs.poll_jobs_stats.protected_age_sec` -- `jobs.poll_jobs_stats.protected_record_count` -- `jobs.poll_jobs_stats.resume_completed` -- `jobs.poll_jobs_stats.resume_failed` -- `jobs.poll_jobs_stats.resume_retry_error` -- `jobs.replication_stream_ingestion.currently_idle` -- `jobs.replication_stream_ingestion.currently_paused` -- `jobs.replication_stream_ingestion.currently_running` -- `jobs.replication_stream_ingestion.expired_pts_records` -- `jobs.replication_stream_ingestion.fail_or_cancel_completed` -- `jobs.replication_stream_ingestion.fail_or_cancel_failed` -- `jobs.replication_stream_ingestion.fail_or_cancel_retry_error` -- `jobs.replication_stream_ingestion.protected_age_sec` -- `jobs.replication_stream_ingestion.protected_record_count` -- `jobs.replication_stream_ingestion.resume_completed` -- `jobs.replication_stream_ingestion.resume_failed` -- `jobs.replication_stream_ingestion.resume_retry_error` -- `jobs.replication_stream_producer.currently_idle` -- `jobs.replication_stream_producer.currently_paused` -- `jobs.replication_stream_producer.currently_running` -- `jobs.replication_stream_producer.expired_pts_records` -- 
`jobs.replication_stream_producer.fail_or_cancel_completed` -- `jobs.replication_stream_producer.fail_or_cancel_failed` -- `jobs.replication_stream_producer.fail_or_cancel_retry_error` -- `jobs.replication_stream_producer.protected_age_sec` -- `jobs.replication_stream_producer.protected_record_count` -- `jobs.replication_stream_producer.resume_completed` -- `jobs.replication_stream_producer.resume_failed` -- `jobs.replication_stream_producer.resume_retry_error` -- `jobs.restore.currently_idle` -- `jobs.restore.currently_paused` -- `jobs.restore.currently_running` -- `jobs.restore.expired_pts_records` -- `jobs.restore.fail_or_cancel_completed` -- `jobs.restore.fail_or_cancel_failed` -- `jobs.restore.fail_or_cancel_retry_error` -- `jobs.restore.protected_age_sec` -- `jobs.restore.protected_record_count` -- `jobs.restore.resume_completed` -- `jobs.restore.resume_failed` -- `jobs.restore.resume_retry_error` -- `jobs.resumed_claimed_jobs` -- `jobs.row_level_ttl.currently_idle` -- `jobs.row_level_ttl.currently_paused` -- `jobs.row_level_ttl.currently_running` -- `jobs.row_level_ttl.delete_duration` -- `jobs.row_level_ttl.expired_pts_records` -- `jobs.row_level_ttl.fail_or_cancel_completed` -- `jobs.row_level_ttl.fail_or_cancel_failed` -- `jobs.row_level_ttl.fail_or_cancel_retry_error` -- `jobs.row_level_ttl.num_active_spans` -- `jobs.row_level_ttl.protected_age_sec` -- `jobs.row_level_ttl.protected_record_count` -- `jobs.row_level_ttl.resume_completed` -- `jobs.row_level_ttl.resume_failed` -- `jobs.row_level_ttl.resume_retry_error` -- `jobs.row_level_ttl.rows_deleted` -- `jobs.row_level_ttl.rows_selected` -- `jobs.row_level_ttl.select_duration` -- `jobs.row_level_ttl.span_total_duration` -- `jobs.row_level_ttl.total_expired_rows` -- `jobs.row_level_ttl.total_rows` -- `jobs.running_non_idle` -- `jobs.schema_change_gc.currently_idle` -- `jobs.schema_change_gc.currently_paused` -- `jobs.schema_change_gc.currently_running` -- `jobs.schema_change_gc.expired_pts_records` -- 
`jobs.schema_change_gc.fail_or_cancel_completed` -- `jobs.schema_change_gc.fail_or_cancel_failed` -- `jobs.schema_change_gc.fail_or_cancel_retry_error` -- `jobs.schema_change_gc.protected_age_sec` -- `jobs.schema_change_gc.protected_record_count` -- `jobs.schema_change_gc.resume_completed` -- `jobs.schema_change_gc.resume_failed` -- `jobs.schema_change_gc.resume_retry_error` -- `jobs.schema_change.currently_idle` -- `jobs.schema_change.currently_paused` -- `jobs.schema_change.currently_running` -- `jobs.schema_change.expired_pts_records` -- `jobs.schema_change.fail_or_cancel_completed` -- `jobs.schema_change.fail_or_cancel_failed` -- `jobs.schema_change.fail_or_cancel_retry_error` -- `jobs.schema_change.protected_age_sec` -- `jobs.schema_change.protected_record_count` -- `jobs.schema_change.resume_completed` -- `jobs.schema_change.resume_failed` -- `jobs.schema_change.resume_retry_error` -- `jobs.standby_read_ts_poller.currently_idle` -- `jobs.standby_read_ts_poller.currently_paused` -- `jobs.standby_read_ts_poller.currently_running` -- `jobs.standby_read_ts_poller.expired_pts_records` -- `jobs.standby_read_ts_poller.fail_or_cancel_completed` -- `jobs.standby_read_ts_poller.fail_or_cancel_failed` -- `jobs.standby_read_ts_poller.fail_or_cancel_retry_error` -- `jobs.standby_read_ts_poller.protected_age_sec` -- `jobs.standby_read_ts_poller.protected_record_count` -- `jobs.standby_read_ts_poller.resume_completed` -- `jobs.standby_read_ts_poller.resume_failed` -- `jobs.standby_read_ts_poller.resume_retry_error` -- `jobs.typedesc_schema_change.currently_idle` -- `jobs.typedesc_schema_change.currently_paused` -- `jobs.typedesc_schema_change.currently_running` -- `jobs.typedesc_schema_change.expired_pts_records` -- `jobs.typedesc_schema_change.fail_or_cancel_completed` -- `jobs.typedesc_schema_change.fail_or_cancel_failed` -- `jobs.typedesc_schema_change.fail_or_cancel_retry_error` -- `jobs.typedesc_schema_change.protected_age_sec` -- 
`jobs.typedesc_schema_change.protected_record_count` -- `jobs.typedesc_schema_change.resume_completed` -- `jobs.typedesc_schema_change.resume_failed` -- `jobs.typedesc_schema_change.resume_retry_error` -- `jobs.update_table_metadata_cache.currently_idle` -- `jobs.update_table_metadata_cache.currently_paused` -- `jobs.update_table_metadata_cache.currently_running` -- `jobs.update_table_metadata_cache.expired_pts_records` -- `jobs.update_table_metadata_cache.fail_or_cancel_completed` -- `jobs.update_table_metadata_cache.fail_or_cancel_failed` -- `jobs.update_table_metadata_cache.fail_or_cancel_retry_error` -- `jobs.update_table_metadata_cache.protected_age_sec` -- `jobs.update_table_metadata_cache.protected_record_count` -- `jobs.update_table_metadata_cache.resume_completed` -- `jobs.update_table_metadata_cache.resume_failed` -- `jobs.update_table_metadata_cache.resume_retry_error` -- `kv.protectedts.reconciliation.errors` -- `kv.protectedts.reconciliation.num_runs` -- `kv.protectedts.reconciliation.records_processed` -- `kv.protectedts.reconciliation.records_removed` -- `logical_replication.batch_hist_nanos` -- `logical_replication.checkpoint_events_ingested` -- `logical_replication.commit_latency` -- `logical_replication.events_dlqed` -- `logical_replication.events_dlqed_age` -- `logical_replication.events_dlqed_by_label` -- `logical_replication.events_dlqed_errtype` -- `logical_replication.events_dlqed_space` -- `logical_replication.events_ingested` -- `logical_replication.events_ingested_by_label` -- `logical_replication.events_initial_failure` -- `logical_replication.events_initial_success` -- `logical_replication.events_retry_failure` -- `logical_replication.events_retry_success` -- `logical_replication.logical_bytes` -- `logical_replication.replan_count` -- `logical_replication.replicated_time_by_label` -- `logical_replication.replicated_time_seconds` -- `logical_replication.retry_queue_bytes` -- `logical_replication.retry_queue_events` -- 
`obs.tablemetadata.update_job.duration` -- `obs.tablemetadata.update_job.errors` -- `obs.tablemetadata.update_job.runs` -- `obs.tablemetadata.update_job.table_updates` -- `physical_replication.admit_latency` -- `physical_replication.commit_latency` -- `physical_replication.cutover_progress` -- `physical_replication.distsql_replan_count` -- `physical_replication.events_ingested` -- `physical_replication.flush_hist_nanos` -- `physical_replication.flushes` -- `physical_replication.logical_bytes` -- `physical_replication.replicated_time_seconds` -- `physical_replication.resolved_events_ingested` -- `physical_replication.running` -- `requests.slow.distsender` -- `round-trip-latency` -- `rpc.client.bytes.egress` -- `rpc.client.bytes.ingress` -- `rpc.connection.avg_round_trip_latency` -- `rpc.connection.connected` -- `rpc.connection.failures` -- `rpc.connection.healthy` -- `rpc.connection.healthy_nanos` -- `rpc.connection.heartbeats` -- `rpc.connection.inactive` -- `rpc.connection.unhealthy` -- `rpc.connection.unhealthy_nanos` -- `schedules.BACKUP.failed` -- `schedules.BACKUP.last-completed-time` -- `schedules.BACKUP.protected_age_sec` -- `schedules.BACKUP.protected_record_count` -- `schedules.BACKUP.started` -- `schedules.BACKUP.succeeded` -- `schedules.CHANGEFEED.failed` -- `schedules.CHANGEFEED.started` -- `schedules.CHANGEFEED.succeeded` -- `schedules.error` -- `schedules.malformed` -- `schedules.round.jobs-started` -- `schedules.round.reschedule-skip` -- `schedules.round.reschedule-wait` -- `schedules.scheduled-row-level-ttl-executor.failed` -- `schedules.scheduled-row-level-ttl-executor.started` -- `schedules.scheduled-row-level-ttl-executor.succeeded` -- `schedules.scheduled-schema-telemetry-executor.failed` -- `schedules.scheduled-schema-telemetry-executor.started` -- `schedules.scheduled-schema-telemetry-executor.succeeded` -- `schedules.scheduled-sql-stats-compaction-executor.failed` -- `schedules.scheduled-sql-stats-compaction-executor.started` -- 
`schedules.scheduled-sql-stats-compaction-executor.succeeded` -- `sql.bytesin` -- `sql.bytesout` -- `sql.conn.failures` -- `sql.conn.latency` -- `sql.conns` -- `sql.conns_waiting_to_hash` -- `sql.contention.resolver.failed_resolutions` -- `sql.contention.resolver.queue_size` -- `sql.contention.resolver.retries` -- `sql.contention.txn_id_cache.miss` -- `sql.contention.txn_id_cache.read` -- `sql.copy.count` -- `sql.copy.count.internal` -- `sql.copy.nonatomic.count` -- `sql.copy.nonatomic.count.internal` -- `sql.copy.nonatomic.started.count` -- `sql.copy.nonatomic.started.count.internal` -- `sql.copy.started.count` -- `sql.copy.started.count.internal` -- `sql.ddl.count` -- `sql.ddl.count.internal` -- `sql.ddl.started.count` -- `sql.ddl.started.count.internal` -- `sql.delete.count` -- `sql.delete.count.internal` -- `sql.delete.started.count` -- `sql.delete.started.count.internal` -- `sql.disk.distsql.current` -- `sql.disk.distsql.max` -- `sql.disk.distsql.spilled.bytes.read` -- `sql.disk.distsql.spilled.bytes.written` -- `sql.distsql.contended_queries.count` -- `sql.distsql.cumulative_contention_nanos` -- `sql.distsql.dist_query_rerun_locally.count` -- `sql.distsql.dist_query_rerun_locally.failure_count` -- `sql.distsql.exec.latency` -- `sql.distsql.exec.latency.internal` -- `sql.distsql.flows.active` -- `sql.distsql.flows.total` -- `sql.distsql.queries.active` -- `sql.distsql.queries.spilled` -- `sql.distsql.queries.total` -- `sql.distsql.select.count` -- `sql.distsql.select.count.internal` -- `sql.distsql.service.latency` -- `sql.distsql.service.latency.internal` -- `sql.distsql.vec.openfds` -- `sql.exec.latency` -- `sql.exec.latency.internal` -- `sql.failure.count` -- `sql.failure.count.internal` -- `sql.feature_flag_denial` -- `sql.full.scan.count` -- `sql.full.scan.count.internal` -- `sql.guardrails.full_scan_rejected.count` -- `sql.guardrails.full_scan_rejected.count.internal` -- `sql.guardrails.max_row_size_err.count` -- 
`sql.guardrails.max_row_size_err.count.internal` -- `sql.guardrails.max_row_size_log.count` -- `sql.guardrails.max_row_size_log.count.internal` -- `sql.guardrails.transaction_rows_read_err.count` -- `sql.guardrails.transaction_rows_read_err.count.internal` -- `sql.guardrails.transaction_rows_read_log.count` -- `sql.guardrails.transaction_rows_read_log.count.internal` -- `sql.guardrails.transaction_rows_written_err.count` -- `sql.guardrails.transaction_rows_written_err.count.internal` -- `sql.guardrails.transaction_rows_written_log.count` -- `sql.guardrails.transaction_rows_written_log.count.internal` -- `sql.hydrated_schema_cache.hits` -- `sql.hydrated_schema_cache.misses` -- `sql.hydrated_table_cache.hits` -- `sql.hydrated_table_cache.misses` -- `sql.hydrated_type_cache.hits` -- `sql.hydrated_type_cache.misses` -- `sql.hydrated_udf_cache.hits` -- `sql.hydrated_udf_cache.misses` -- `sql.insert.count` -- `sql.insert.count.internal` -- `sql.insert.started.count` -- `sql.insert.started.count.internal` -- `sql.insights.anomaly_detection.evictions` -- `sql.insights.anomaly_detection.fingerprints` -- `sql.insights.anomaly_detection.memory` -- `sql.leases.active` -- `sql.leases.expired` -- `sql.leases.long_wait_for_no_version` -- `sql.leases.long_wait_for_one_version` -- `sql.leases.long_wait_for_two_version_invariant` -- `sql.leases.waiting_to_expire` -- `sql.mem.bulk.current` -- `sql.mem.bulk.max` -- `sql.mem.conns.current` -- `sql.mem.conns.max` -- `sql.mem.distsql.current` -- `sql.mem.distsql.max` -- `sql.mem.internal.current` -- `sql.mem.internal.max` -- `sql.mem.internal.session.current` -- `sql.mem.internal.session.max` -- `sql.mem.internal.session.prepared.current` -- `sql.mem.internal.session.prepared.max` -- `sql.mem.internal.txn.current` -- `sql.mem.internal.txn.max` -- `sql.mem.root.current` -- `sql.mem.root.max` -- `sql.mem.sql.current` -- `sql.mem.sql.max` -- `sql.mem.sql.session.current` -- `sql.mem.sql.session.max` -- `sql.mem.sql.session.prepared.current` 
-- `sql.mem.sql.session.prepared.max` -- `sql.mem.sql.txn.current` -- `sql.mem.sql.txn.max` -- `sql.misc.count` -- `sql.misc.count.internal` -- `sql.misc.started.count` -- `sql.misc.started.count.internal` -- `sql.new_conns` -- `sql.optimizer.fallback.count` -- `sql.optimizer.fallback.count.internal` -- `sql.optimizer.plan_cache.hits` -- `sql.optimizer.plan_cache.hits.internal` -- `sql.optimizer.plan_cache.misses` -- `sql.optimizer.plan_cache.misses.internal` -- `sql.pgwire_cancel.ignored` -- `sql.pgwire_cancel.successful` -- `sql.pgwire_cancel.total` -- `sql.pgwire.pipeline.count` -- `sql.pre_serve.bytesin` -- `sql.pre_serve.bytesout` -- `sql.pre_serve.conn.failures` -- `sql.pre_serve.mem.cur` -- `sql.pre_serve.mem.max` -- `sql.pre_serve.new_conns` -- `sql.query.count` -- `sql.query.count.internal` -- `sql.query.started.count` -- `sql.query.started.count.internal` -- `sql.restart_savepoint.count` -- `sql.restart_savepoint.count.internal` -- `sql.restart_savepoint.release.count` -- `sql.restart_savepoint.release.count.internal` -- `sql.restart_savepoint.release.started.count` -- `sql.restart_savepoint.release.started.count.internal` -- `sql.restart_savepoint.rollback.count` -- `sql.restart_savepoint.rollback.count.internal` -- `sql.restart_savepoint.rollback.started.count` -- `sql.restart_savepoint.rollback.started.count.internal` -- `sql.restart_savepoint.started.count` -- `sql.restart_savepoint.started.count.internal` -- `sql.savepoint.count` -- `sql.savepoint.count.internal` -- `sql.savepoint.release.count` -- `sql.savepoint.release.count.internal` -- `sql.savepoint.release.started.count` -- `sql.savepoint.release.started.count.internal` -- `sql.savepoint.rollback.count` -- `sql.savepoint.rollback.count.internal` -- `sql.savepoint.rollback.started.count` -- `sql.savepoint.rollback.started.count.internal` -- `sql.savepoint.started.count` -- `sql.savepoint.started.count.internal` -- `sql.schema_changer.permanent_errors` -- `sql.schema_changer.retry_errors` -- 
`sql.schema_changer.running` -- `sql.schema_changer.successes` -- `sql.schema.invalid_objects` -- `sql.select.count` -- `sql.select.count.internal` -- `sql.select.started.count` -- `sql.select.started.count.internal` -- `sql.service.latency` -- `sql.service.latency.internal` -- `sql.statements.active` -- `sql.statements.active.internal` -- `sql.stats.activity.update.latency` -- `sql.stats.activity.updates.failed` -- `sql.stats.activity.updates.successful` -- `sql.stats.cleanup.rows_removed` -- `sql.stats.discarded.current` -- `sql.stats.flush.done_signals.ignored` -- `sql.stats.flush.fingerprint.count` -- `sql.stats.flush.latency` -- `sql.stats.flushes.failed` -- `sql.stats.flushes.successful` -- `sql.stats.mem.current` -- `sql.stats.mem.max` -- `sql.stats.reported.mem.current` -- `sql.stats.reported.mem.max` -- `sql.stats.txn_stats_collection.duration` -- `sql.temp_object_cleaner.active_cleaners` -- `sql.temp_object_cleaner.schemas_deletion_error` -- `sql.temp_object_cleaner.schemas_deletion_success` -- `sql.temp_object_cleaner.schemas_to_delete` -- `sql.txn.abort.count` -- `sql.txn.abort.count.internal` -- `sql.txn.begin.count` -- `sql.txn.begin.count.internal` -- `sql.txn.begin.started.count` -- `sql.txn.begin.started.count.internal` -- `sql.txn.commit.count` -- `sql.txn.commit.count.internal` -- `sql.txn.commit.started.count` -- `sql.txn.commit.started.count.internal` -- `sql.txn.contended.count` -- `sql.txn.contended.count.internal` -- `sql.txn.latency` -- `sql.txn.latency.internal` -- `sql.txn.rollback.count` -- `sql.txn.rollback.count.internal` -- `sql.txn.rollback.started.count` -- `sql.txn.rollback.started.count.internal` -- `sql.txn.upgraded_iso_level.count` -- `sql.txn.upgraded_iso_level.count.internal` -- `sql.txns.open` -- `sql.txns.open.internal` -- `sql.update.count` -- `sql.update.count.internal` -- `sql.update.started.count` -- `sql.update.started.count.internal` -- `sqlliveness.is_alive.cache_hits` -- `sqlliveness.is_alive.cache_misses` -- 
`sqlliveness.sessions_deleted` -- `sqlliveness.sessions_deletion_runs` -- `sqlliveness.write_failures` -- `sqlliveness.write_successes` -- `tenant.cost_client.blocked_requests` -- `tenant.sql_usage.cross_region_network_ru` -- `tenant.sql_usage.estimated_cpu_seconds` -- `tenant.sql_usage.estimated_kv_cpu_seconds` -- `tenant.sql_usage.estimated_replication_bytes` -- `tenant.sql_usage.external_io_egress_bytes` -- `tenant.sql_usage.external_io_ingress_bytes` -- `tenant.sql_usage.kv_request_units` -- `tenant.sql_usage.pgwire_egress_bytes` -- `tenant.sql_usage.provisioned_vcpus` -- `tenant.sql_usage.read_batches` -- `tenant.sql_usage.read_bytes` -- `tenant.sql_usage.read_requests` -- `tenant.sql_usage.request_units` -- `tenant.sql_usage.sql_pods_cpu_seconds` -- `tenant.sql_usage.write_batches` -- `tenant.sql_usage.write_bytes` -- `tenant.sql_usage.write_requests` -- `txn.aborts` -- `txn.commit_waits` -- `txn.commits` -- `txn.commits_read_only` -- `txn.commits1PC` -- `txn.condensed_intent_spans` -- `txn.condensed_intent_spans_gauge` -- `txn.condensed_intent_spans_rejected` -- `txn.durations` -- `txn.inflight_locks_over_tracking_budget` -- `txn.parallelcommits` -- `txn.parallelcommits.auto_retries` -- `txn.refresh.auto_retries` -- `txn.refresh.fail` -- `txn.refresh.fail_with_condensed_spans` -- `txn.refresh.memory_limit_exceeded` -- `txn.refresh.success` -- `txn.refresh.success_server_side` -- `txn.restarts` -- `txn.restarts.asyncwritefailure` -- `txn.restarts.commitdeadlineexceeded` -- `txn.restarts.readwithinuncertainty` -- `txn.restarts.serializable` -- `txn.restarts.txnaborted` -- `txn.restarts.txnpush` -- `txn.restarts.unknown` -- `txn.restarts.writetooold` -- `txn.rollbacks.async.failed` -- `txn.rollbacks.failed` +{% assign names_string = "" %} + +{% for layer in site.data[version].metrics.metrics.layers %} + {% comment %}layer: {{ layer.name }}{% endcomment %} + {% if layer.name == "APPLICATION" %} + {% for category in layer.categories %} + {% comment %}category: {{ 
category.name }}{% endcomment %} + {% for metric in category.metrics %} + {% assign names_string = names_string | append: metric.name | append: "||" %} + {% endfor %} + {% endfor %} + {% endif %} +{% endfor %} + +{% comment %}names_string: {{ names_string }}{% endcomment %} + +{% assign name_list = names_string | split: "||" | uniq | sort %} + +
    + {% for name in name_list %} + {% unless name == "" %} +
  • {{ name }}
  • + {% endunless %} + {% endfor %} +
## Metrics scoped to the system virtual cluster {% comment %}LAYER=STORAGE{% endcomment %} -- `abortspanbytes` -- `addsstable.applications` -- `addsstable.aswrites` -- `addsstable.copies` -- `addsstable.delay.enginebackpressure` -- `addsstable.delay.total` -- `addsstable.proposals` -- `admission.admitted.elastic-cpu` -- `admission.admitted.elastic-cpu.bulk-normal-pri` -- `admission.admitted.elastic-cpu.normal-pri` -- `admission.admitted.elastic-stores` -- `admission.admitted.elastic-stores.bulk-low-pri` -- `admission.admitted.elastic-stores.bulk-normal-pri` -- `admission.admitted.kv` -- `admission.admitted.kv-stores` -- `admission.admitted.kv-stores.high-pri` -- `admission.admitted.kv-stores.locking-normal-pri` -- `admission.admitted.kv-stores.normal-pri` -- `admission.admitted.kv.high-pri` -- `admission.admitted.kv.locking-normal-pri` -- `admission.admitted.kv.normal-pri` -- `admission.admitted.sql-kv-response` -- `admission.admitted.sql-kv-response.locking-normal-pri` -- `admission.admitted.sql-kv-response.normal-pri` -- `admission.admitted.sql-leaf-start` -- `admission.admitted.sql-leaf-start.locking-normal-pri` -- `admission.admitted.sql-leaf-start.normal-pri` -- `admission.admitted.sql-root-start` -- `admission.admitted.sql-root-start.locking-normal-pri` -- `admission.admitted.sql-root-start.normal-pri` -- `admission.admitted.sql-sql-response` -- `admission.admitted.sql-sql-response.locking-normal-pri` -- `admission.admitted.sql-sql-response.normal-pri` -- `admission.elastic_cpu.acquired_nanos` -- `admission.elastic_cpu.available_nanos` -- `admission.elastic_cpu.max_available_nanos` -- `admission.elastic_cpu.nanos_exhausted_duration` -- `admission.elastic_cpu.over_limit_durations` -- `admission.elastic_cpu.pre_work_nanos` -- `admission.elastic_cpu.returned_nanos` -- `admission.elastic_cpu.utilization` -- `admission.elastic_cpu.utilization_limit` -- `admission.errored.elastic-cpu` -- `admission.errored.elastic-cpu.bulk-normal-pri` -- 
`admission.errored.elastic-cpu.normal-pri` -- `admission.errored.elastic-stores` -- `admission.errored.elastic-stores.bulk-low-pri` -- `admission.errored.elastic-stores.bulk-normal-pri` -- `admission.errored.kv` -- `admission.errored.kv-stores` -- `admission.errored.kv-stores.high-pri` -- `admission.errored.kv-stores.locking-normal-pri` -- `admission.errored.kv-stores.normal-pri` -- `admission.errored.kv.high-pri` -- `admission.errored.kv.locking-normal-pri` -- `admission.errored.kv.normal-pri` -- `admission.errored.sql-kv-response` -- `admission.errored.sql-kv-response.locking-normal-pri` -- `admission.errored.sql-kv-response.normal-pri` -- `admission.errored.sql-leaf-start` -- `admission.errored.sql-leaf-start.locking-normal-pri` -- `admission.errored.sql-leaf-start.normal-pri` -- `admission.errored.sql-root-start` -- `admission.errored.sql-root-start.locking-normal-pri` -- `admission.errored.sql-root-start.normal-pri` -- `admission.errored.sql-sql-response` -- `admission.errored.sql-sql-response.locking-normal-pri` -- `admission.errored.sql-sql-response.normal-pri` -- `admission.granter.cpu_load_long_period_duration.kv` -- `admission.granter.cpu_load_short_period_duration.kv` -- `admission.granter.elastic_io_tokens_available.kv` -- `admission.granter.elastic_io_tokens_exhausted_duration.kv` -- `admission.granter.io_tokens_available.kv` -- `admission.granter.io_tokens_bypassed.kv` -- `admission.granter.io_tokens_exhausted_duration.kv` -- `admission.granter.io_tokens_returned.kv` -- `admission.granter.io_tokens_taken.kv` -- `admission.granter.slot_adjuster_decrements.kv` -- `admission.granter.slot_adjuster_increments.kv` -- `admission.granter.slots_exhausted_duration.kv` -- `admission.granter.total_slots.kv` -- `admission.granter.used_slots.kv` -- `admission.granter.used_slots.sql-leaf-start` -- `admission.granter.used_slots.sql-root-start` -- `admission.io.overload` -- `admission.l0_compacted_bytes.kv` -- `admission.l0_tokens_produced.kv` -- 
`admission.raft.paused_replicas` -- `admission.raft.paused_replicas_dropped_msgs` -- `admission.requested.elastic-cpu` -- `admission.requested.elastic-cpu.bulk-normal-pri` -- `admission.requested.elastic-cpu.normal-pri` -- `admission.requested.elastic-stores` -- `admission.requested.elastic-stores.bulk-low-pri` -- `admission.requested.elastic-stores.bulk-normal-pri` -- `admission.requested.kv` -- `admission.requested.kv-stores` -- `admission.requested.kv-stores.high-pri` -- `admission.requested.kv-stores.locking-normal-pri` -- `admission.requested.kv-stores.normal-pri` -- `admission.requested.kv.high-pri` -- `admission.requested.kv.locking-normal-pri` -- `admission.requested.kv.normal-pri` -- `admission.requested.sql-kv-response` -- `admission.requested.sql-kv-response.locking-normal-pri` -- `admission.requested.sql-kv-response.normal-pri` -- `admission.requested.sql-leaf-start` -- `admission.requested.sql-leaf-start.locking-normal-pri` -- `admission.requested.sql-leaf-start.normal-pri` -- `admission.requested.sql-root-start` -- `admission.requested.sql-root-start.locking-normal-pri` -- `admission.requested.sql-root-start.normal-pri` -- `admission.requested.sql-sql-response` -- `admission.requested.sql-sql-response.locking-normal-pri` -- `admission.requested.sql-sql-response.normal-pri` -- `admission.scheduler_latency_listener.p99_nanos` -- `admission.wait_durations.elastic-cpu` -- `admission.wait_durations.elastic-cpu.bulk-normal-pri` -- `admission.wait_durations.elastic-cpu.normal-pri` -- `admission.wait_durations.elastic-stores` -- `admission.wait_durations.elastic-stores.bulk-low-pri` -- `admission.wait_durations.elastic-stores.bulk-normal-pri` -- `admission.wait_durations.kv` -- `admission.wait_durations.kv-stores` -- `admission.wait_durations.kv-stores.high-pri` -- `admission.wait_durations.kv-stores.locking-normal-pri` -- `admission.wait_durations.kv-stores.normal-pri` -- `admission.wait_durations.kv.high-pri` -- 
`admission.wait_durations.kv.locking-normal-pri` -- `admission.wait_durations.kv.normal-pri` -- `admission.wait_durations.snapshot_ingest` -- `admission.wait_durations.sql-kv-response` -- `admission.wait_durations.sql-kv-response.locking-normal-pri` -- `admission.wait_durations.sql-kv-response.normal-pri` -- `admission.wait_durations.sql-leaf-start` -- `admission.wait_durations.sql-leaf-start.locking-normal-pri` -- `admission.wait_durations.sql-leaf-start.normal-pri` -- `admission.wait_durations.sql-root-start` -- `admission.wait_durations.sql-root-start.locking-normal-pri` -- `admission.wait_durations.sql-root-start.normal-pri` -- `admission.wait_durations.sql-sql-response` -- `admission.wait_durations.sql-sql-response.locking-normal-pri` -- `admission.wait_durations.sql-sql-response.normal-pri` -- `admission.wait_queue_length.elastic-cpu` -- `admission.wait_queue_length.elastic-cpu.bulk-normal-pri` -- `admission.wait_queue_length.elastic-cpu.normal-pri` -- `admission.wait_queue_length.elastic-stores` -- `admission.wait_queue_length.elastic-stores.bulk-low-pri` -- `admission.wait_queue_length.elastic-stores.bulk-normal-pri` -- `admission.wait_queue_length.kv` -- `admission.wait_queue_length.kv-stores` -- `admission.wait_queue_length.kv-stores.high-pri` -- `admission.wait_queue_length.kv-stores.locking-normal-pri` -- `admission.wait_queue_length.kv-stores.normal-pri` -- `admission.wait_queue_length.kv.high-pri` -- `admission.wait_queue_length.kv.locking-normal-pri` -- `admission.wait_queue_length.kv.normal-pri` -- `admission.wait_queue_length.sql-kv-response` -- `admission.wait_queue_length.sql-kv-response.locking-normal-pri` -- `admission.wait_queue_length.sql-kv-response.normal-pri` -- `admission.wait_queue_length.sql-leaf-start` -- `admission.wait_queue_length.sql-leaf-start.locking-normal-pri` -- `admission.wait_queue_length.sql-leaf-start.normal-pri` -- `admission.wait_queue_length.sql-root-start` -- 
`admission.wait_queue_length.sql-root-start.locking-normal-pri` -- `admission.wait_queue_length.sql-root-start.normal-pri` -- `admission.wait_queue_length.sql-sql-response` -- `admission.wait_queue_length.sql-sql-response.locking-normal-pri` -- `admission.wait_queue_length.sql-sql-response.normal-pri` -- `batch_requests.bytes` -- `batch_requests.cross_region.bytes` -- `batch_requests.cross_zone.bytes` -- `batch_responses.bytes` -- `batch_responses.cross_region.bytes` -- `batch_responses.cross_zone.bytes` -- `capacity` -- `capacity.available` -- `capacity.reserved` -- `capacity.used` -- `exec.error` -- `exec.latency` -- `exec.success` -- `exportrequest.delay.total` -- `follower_reads.success_count` -- `gcbytesage` -- `gossip.bytes.received` -- `gossip.bytes.sent` -- `gossip.callbacks.pending` -- `gossip.callbacks.pending_duration` -- `gossip.callbacks.processed` -- `gossip.callbacks.processing_duration` -- `gossip.connections.incoming` -- `gossip.connections.outgoing` -- `gossip.connections.refused` -- `gossip.infos.received` -- `gossip.infos.sent` -- `intentage` -- `intentbytes` -- `intentcount` -- `intentresolver.async.throttled` -- `intentresolver.finalized_txns.failed` -- `intentresolver.intents.failed` -- `intents.abort-attempts` -- `intents.poison-attempts` -- `intents.resolve-attempts` -- `keybytes` -- `keycount` -- `kv.allocator.load_based_lease_transfers.cannot_find_better_candidate` -- `kv.allocator.load_based_lease_transfers.delta_not_significant` -- `kv.allocator.load_based_lease_transfers.existing_not_overfull` -- `kv.allocator.load_based_lease_transfers.follow_the_workload` -- `kv.allocator.load_based_lease_transfers.missing_stats_for_existing_stores` -- `kv.allocator.load_based_lease_transfers.should_transfer` -- `kv.allocator.load_based_replica_rebalancing.cannot_find_better_candidate` -- `kv.allocator.load_based_replica_rebalancing.delta_not_significant` -- `kv.allocator.load_based_replica_rebalancing.existing_not_overfull` -- 
`kv.allocator.load_based_replica_rebalancing.missing_stats_for_existing_store` -- `kv.allocator.load_based_replica_rebalancing.should_transfer` -- `kv.closed_timestamp.max_behind_nanos` -- `kv.concurrency.avg_lock_hold_duration_nanos` -- `kv.concurrency.avg_lock_wait_duration_nanos` -- `kv.concurrency.latch_conflict_wait_durations` -- `kv.concurrency.lock_wait_queue_waiters` -- `kv.concurrency.locks` -- `kv.concurrency.locks_with_wait_queues` -- `kv.concurrency.max_lock_hold_duration_nanos` -- `kv.concurrency.max_lock_wait_duration_nanos` -- `kv.concurrency.max_lock_wait_queue_waiters_for_lock` -- `kv.loadsplitter.nosplitkey` -- `kv.loadsplitter.popularkey` -- `kv.prober.planning_attempts` -- `kv.prober.planning_failures` -- `kv.prober.read.attempts` -- `kv.prober.read.failures` -- `kv.prober.read.latency` -- `kv.prober.write.attempts` -- `kv.prober.write.failures` -- `kv.prober.write.latency` -- `kv.prober.write.quarantine.oldest_duration` -- `kv.rangefeed.budget_allocation_blocked` -- `kv.rangefeed.budget_allocation_failed` -- `kv.rangefeed.catchup_scan_nanos` -- `kv.rangefeed.closed_timestamp_max_behind_nanos` -- `kv.rangefeed.closed_timestamp.slow_ranges` -- `kv.rangefeed.mem_shared` -- `kv.rangefeed.mem_system` -- `kv.rangefeed.processors_goroutine` -- `kv.rangefeed.processors_scheduler` -- `kv.rangefeed.registrations` -- `kv.rangefeed.scheduler.normal.latency` -- `kv.rangefeed.scheduler.normal.queue_size` -- `kv.rangefeed.scheduler.system.latency` -- `kv.rangefeed.scheduler.system.queue_size` -- `kv.replica_circuit_breaker.num_tripped_events` -- `kv.replica_circuit_breaker.num_tripped_replicas` -- `kv.replica_read_batch_evaluate.dropped_latches_before_eval` -- `kv.replica_read_batch_evaluate.latency` -- `kv.replica_read_batch_evaluate.without_interleaving_iter` -- `kv.replica_write_batch_evaluate.latency` -- `kv.split.estimated_stats` -- `kv.split.total_bytes_estimates` -- `kv.tenant_rate_limit.current_blocked` -- `kv.tenant_rate_limit.num_tenants` -- 
`kv.tenant_rate_limit.read_batches_admitted` -- `kv.tenant_rate_limit.read_bytes_admitted` -- `kv.tenant_rate_limit.read_requests_admitted` -- `kv.tenant_rate_limit.write_batches_admitted` -- `kv.tenant_rate_limit.write_bytes_admitted` -- `kv.tenant_rate_limit.write_requests_admitted` -- `kvadmission.flow_controller.elastic_blocked_stream_count` -- `kvadmission.flow_controller.elastic_requests_admitted` -- `kvadmission.flow_controller.elastic_requests_bypassed` -- `kvadmission.flow_controller.elastic_requests_errored` -- `kvadmission.flow_controller.elastic_requests_waiting` -- `kvadmission.flow_controller.elastic_stream_count` -- `kvadmission.flow_controller.elastic_tokens_available` -- `kvadmission.flow_controller.elastic_tokens_deducted` -- `kvadmission.flow_controller.elastic_tokens_returned` -- `kvadmission.flow_controller.elastic_tokens_unaccounted` -- `kvadmission.flow_controller.elastic_wait_duration` -- `kvadmission.flow_controller.regular_blocked_stream_count` -- `kvadmission.flow_controller.regular_requests_admitted` -- `kvadmission.flow_controller.regular_requests_bypassed` -- `kvadmission.flow_controller.regular_requests_errored` -- `kvadmission.flow_controller.regular_requests_waiting` -- `kvadmission.flow_controller.regular_stream_count` -- `kvadmission.flow_controller.regular_tokens_available` -- `kvadmission.flow_controller.regular_tokens_deducted` -- `kvadmission.flow_controller.regular_tokens_returned` -- `kvadmission.flow_controller.regular_tokens_unaccounted` -- `kvadmission.flow_controller.regular_wait_duration` -- `kvadmission.flow_handle.elastic_requests_admitted` -- `kvadmission.flow_handle.elastic_requests_errored` -- `kvadmission.flow_handle.elastic_requests_waiting` -- `kvadmission.flow_handle.elastic_wait_duration` -- `kvadmission.flow_handle.regular_requests_admitted` -- `kvadmission.flow_handle.regular_requests_errored` -- `kvadmission.flow_handle.regular_requests_waiting` -- `kvadmission.flow_handle.regular_wait_duration` -- 
`kvadmission.flow_handle.streams_connected` -- `kvadmission.flow_handle.streams_disconnected` -- `kvadmission.flow_token_dispatch.coalesced_elastic` -- `kvadmission.flow_token_dispatch.coalesced_regular` -- `kvadmission.flow_token_dispatch.local_elastic` -- `kvadmission.flow_token_dispatch.local_regular` -- `kvadmission.flow_token_dispatch.pending_elastic` -- `kvadmission.flow_token_dispatch.pending_nodes` -- `kvadmission.flow_token_dispatch.pending_regular` -- `kvadmission.flow_token_dispatch.remote_elastic` -- `kvadmission.flow_token_dispatch.remote_regular` -- `kvflowcontrol.eval_wait.elastic.duration` -- `kvflowcontrol.eval_wait.elastic.requests.admitted` -- `kvflowcontrol.eval_wait.elastic.requests.bypassed` -- `kvflowcontrol.eval_wait.elastic.requests.errored` -- `kvflowcontrol.eval_wait.elastic.requests.waiting` -- `kvflowcontrol.eval_wait.regular.duration` -- `kvflowcontrol.eval_wait.regular.requests.admitted` -- `kvflowcontrol.eval_wait.regular.requests.bypassed` -- `kvflowcontrol.eval_wait.regular.requests.errored` -- `kvflowcontrol.eval_wait.regular.requests.waiting` -- `kvflowcontrol.range_controller.count` -- `kvflowcontrol.send_queue.bytes` -- `kvflowcontrol.send_queue.count` -- `kvflowcontrol.send_queue.prevent.count` -- `kvflowcontrol.send_queue.scheduled.deducted_bytes` -- `kvflowcontrol.send_queue.scheduled.force_flush` -- `kvflowcontrol.streams.eval.elastic.blocked_count` -- `kvflowcontrol.streams.eval.elastic.total_count` -- `kvflowcontrol.streams.eval.regular.blocked_count` -- `kvflowcontrol.streams.eval.regular.total_count` -- `kvflowcontrol.streams.send.elastic.blocked_count` -- `kvflowcontrol.streams.send.elastic.total_count` -- `kvflowcontrol.streams.send.regular.blocked_count` -- `kvflowcontrol.streams.send.regular.total_count` -- `kvflowcontrol.tokens.eval.elastic.available` -- `kvflowcontrol.tokens.eval.elastic.deducted` -- `kvflowcontrol.tokens.eval.elastic.returned` -- `kvflowcontrol.tokens.eval.elastic.returned.disconnect` -- 
`kvflowcontrol.tokens.eval.elastic.unaccounted` -- `kvflowcontrol.tokens.eval.regular.available` -- `kvflowcontrol.tokens.eval.regular.deducted` -- `kvflowcontrol.tokens.eval.regular.returned` -- `kvflowcontrol.tokens.eval.regular.returned.disconnect` -- `kvflowcontrol.tokens.eval.regular.unaccounted` -- `kvflowcontrol.tokens.send.elastic.available` -- `kvflowcontrol.tokens.send.elastic.deducted` -- `kvflowcontrol.tokens.send.elastic.deducted.force_flush_send_queue` -- `kvflowcontrol.tokens.send.elastic.deducted.prevent_send_queue` -- `kvflowcontrol.tokens.send.elastic.returned` -- `kvflowcontrol.tokens.send.elastic.returned.disconnect` -- `kvflowcontrol.tokens.send.elastic.unaccounted` -- `kvflowcontrol.tokens.send.regular.available` -- `kvflowcontrol.tokens.send.regular.deducted` -- `kvflowcontrol.tokens.send.regular.deducted.prevent_send_queue` -- `kvflowcontrol.tokens.send.regular.returned` -- `kvflowcontrol.tokens.send.regular.returned.disconnect` -- `kvflowcontrol.tokens.send.regular.unaccounted` -- `leases.epoch` -- `leases.error` -- `leases.expiration` -- `leases.leader` -- `leases.liveness` -- `leases.preferences.less-preferred` -- `leases.preferences.violating` -- `leases.requests.latency` -- `leases.success` -- `leases.transfers.error` -- `leases.transfers.success` -- `livebytes` -- `livecount` -- `liveness.epochincrements` -- `liveness.heartbeatfailures` -- `liveness.heartbeatlatency` -- `liveness.heartbeatsinflight` -- `liveness.heartbeatsuccesses` -- `liveness.livenodes` -- `lockbytes` -- `lockcount` -- `node-id` -- `queue.consistency.pending` -- `queue.consistency.process.failure` -- `queue.consistency.process.success` -- `queue.consistency.processingnanos` -- `queue.gc.info.abortspanconsidered` -- `queue.gc.info.abortspangcnum` -- `queue.gc.info.abortspanscanned` -- `queue.gc.info.clearrangefailed` -- `queue.gc.info.clearrangesuccess` -- `queue.gc.info.enqueuehighpriority` -- `queue.gc.info.intentsconsidered` -- `queue.gc.info.intenttxns` -- 
`queue.gc.info.numkeysaffected` -- `queue.gc.info.numrangekeysaffected` -- `queue.gc.info.pushtxn` -- `queue.gc.info.resolvefailed` -- `queue.gc.info.resolvesuccess` -- `queue.gc.info.resolvetotal` -- `queue.gc.info.transactionresolvefailed` -- `queue.gc.info.transactionspangcaborted` -- `queue.gc.info.transactionspangccommitted` -- `queue.gc.info.transactionspangcpending` -- `queue.gc.info.transactionspangcstaging` -- `queue.gc.info.transactionspanscanned` -- `queue.gc.pending` -- `queue.gc.process.failure` -- `queue.gc.process.success` -- `queue.gc.processingnanos` -- `queue.lease.pending` -- `queue.lease.process.failure` -- `queue.lease.process.success` -- `queue.lease.processingnanos` -- `queue.lease.purgatory` -- `queue.merge.pending` -- `queue.merge.process.failure` -- `queue.merge.process.success` -- `queue.merge.processingnanos` -- `queue.merge.purgatory` -- `queue.raftlog.pending` -- `queue.raftlog.process.failure` -- `queue.raftlog.process.success` -- `queue.raftlog.processingnanos` -- `queue.raftsnapshot.pending` -- `queue.raftsnapshot.process.failure` -- `queue.raftsnapshot.process.success` -- `queue.raftsnapshot.processingnanos` -- `queue.replicagc.pending` -- `queue.replicagc.process.failure` -- `queue.replicagc.process.success` -- `queue.replicagc.processingnanos` -- `queue.replicagc.removereplica` -- `queue.replicate.addnonvoterreplica` -- `queue.replicate.addreplica` -- `queue.replicate.addreplica.error` -- `queue.replicate.addreplica.success` -- `queue.replicate.addvoterreplica` -- `queue.replicate.nonvoterpromotions` -- `queue.replicate.pending` -- `queue.replicate.process.failure` -- `queue.replicate.process.success` -- `queue.replicate.processingnanos` -- `queue.replicate.purgatory` -- `queue.replicate.rebalancenonvoterreplica` -- `queue.replicate.rebalancereplica` -- `queue.replicate.rebalancevoterreplica` -- `queue.replicate.removedeadnonvoterreplica` -- `queue.replicate.removedeadreplica` -- `queue.replicate.removedeadreplica.error` -- 
`queue.replicate.removedeadreplica.success` -- `queue.replicate.removedeadvoterreplica` -- `queue.replicate.removedecommissioningnonvoterreplica` -- `queue.replicate.removedecommissioningreplica` -- `queue.replicate.removedecommissioningreplica.error` -- `queue.replicate.removedecommissioningreplica.success` -- `queue.replicate.removedecommissioningvoterreplica` -- `queue.replicate.removelearnerreplica` -- `queue.replicate.removenonvoterreplica` -- `queue.replicate.removereplica` -- `queue.replicate.removereplica.error` -- `queue.replicate.removereplica.success` -- `queue.replicate.removevoterreplica` -- `queue.replicate.replacedeadreplica.error` -- `queue.replicate.replacedeadreplica.success` -- `queue.replicate.replacedecommissioningreplica.error` -- `queue.replicate.replacedecommissioningreplica.success` -- `queue.replicate.transferlease` -- `queue.replicate.voterdemotions` -- `queue.split.load_based` -- `queue.split.pending` -- `queue.split.process.failure` -- `queue.split.process.success` -- `queue.split.processingnanos` -- `queue.split.purgatory` -- `queue.split.size_based` -- `queue.split.span_config_based` -- `queue.tsmaintenance.pending` -- `queue.tsmaintenance.process.failure` -- `queue.tsmaintenance.process.success` -- `queue.tsmaintenance.processingnanos` -- `raft.commands.pending` -- `raft.commands.proposed` -- `raft.commands.reproposed.new-lai` -- `raft.commands.reproposed.unchanged` -- `raft.commandsapplied` -- `raft.dropped` -- `raft.dropped_leader` -- `raft.entrycache.accesses` -- `raft.entrycache.bytes` -- `raft.entrycache.hits` -- `raft.entrycache.read_bytes` -- `raft.entrycache.size` -- `raft.heartbeats.pending` -- `raft.loaded_entries.bytes` -- `raft.loaded_entries.reserved.bytes` -- `raft.process.applycommitted.latency` -- `raft.process.commandcommit.latency` -- `raft.process.handleready.latency` -- `raft.process.logcommit.latency` -- `raft.process.tickingnanos` -- `raft.process.workingnanos` -- `raft.quota_pool.percent_used` -- 
`raft.rcvd.app` -- `raft.rcvd.appresp` -- `raft.rcvd.bytes` -- `raft.rcvd.cross_region.bytes` -- `raft.rcvd.cross_zone.bytes` -- `raft.rcvd.defortifyleader` -- `raft.rcvd.dropped` -- `raft.rcvd.dropped_bytes` -- `raft.rcvd.fortifyleader` -- `raft.rcvd.fortifyleaderresp` -- `raft.rcvd.heartbeat` -- `raft.rcvd.heartbeatresp` -- `raft.rcvd.prevote` -- `raft.rcvd.prevoteresp` -- `raft.rcvd.prop` -- `raft.rcvd.queued_bytes` -- `raft.rcvd.snap` -- `raft.rcvd.stepped_bytes` -- `raft.rcvd.timeoutnow` -- `raft.rcvd.transferleader` -- `raft.rcvd.vote` -- `raft.rcvd.voteresp` -- `raft.replication.latency` -- `raft.scheduler.latency` -- `raft.sent.bytes` -- `raft.sent.cross_region.bytes` -- `raft.sent.cross_zone.bytes` -- `raft.storage.error` -- `raft.storage.read_bytes` -- `raft.ticks` -- `raft.timeoutcampaign` -- `raft.transport.flow-token-dispatches-dropped` -- `raft.transport.rcvd` -- `raft.transport.reverse-rcvd` -- `raft.transport.reverse-sent` -- `raft.transport.send-queue-bytes` -- `raft.transport.send-queue-size` -- `raft.transport.sends-dropped` -- `raft.transport.sent` -- `raftlog.behind` -- `raftlog.truncated` -- `range.adds` -- `range.merges` -- `range.raftleaderremovals` -- `range.raftleadertransfers` -- `range.recoveries` -- `range.removes` -- `range.snapshots.applied-initial` -- `range.snapshots.applied-non-voter` -- `range.snapshots.applied-voter` -- `range.snapshots.cross-region.rcvd-bytes` -- `range.snapshots.cross-region.sent-bytes` -- `range.snapshots.cross-zone.rcvd-bytes` -- `range.snapshots.cross-zone.sent-bytes` -- `range.snapshots.delegate.failures` -- `range.snapshots.delegate.in-progress` -- `range.snapshots.delegate.sent-bytes` -- `range.snapshots.delegate.successes` -- `range.snapshots.generated` -- `range.snapshots.rcvd-bytes` -- `range.snapshots.rebalancing.rcvd-bytes` -- `range.snapshots.rebalancing.sent-bytes` -- `range.snapshots.recovery.rcvd-bytes` -- `range.snapshots.recovery.sent-bytes` -- `range.snapshots.recv-failed` -- 
`range.snapshots.recv-in-progress` -- `range.snapshots.recv-queue` -- `range.snapshots.recv-queue-bytes` -- `range.snapshots.recv-total-in-progress` -- `range.snapshots.recv-unusable` -- `range.snapshots.send-in-progress` -- `range.snapshots.send-queue` -- `range.snapshots.send-queue-bytes` -- `range.snapshots.send-total-in-progress` -- `range.snapshots.sent-bytes` -- `range.snapshots.unknown.rcvd-bytes` -- `range.snapshots.unknown.sent-bytes` -- `range.snapshots.upreplication.rcvd-bytes` -- `range.snapshots.upreplication.sent-bytes` -- `range.splits` -- `rangekeybytes` -- `rangekeycount` -- `ranges` -- `ranges.decommissioning` -- `ranges.overreplicated` -- `ranges.unavailable` -- `ranges.underreplicated` -- `rangevalbytes` -- `rangevalcount` -- `rebalancing.cpunanospersecond` -- `rebalancing.lease.transfers` -- `rebalancing.queriespersecond` -- `rebalancing.range.rebalances` -- `rebalancing.readbytespersecond` -- `rebalancing.readspersecond` -- `rebalancing.replicas.cpunanospersecond` -- `rebalancing.replicas.queriespersecond` -- `rebalancing.requestspersecond` -- `rebalancing.state.imbalanced_overfull_options_exhausted` -- `rebalancing.writebytespersecond` -- `rebalancing.writespersecond` -- `replicas` -- `replicas.leaders` -- `replicas.leaders_invalid_lease` -- `replicas.leaders_not_leaseholders` -- `replicas.leaseholders` -- `replicas.quiescent` -- `replicas.reserved` -- `replicas.uninitialized` -- `requests.backpressure.split` -- `requests.slow.latch` -- `requests.slow.lease` -- `requests.slow.raft` -- `rocksdb.block.cache.hits` -- `rocksdb.block.cache.misses` -- `rocksdb.block.cache.usage` -- `rocksdb.bloom.filter.prefix.checked` -- `rocksdb.bloom.filter.prefix.useful` -- `rocksdb.compacted-bytes-read` -- `rocksdb.compacted-bytes-written` -- `rocksdb.compactions` -- `rocksdb.encryption.algorithm` -- `rocksdb.estimated-pending-compaction` -- `rocksdb.flushed-bytes` -- `rocksdb.flushes` -- `rocksdb.ingested-bytes` -- `rocksdb.memtable.total-size` -- 
`rocksdb.num-sstables` -- `rocksdb.read-amplification` -- `rocksdb.table-readers-mem-estimate` -- `rpc.batches.recv` -- `rpc.method.addsstable.recv` -- `rpc.method.adminchangereplicas.recv` -- `rpc.method.adminmerge.recv` -- `rpc.method.adminrelocaterange.recv` -- `rpc.method.adminscatter.recv` -- `rpc.method.adminsplit.recv` -- `rpc.method.admintransferlease.recv` -- `rpc.method.adminunsplit.recv` -- `rpc.method.adminverifyprotectedtimestamp.recv` -- `rpc.method.barrier.recv` -- `rpc.method.checkconsistency.recv` -- `rpc.method.clearrange.recv` -- `rpc.method.computechecksum.recv` -- `rpc.method.conditionalput.recv` -- `rpc.method.delete.recv` -- `rpc.method.deleterange.recv` -- `rpc.method.endtxn.recv` -- `rpc.method.export.recv` -- `rpc.method.gc.recv` -- `rpc.method.get.recv` -- `rpc.method.heartbeattxn.recv` -- `rpc.method.increment.recv` -- `rpc.method.initput.recv` -- `rpc.method.isspanempty.recv` -- `rpc.method.leaseinfo.recv` -- `rpc.method.linkexternalsstable.recv` -- `rpc.method.merge.recv` -- `rpc.method.migrate.recv` -- `rpc.method.probe.recv` -- `rpc.method.pushtxn.recv` -- `rpc.method.put.recv` -- `rpc.method.queryintent.recv` -- `rpc.method.querylocks.recv` -- `rpc.method.queryresolvedtimestamp.recv` -- `rpc.method.querytxn.recv` -- `rpc.method.rangestats.recv` -- `rpc.method.recomputestats.recv` -- `rpc.method.recovertxn.recv` -- `rpc.method.refresh.recv` -- `rpc.method.refreshrange.recv` -- `rpc.method.requestlease.recv` -- `rpc.method.resolveintent.recv` -- `rpc.method.resolveintentrange.recv` -- `rpc.method.reversescan.recv` -- `rpc.method.revertrange.recv` -- `rpc.method.scan.recv` -- `rpc.method.subsume.recv` -- `rpc.method.transferlease.recv` -- `rpc.method.truncatelog.recv` -- `rpc.method.writebatch.recv` -- `rpc.streams.mux_rangefeed.active` -- `rpc.streams.mux_rangefeed.recv` -- `spanconfig.kvsubscriber.oldest_protected_record_nanos` -- `spanconfig.kvsubscriber.protected_record_count` -- `spanconfig.kvsubscriber.update_behind_nanos` -- 
`storage.batch-commit.commit-wait.duration` -- `storage.batch-commit.count` -- `storage.batch-commit.duration` -- `storage.batch-commit.l0-stall.duration` -- `storage.batch-commit.mem-stall.duration` -- `storage.batch-commit.sem-wait.duration` -- `storage.batch-commit.wal-queue-wait.duration` -- `storage.batch-commit.wal-rotation.duration` -- `storage.block-load.active` -- `storage.block-load.queued` -- `storage.checkpoints` -- `storage.compactions.duration` -- `storage.compactions.keys.pinned.bytes` -- `storage.compactions.keys.pinned.count` -- `storage.disk-slow` -- `storage.disk-stalled` -- `storage.disk.io.time` -- `storage.disk.iopsinprogress` -- `storage.disk.read-max.bytespersecond` -- `storage.disk.read.bytes` -- `storage.disk.read.count` -- `storage.disk.read.time` -- `storage.disk.weightedio.time` -- `storage.disk.write-max.bytespersecond` -- `storage.disk.write.bytes` -- `storage.disk.write.count` -- `storage.disk.write.time` -- `storage.flush.ingest.count` -- `storage.flush.ingest.table.bytes` -- `storage.flush.ingest.table.count` -- `storage.flush.utilization` -- `storage.ingest.count` -- `storage.iterator.block-load.bytes` -- `storage.iterator.block-load.cached-bytes` -- `storage.iterator.block-load.read-duration` -- `storage.iterator.external.seeks` -- `storage.iterator.external.steps` -- `storage.iterator.internal.seeks` -- `storage.iterator.internal.steps` -- `storage.keys.range-key-set.count` -- `storage.keys.tombstone.count` -- `storage.l0-bytes-flushed` -- `storage.l0-bytes-ingested` -- `storage.l0-level-score` -- `storage.l0-level-size` -- `storage.l0-num-files` -- `storage.l0-sublevels` -- `storage.l1-bytes-ingested` -- `storage.l1-level-score` -- `storage.l1-level-size` -- `storage.l2-bytes-ingested` -- `storage.l2-level-score` -- `storage.l2-level-size` -- `storage.l3-bytes-ingested` -- `storage.l3-level-score` -- `storage.l3-level-size` -- `storage.l4-bytes-ingested` -- `storage.l4-level-score` -- `storage.l4-level-size` -- 
`storage.l5-bytes-ingested` -- `storage.l5-level-score` -- `storage.l5-level-size` -- `storage.l6-bytes-ingested` -- `storage.l6-level-score` -- `storage.l6-level-size` -- `storage.marked-for-compaction-files` -- `storage.queue.store-failures` -- `storage.secondary-cache.count` -- `storage.secondary-cache.evictions` -- `storage.secondary-cache.reads-full-hit` -- `storage.secondary-cache.reads-multi-block` -- `storage.secondary-cache.reads-multi-shard` -- `storage.secondary-cache.reads-no-hit` -- `storage.secondary-cache.reads-partial-hit` -- `storage.secondary-cache.reads-total` -- `storage.secondary-cache.size` -- `storage.secondary-cache.write-back-failures` -- `storage.shared-storage.read` -- `storage.shared-storage.write` -- `storage.single-delete.ineffectual` -- `storage.single-delete.invariant-violation` -- `storage.sstable.compression.none.count` -- `storage.sstable.compression.snappy.count` -- `storage.sstable.compression.unknown.count` -- `storage.sstable.compression.zstd.count` -- `storage.sstable.zombie.bytes` -- `storage.wal.bytes_in` -- `storage.wal.bytes_written` -- `storage.wal.failover.primary.duration` -- `storage.wal.failover.secondary.duration` -- `storage.wal.failover.switch.count` -- `storage.wal.failover.write_and_sync.latency` -- `storage.wal.fsync.latency` -- `storage.write-amplification` -- `storage.write-stall-nanos` -- `storage.write-stalls` -- `storeliveness.heartbeat.failures` -- `storeliveness.heartbeat.successes` -- `storeliveness.message_handle.failures` -- `storeliveness.message_handle.successes` -- `storeliveness.support_for.stores` -- `storeliveness.support_from.stores` -- `storeliveness.support_withdraw.failures` -- `storeliveness.support_withdraw.successes` -- `storeliveness.transport.receive_dropped` -- `storeliveness.transport.receive-queue-bytes` -- `storeliveness.transport.receive-queue-size` -- `storeliveness.transport.received` -- `storeliveness.transport.send_dropped` -- `storeliveness.transport.send-queue-bytes` -- 
`storeliveness.transport.send-queue-idle` -- `storeliveness.transport.send-queue-size` -- `storeliveness.transport.sent` -- `sysbytes` -- `syscount` -- `tenant.consumption.cross_region_network_ru` -- `tenant.consumption.external_io_egress_bytes` -- `tenant.consumption.external_io_ingress_bytes` -- `tenant.consumption.kv_request_units` -- `tenant.consumption.pgwire_egress_bytes` -- `tenant.consumption.read_batches` -- `tenant.consumption.read_bytes` -- `tenant.consumption.read_requests` -- `tenant.consumption.request_units` -- `tenant.consumption.sql_pods_cpu_seconds` -- `tenant.consumption.write_batches` -- `tenant.consumption.write_bytes` -- `tenant.consumption.write_requests` -- `timeseries.write.bytes` -- `timeseries.write.errors` -- `timeseries.write.samples` -- `totalbytes` -- `tscache.skl.pages` -- `tscache.skl.rotations` -- `txn.commit_waits.before_commit_trigger` -- `txn.server_side_retry.read_evaluation.failure` -- `txn.server_side_retry.read_evaluation.success` -- `txn.server_side_retry.uncertainty_interval_error.failure` -- `txn.server_side_retry.uncertainty_interval_error.success` -- `txn.server_side_retry.write_evaluation.failure` -- `txn.server_side_retry.write_evaluation.success` -- `txn.server_side.1PC.failure` -- `txn.server_side.1PC.success` -- `txnrecovery.attempts.pending` -- `txnrecovery.attempts.total` -- `txnrecovery.failures` -- `txnrecovery.successes.aborted` -- `txnrecovery.successes.committed` -- `txnrecovery.successes.pending` -- `txnwaitqueue.deadlocks_total` -- `txnwaitqueue.pushee.waiting` -- `txnwaitqueue.pusher.slow` -- `txnwaitqueue.pusher.wait_time` -- `txnwaitqueue.pusher.waiting` -- `txnwaitqueue.query.wait_time` -- `txnwaitqueue.query.waiting` -- `valbytes` -- `valcount` +{% assign names_string = "" %} + +{% for layer in site.data[version].metrics.metrics.layers %} + {% comment %}layer: {{ layer.name }}{% endcomment %} + {% if layer.name == "STORAGE" %} + {% for category in layer.categories %} + {% comment %}category: {{ 
category.name }}{% endcomment %} + {% for metric in category.metrics %} + {% assign names_string = names_string | append: metric.name | append: "||" %} + {% endfor %} + {% endfor %} + {% endif %} +{% endfor %} + +{% comment %}names_string: {{ names_string }}{% endcomment %} + +{% assign name_list = names_string | split: "||" | uniq | sort %} + +
<ul> + {% for name in name_list %} + {% unless name == "" %} +
<li><code>{{ name }}</code></li>
+ {% endunless %} + {% endfor %} + </ul>
## See also diff --git a/src/current/v25.3/cluster-virtualization-setting-scopes.md b/src/current/v25.3/cluster-virtualization-setting-scopes.md index 07cedc6c98a..9c98fe06db9 100644 --- a/src/current/v25.3/cluster-virtualization-setting-scopes.md +++ b/src/current/v25.3/cluster-virtualization-setting-scopes.md @@ -18,7 +18,7 @@ When [cluster virtualization]({% link {{ page.version.version }}/cluster-virtual - When a cluster setting is system-visible, it can be set only from the system virtual cluster but can be queried from any virtual cluster. For example, a virtual cluster can query a system-visible cluster setting's value, such as `storage.max_sync_duration`, to help adapt to the CockroachDB cluster's configuration. {% comment %} -Src: cockroach gen metrics-list --format=csv against cockroach-v24.3.0-beta.1.darwin-10.9-amd64 +Src: `cockroach gen settings-list --show-class --format=csv > cluster-settings.csv` against cockroach-v25.3.0-rc.1.darwin-11.0-arm64; Also saved in https://docs.google.com/spreadsheets/d/1HIalzAhwU0CEYzSuG2m1aXSJRpiIyQPJdt8SusHpJ_U/edit?usp=sharing (shared CRL-internal). Sort by the Class column, then Settings column, and paste into the correct section below. 
@@ -28,382 +28,74 @@ system-only: Scoped to the system virtual cluster system-visible: Can be set / modified only from the system virtual cluster, but can be viewed from a VC {% endcomment %} +{% assign version = site.current_cloud_version | replace: ".", "" %} +{% comment %}version: {{ version }}{% endcomment %} + ## Cluster settings scoped to a virtual cluster {% comment %}Class=application{% endcomment %} -- `admission.epoch_lifo.enabled` -- `admission.epoch_lifo.epoch_closing_delta_duration` -- `admission.epoch_lifo.epoch_duration` -- `admission.epoch_lifo.queue_delay_threshold_to_switch_to_lifo` -- `admission.sql_kv_response.enabled` -- `admission.sql_sql_response.enabled` -- `bulkio.backup.deprecated_full_backup_with_subdir.enabled` -- `bulkio.backup.file_size` -- `bulkio.backup.read_timeout` -- `bulkio.backup.read_with_priority_after` -- `changefeed.aggregator.flush_jitter` -- `changefeed.backfill.concurrent_scan_requests` -- `changefeed.backfill.scan_request_size` -- `changefeed.batch_reduction_retry.enabled (alias: changefeed.batch_reduction_retry_enabled)` -- `changefeed.default_range_distribution_strategy` -- `changefeed.event_consumer_worker_queue_size` -- `changefeed.event_consumer_workers` -- `changefeed.fast_gzip.enabled` -- `changefeed.frontier_highwater_lag_checkpoint_threshold` -- `changefeed.memory.per_changefeed_limit` -- `changefeed.min_highwater_advance` -- `changefeed.node_throttle_config` -- `changefeed.protect_timestamp.max_age` -- `changefeed.protect_timestamp_interval` -- `changefeed.schema_feed.read_with_priority_after` -- `changefeed.sink_io_workers` -- `cloudstorage.azure.concurrent_upload_buffers` -- `cloudstorage.azure.read.node_burst_limit` -- `cloudstorage.azure.read.node_rate_limit` -- `cloudstorage.azure.write.node_burst_limit` -- `cloudstorage.azure.write.node_rate_limit` -- `cloudstorage.gs.read.node_burst_limit` -- `cloudstorage.gs.read.node_rate_limit` -- `cloudstorage.gs.write.node_burst_limit` -- 
`cloudstorage.gs.write.node_rate_limit` -- `cloudstorage.http.custom_ca` -- `cloudstorage.http.read.node_burst_limit` -- `cloudstorage.http.read.node_rate_limit` -- `cloudstorage.http.write.node_burst_limit` -- `cloudstorage.http.write.node_rate_limit` -- `cloudstorage.nodelocal.read.node_burst_limit` -- `cloudstorage.nodelocal.read.node_rate_limit` -- `cloudstorage.nodelocal.write.node_burst_limit` -- `cloudstorage.nodelocal.write.node_rate_limit` -- `cloudstorage.nullsink.read.node_burst_limit` -- `cloudstorage.nullsink.read.node_rate_limit` -- `cloudstorage.nullsink.write.node_burst_limit` -- `cloudstorage.nullsink.write.node_rate_limit` -- `cloudstorage.s3.read.node_burst_limit` -- `cloudstorage.s3.read.node_rate_limit` -- `cloudstorage.s3.write.node_burst_limit` -- `cloudstorage.s3.write.node_rate_limit` -- `cloudstorage.timeout` -- `cloudstorage.userfile.read.node_burst_limit` -- `cloudstorage.userfile.read.node_rate_limit` -- `cloudstorage.userfile.write.node_burst_limit` -- `cloudstorage.userfile.write.node_rate_limit` -- `cluster.auto_upgrade.enabled` -- `cluster.preserve_downgrade_option` -- `debug.zip.redact_addresses.enabled` -- `diagnostics.forced_sql_stat_reset.interval` -- `diagnostics.reporting.enabled` -- `diagnostics.reporting.interval` -- `external.graphite.endpoint` -- `external.graphite.interval` -- `feature.backup.enabled` -- `feature.changefeed.enabled` -- `feature.export.enabled` -- `feature.import.enabled` -- `feature.restore.enabled` -- `feature.schema_change.enabled` -- `feature.stats.enabled` -- `jobs.retention_time` -- `kv.dist_sender.circuit_breaker.cancellation.enabled` -- `kv.dist_sender.circuit_breaker.cancellation.write_grace_period` -- `kv.dist_sender.circuit_breaker.probe.interval` -- `kv.dist_sender.circuit_breaker.probe.threshold` -- `kv.dist_sender.circuit_breaker.probe.timeout` -- `kv.dist_sender.circuit_breakers.mode` -- `kv.rangefeed.client.stream_startup_rate` -- `kv.transaction.max_intents_bytes` -- 
`kv.transaction.max_refresh_spans_bytes` -- `kv.transaction.randomized_anchor_key.enabled` -- `kv.transaction.reject_over_max_intents_budget.enabled` -- `kv.transaction.write_pipelining.locking_reads.enabled` -- `kv.transaction.write_pipelining.ranged_writes.enabled` -- `kv.transaction.write_pipelining.enabled (alias: kv.transaction.write_pipelining_enabled)` -- `kv.transaction.write_pipelining.max_batch_size (alias: kv.transaction.write_pipelining_max_batch_size)` -- `obs.tablemetadata.automatic_updates.enabled` -- `obs.tablemetadata.data_valid_duration` -- `schedules.backup.gc_protection.enabled` -- `security.ocsp.mode` -- `security.ocsp.timeout` -- `server.auth_log.sql_connections.enabled` -- `server.auth_log.sql_sessions.enabled` -- `server.authentication_cache.enabled` -- `server.child_metrics.enabled` -- `server.client_cert_expiration_cache.capacity` -- `server.clock.forward_jump_check.enabled (alias: server.clock.forward_jump_check_enabled)` -- `server.clock.persist_upper_bound_interval` -- `server.eventlog.enabled` -- `server.eventlog.ttl` -- `server.host_based_authentication.configuration` -- `server.hot_ranges_request.node.timeout` -- `server.hsts.enabled` -- `server.http.base_path` -- `server.identity_map.configuration` -- `server.jwt_authentication.audience` -- `server.jwt_authentication.claim` -- `server.jwt_authentication.client.timeout` -- `server.jwt_authentication.enabled` -- `server.jwt_authentication.issuers.configuration (alias: server.jwt_authentication.issuers)` -- `server.jwt_authentication.issuers.custom_ca` -- `server.jwt_authentication.jwks` -- `server.jwt_authentication.jwks_auto_fetch.enabled` -- `server.ldap_authentication.client.tls_certificate` -- `server.ldap_authentication.client.tls_key` -- `server.ldap_authentication.domain.custom_ca` -- `server.log_gc.max_deletions_per_cycle` -- `server.log_gc.period` -- `server.max_connections_per_gateway` -- `server.max_open_transactions_per_gateway` -- 
`server.oidc_authentication.autologin.enabled (alias: server.oidc_authentication.autologin)` -- `server.oidc_authentication.button_text` -- `server.oidc_authentication.claim_json_key` -- `server.oidc_authentication.client.timeout` -- `server.oidc_authentication.client_id` -- `server.oidc_authentication.client_secret` -- `server.oidc_authentication.enabled` -- `server.oidc_authentication.principal_regex` -- `server.oidc_authentication.provider_url` -- `server.oidc_authentication.redirect_url` -- `server.oidc_authentication.scopes` -- `server.redact_sensitive_settings.enabled` -- `server.shutdown.connections.timeout (alias: server.shutdown.connection_wait)` -- `server.shutdown.initial_wait (alias: server.shutdown.drain_wait)` -- `server.shutdown.transactions.timeout (alias: server.shutdown.query_wait)` -- `server.sql_tcp_keep_alive.count` -- `server.sql_tcp_keep_alive.interval` -- `server.time_until_store_dead` -- `server.user_login.cert_password_method.auto_scram_promotion.enabled` -- `server.user_login.downgrade_scram_stored_passwords_to_bcrypt.enabled` -- `server.user_login.min_password_length` -- `server.user_login.password_encryption` -- `server.user_login.password_hashes.default_cost.crdb_bcrypt` -- `server.user_login.password_hashes.default_cost.scram_sha_256` -- `server.user_login.rehash_scram_stored_passwords_on_cost_change.enabled` -- `server.user_login.timeout` -- `server.user_login.upgrade_bcrypt_stored_passwords_to_scram.enabled` -- `server.web_session.purge.ttl` -- `server.web_session.timeout (alias: server.web_session_timeout)` -- `sql.auth.change_own_password.enabled` -- `sql.auth.grant_option_for_owner.enabled` -- `sql.auth.grant_option_inheritance.enabled` -- `sql.auth.public_schema_create_privilege.enabled` -- `sql.auth.resolve_membership_single_scan.enabled` -- `sql.closed_session_cache.capacity` -- `sql.closed_session_cache.time_to_live` -- `sql.contention.event_store.capacity` -- `sql.contention.event_store.duration_threshold` -- 
`sql.contention.record_serialization_conflicts.enabled` -- `sql.contention.txn_id_cache.max_size` -- `sql.cross_db_fks.enabled` -- `sql.cross_db_sequence_owners.enabled` -- `sql.cross_db_sequence_references.enabled` -- `sql.cross_db_views.enabled` -- `sql.defaults.cost_scans_with_default_col_size.enabled` -- `sql.defaults.datestyle` -- `sql.defaults.default_hash_sharded_index_bucket_count` -- `sql.defaults.default_int_size` -- `sql.defaults.disallow_full_table_scans.enabled` -- `sql.defaults.distsql` -- `sql.defaults.experimental_alter_column_type.enabled` -- `sql.defaults.experimental_distsql_planning` -- `sql.defaults.experimental_enable_unique_without_index_constraints.enabled` -- `sql.defaults.experimental_implicit_column_partitioning.enabled` -- `sql.defaults.experimental_temporary_tables.enabled` -- `sql.defaults.foreign_key_cascades_limit` -- `sql.defaults.idle_in_session_timeout` -- `sql.defaults.idle_in_transaction_session_timeout` -- `sql.defaults.implicit_select_for_update.enabled` -- `sql.defaults.insert_fast_path.enabled` -- `sql.defaults.intervalstyle` -- `sql.defaults.large_full_scan_rows` -- `sql.defaults.locality_optimized_partitioned_index_scan.enabled` -- `sql.defaults.lock_timeout` -- `sql.defaults.on_update_rehome_row.enabled` -- `sql.defaults.optimizer_use_histograms.enabled` -- `sql.defaults.optimizer_use_multicol_stats.enabled` -- `sql.defaults.override_alter_primary_region_in_super_region.enabled` -- `sql.defaults.override_multi_region_zone_config.enabled` -- `sql.defaults.prefer_lookup_joins_for_fks.enabled` -- `sql.defaults.primary_region` -- `sql.defaults.reorder_joins_limit` -- `sql.defaults.require_explicit_primary_keys.enabled` -- `sql.defaults.results_buffer.size` -- `sql.defaults.serial_normalization` -- `sql.defaults.statement_timeout` -- `sql.defaults.stub_catalog_tables.enabled` -- `sql.defaults.super_regions.enabled` -- `sql.defaults.transaction_rows_read_err` -- `sql.defaults.transaction_rows_read_log` -- 
`sql.defaults.transaction_rows_written_err` -- `sql.defaults.transaction_rows_written_log` -- `sql.defaults.use_declarative_schema_changer` -- `sql.defaults.vectorize` -- `sql.defaults.zigzag_join.enabled` -- `sql.distsql.temp_storage.workmem` -- `sql.guardrails.max_row_size_err` -- `sql.guardrails.max_row_size_log` -- `sql.hash_sharded_range_pre_split.max` -- `sql.index_recommendation.drop_unused_duration` -- `sql.insights.anomaly_detection.enabled` -- `sql.insights.anomaly_detection.latency_threshold` -- `sql.insights.anomaly_detection.memory_limit` -- `sql.insights.execution_insights_capacity` -- `sql.insights.high_retry_count.threshold` -- `sql.insights.latency_threshold` -- `sql.log.slow_query.experimental_full_table_scans.enabled` -- `sql.log.slow_query.internal_queries.enabled` -- `sql.log.slow_query.latency_threshold` -- `sql.log.user_audit` -- `sql.log.user_audit.reduced_config.enabled` -- `sql.metrics.index_usage_stats.enabled` -- `sql.metrics.max_mem_reported_stmt_fingerprints` -- `sql.metrics.max_mem_reported_txn_fingerprints` -- `sql.metrics.max_mem_stmt_fingerprints` -- `sql.metrics.max_mem_txn_fingerprints` -- `sql.metrics.statement_details.dump_to_logs.enabled (alias: sql.metrics.statement_details.dump_to_logs)` -- `sql.metrics.statement_details.enabled` -- `sql.metrics.statement_details.gateway_node.enabled` -- `sql.metrics.statement_details.index_recommendation_collection.enabled` -- `sql.metrics.statement_details.max_mem_reported_idx_recommendations` -- `sql.metrics.statement_details.plan_collection.enabled` -- `sql.metrics.statement_details.plan_collection.period` -- `sql.metrics.statement_details.threshold` -- `sql.metrics.transaction_details.enabled` -- `sql.multiple_modifications_of_table.enabled` -- `sql.multiregion.drop_primary_region.enabled` -- `sql.notices.enabled` -- `sql.optimizer.uniqueness_checks_for_gen_random_uuid.enabled` -- `sql.spatial.experimental_box2d_comparison_operators.enabled` -- `sql.stats.activity.persisted_rows.max` -- 
`sql.stats.automatic_collection.enabled` -- `sql.stats.automatic_collection.fraction_stale_rows` -- `sql.stats.automatic_collection.min_stale_rows` -- `sql.stats.automatic_partial_collection.enabled` -- `sql.stats.automatic_partial_collection.fraction_stale_rows` -- `sql.stats.automatic_partial_collection.min_stale_rows` -- `sql.stats.cleanup.recurrence` -- `sql.stats.flush.enabled` -- `sql.stats.flush.interval` -- `sql.stats.forecasts.enabled` -- `sql.stats.forecasts.max_decrease` -- `sql.stats.forecasts.min_goodness_of_fit` -- `sql.stats.forecasts.min_observations` -- `sql.stats.histogram_buckets.count` -- `sql.stats.histogram_buckets.include_most_common_values.enabled` -- `sql.stats.histogram_buckets.max_fraction_most_common_values` -- `sql.stats.histogram_collection.enabled` -- `sql.stats.histogram_samples.count` -- `sql.stats.multi_column_collection.enabled` -- `sql.stats.non_default_columns.min_retention_period` -- `sql.stats.persisted_rows.max` -- `sql.stats.post_events.enabled` -- `sql.stats.response.max` -- `sql.stats.response.show_internal.enabled` -- `sql.stats.system_tables.enabled` -- `sql.stats.system_tables_autostats.enabled` -- `sql.stats.virtual_computed_columns.enabled` -- `sql.telemetry.query_sampling.enabled` -- `sql.telemetry.query_sampling.internal.enabled` -- `sql.telemetry.query_sampling.max_event_frequency` -- `sql.telemetry.query_sampling.mode` -- `sql.telemetry.transaction_sampling.max_event_frequency` -- `sql.telemetry.transaction_sampling.statement_events_per_transaction.max` -- `sql.temp_object_cleaner.cleanup_interval` -- `sql.temp_object_cleaner.wait_interval` -- `sql.log.all_statements.enabled (alias: sql.trace.log_statement_execute)` -- `sql.trace.stmt.enable_threshold` -- `sql.trace.txn.enable_threshold` -- `sql.ttl.changefeed_replication.disabled` -- `sql.ttl.default_delete_batch_size` -- `sql.ttl.default_delete_rate_limit` -- `sql.ttl.default_select_batch_size` -- `sql.ttl.default_select_rate_limit` -- `sql.ttl.job.enabled` -- 
`sql.txn.read_committed_isolation.enabled` -- `sql.txn.repeatable_read_isolation.enabled (alias: sql.txn.snapshot_isolation.enabled)` -- `sql.txn_fingerprint_id_cache.capacity` -- `storage.ingestion.value_blocks.enabled` -- `storage.max_sync_duration.fatal.enabled` -- `trace.debug_http_endpoint.enabled (alias: trace.debug.enable)` -- `trace.opentelemetry.collector` -- `trace.snapshot.rate` -- `trace.span_registry.enabled` -- `trace.zipkin.collector` -- `ui.display_timezone` -- `version` +{% assign app_settings_string = "" %} + +{% for row in site.data[version].cluster-settings %} + {% if row.class == "application" %} + {% assign app_settings_string = app_settings_string | append: row.setting | append: "||" %} + {% endif %} +{% endfor %} + +{% assign app_settings = app_settings_string | split: "||" | uniq | sort %} + +
<ul> + {% for setting in app_settings %} + {% unless setting == "" %} +
<li>{{ setting }}</li>
+ {% endunless %} + {% endfor %} +</ul>
## Cluster settings scoped to the system virtual cluster {% comment %}Class=system-only{% endcomment %} -- `admission.disk_bandwidth_tokens.elastic.enabled` -- `admission.kv.enabled` -- `bulkio.stream_ingestion.minimum_flush_interval` -- `physical_replication.consumer.minimum_flush_interval` -- `kv.allocator.lease_rebalance_threshold` -- `kv.allocator.load_based_lease_rebalancing.enabled` -- `kv.allocator.load_based_rebalancing` -- `kv.allocator.load_based_rebalancing.objective` -- `kv.allocator.load_based_rebalancing_interval` -- `kv.allocator.qps_rebalance_threshold` -- `kv.allocator.range_rebalance_threshold` -- `kv.allocator.store_cpu_rebalance_threshold` -- `kv.bulk_io_write.max_rate` -- `kv.bulk_sst.max_allowed_overage` -- `kv.lease_transfer_read_summary.global_budget` -- `kv.lease_transfer_read_summary.local_budget` -- `kv.log_range_and_node_events.enabled` -- `kv.raft.leader_fortification.fraction_enabled` -- `kv.range.range_size_hard_cap` -- `kv.range_split.by_load.enabled (alias: kv.range_split.by_load_enabled)` -- `kv.range_split.load_cpu_threshold` -- `kv.range_split.load_qps_threshold` -- `kv.replica_circuit_breaker.slow_replication_threshold` -- `kv.replica_stats.addsst_request_size_factor` -- `kv.replication_reports.interval` -- `kv.snapshot_rebalance.max_rate` -- `kv.snapshot_receiver.excise.enabled` -- `kvadmission.store.provisioned_bandwidth` -- `kvadmission.store.snapshot_ingest_bandwidth_control.enabled` -- `server.consistency_check.max_rate` -- `server.rangelog.ttl` -- `server.shutdown.lease_transfer_iteration.timeout (alias: server.shutdown.lease_transfer_wait)` -- `spanconfig.bounds.enabled` -- `spanconfig.range_coalescing.system.enabled` -- `spanconfig.range_coalescing.application.enabled` -- `spanconfig.storage_coalesce_adjacent.enabled` -- `spanconfig.tenant_coalesce_adjacent.enabled` -- `storage.experimental.eventually_file_only_snapshots.enabled` -- `storage.ingest_split.enabled` -- `storage.wal_failover.unhealthy_op_threshold` -- 
`timeseries.storage.enabled` +{% assign app_settings_string = "" %} + +{% for row in site.data[version].cluster-settings %} + {% if row.class == "system-only" %} + {% assign app_settings_string = app_settings_string | append: row.setting | append: "||" %} + {% endif %} +{% endfor %} + +{% assign app_settings = app_settings_string | split: "||" | uniq | sort %} + +
<ul> + {% for setting in app_settings %} + {% unless setting == "" %} +
<li>{{ setting }}</li>
+ {% endunless %} + {% endfor %} +</ul>
## System-visible cluster settings {% comment %}Class=system-visible{% endcomment %} -- `cluster.organization` -- `diagnostics.active_query_dumps.enabled` -- `diagnostics.memory_monitoring_dumps.enabled` -- `enterprise.license` -- `kv.bulk_sst.target_size` -- `kv.closed_timestamp.follower_reads.enabled (alias: kv.closed_timestamp.follower_reads_enabled)` -- `kv.closed_timestamp.lead_for_global_reads_override` -- `kv.closed_timestamp.side_transport_interval` -- `kv.closed_timestamp.target_duration` -- `kv.protectedts.reconciliation.interval` -- `kv.rangefeed.closed_timestamp_refresh_interval` -- `kv.rangefeed.enabled` -- `security.client_cert.subject_required.enabled` -- `sql.schema.telemetry.recurrence` -- `storage.columnar_blocks.enabled` -- `storage.delete_compaction_excise.enabled` -- `storage.max_sync_duration` -- `storage.sstable.compression_algorithm` -- `storage.sstable.compression_algorithm_backup_storage` -- `storage.sstable.compression_algorithm_backup_transport` -- `timeseries.storage.resolution_10s.ttl` -- `timeseries.storage.resolution_30m.ttl` +{% assign app_settings_string = "" %} + +{% for row in site.data[version].cluster-settings %} + {% if row.class == "system-visible" %} + {% assign app_settings_string = app_settings_string | append: row.setting | append: "||" %} + {% endif %} +{% endfor %} + +{% assign app_settings = app_settings_string | split: "||" | uniq | sort %} + +
<ul> + {% for setting in app_settings %} + {% unless setting == "" %} +
<li>{{ setting }}</li>
+ {% endunless %} + {% endfor %} +</ul>
## See also