diff --git a/src/current/_data/v25.3/metrics/available-metrics-not-in-metrics-list.csv b/src/current/_data/v25.3/metrics/available-metrics-not-in-metrics-list.csv deleted file mode 100644 index 1cd86aace0a..00000000000 --- a/src/current/_data/v25.3/metrics/available-metrics-not-in-metrics-list.csv +++ /dev/null @@ -1,19 +0,0 @@ -metric_id,description,y-axis label,type,unit -"security.certificate.expiration.ca","Expiration for the CA certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.client-ca","Expiration for the client CA certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.client","Minimum expiration for client certificates, labeled by SQL user. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.ui-ca","Expiration for the UI CA certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.node","Expiration for the node certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.node-client","Expiration for the node's client certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.ui","Expiration for the UI certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.ca-client-tenant","Expiration for the Tenant Client CA certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.client-tenant","Expiration for the Tenant Client certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.ca","Seconds till expiration for the CA certificate. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.client-ca","Seconds till expiration for the client CA certificate. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.client","Seconds till expiration for the client certificates, labeled by SQL user. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.ui-ca","Seconds till expiration for the UI CA certificate. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.node","Seconds till expiration for the node certificate. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.node-client","Seconds till expiration for the node's client certificate. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.ui","Seconds till expiration for the UI certificate. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.ca-client-tenant","Seconds till expiration for the Tenant Client CA certificate. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.client-tenant","Seconds till expiration for the Tenant Client certificate. 
0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC \ No newline at end of file diff --git a/src/current/_data/v25.3/metrics/available-metrics-not-in-metrics-list.yaml b/src/current/_data/v25.3/metrics/available-metrics-not-in-metrics-list.yaml new file mode 100644 index 00000000000..2467c98e316 --- /dev/null +++ b/src/current/_data/v25.3/metrics/available-metrics-not-in-metrics-list.yaml @@ -0,0 +1,133 @@ +- metric_id: seconds.until.enterprise.license.expiry + description: Seconds until enterprise license expiry (0 if no license present or running without enterprise features) + y-axis label: License Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.ca + labeled_name: 'security.certificate.expiration{certificate_type=ca}' + description: Expiration for the CA certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.client-ca + labeled_name: 'security.certificate.expiration{certificate_type=client-ca}' + description: Expiration for the client CA certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.client + labeled_name: 'security.certificate.expiration{certificate_type=client}' + description: Minimum expiration for client certificates, labeled by SQL user. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.ui-ca + labeled_name: 'security.certificate.expiration{certificate_type=ui-ca}' + description: Expiration for the UI CA certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.node + labeled_name: 'security.certificate.expiration{certificate_type=node}' + description: Expiration for the node certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.node-client + labeled_name: 'security.certificate.expiration{certificate_type=node-client}' + description: Expiration for the node's client certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.ui + labeled_name: 'security.certificate.expiration{certificate_type=ui}' + description: Expiration for the UI certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.ca-client-tenant + labeled_name: 'security.certificate.expiration{certificate_type=ca-client-tenant}' + description: Expiration for the Tenant Client CA certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. 
+ essential: true +- metric_id: security.certificate.expiration.client-tenant + labeled_name: 'security.certificate.expiration{certificate_type=client-tenant}' + description: Expiration for the Tenant Client certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.ttl.ca + description: Seconds till expiration for the CA certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.client-ca + description: Seconds till expiration for the client CA certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.client + description: Seconds till expiration for the client certificates, labeled by SQL user. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.ui-ca + description: Seconds till expiration for the UI CA certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.node + description: Seconds till expiration for the node certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.node-client + description: Seconds till expiration for the node's client certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.ui + description: Seconds till expiration for the UI certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.ca-client-tenant + description: Seconds till expiration for the Tenant Client CA certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.client-tenant + description: Seconds till expiration for the Tenant Client certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. diff --git a/src/current/_data/v25.3/metrics/datadog-cockroachdb.csv b/src/current/_data/v25.3/metrics/datadog-cockroachdb.csv new file mode 100644 index 00000000000..fcca1607102 --- /dev/null +++ b/src/current/_data/v25.3/metrics/datadog-cockroachdb.csv @@ -0,0 +1,2779 @@ +prefix,datadog_id,type,description +cockroachdb,abortspanbytes,gauge,"Number of bytes in the abort span +Shown as byte" +cockroachdb,addsstable.applications,count,"[OpenMetrics v1] Number of SSTable ingestions applied (i.e. applied by Replicas) +Shown as operation" +cockroachdb,addsstable.applications.count,count,"[OpenMetrics v2] Number of SSTable ingestions applied (i.e. 
applied by Replicas) +Shown as operation" +cockroachdb,addsstable.copies,count,"[OpenMetrics v1] number of SSTable ingestions that required copying files during application +Shown as operation" +cockroachdb,addsstable.copies.count,count,"[OpenMetrics v2] number of SSTable ingestions that required copying files during application +Shown as operation" +cockroachdb,addsstable.delay.count,count,"Amount by which evaluation of AddSSTable requests was delayed +Shown as nanosecond" +cockroachdb,addsstable.delay.enginebackpressure.count,count,"Amount by which evaluation of AddSSTable requests was delayed by storage-engine backpressure +Shown as nanosecond" +cockroachdb,addsstable.proposals,count,"[OpenMetrics v1] Number of SSTable ingestions proposed (i.e. sent to Raft by lease holders) +Shown as operation" +cockroachdb,addsstable.proposals.count,count,"[OpenMetrics v2] Number of SSTable ingestions proposed (i.e. sent to Raft by lease holders) +Shown as operation" +cockroachdb,admission.admitted.elastic_cpu.bulk_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.elastic_cpu.count,count,Number of requests admitted +cockroachdb,admission.admitted.elastic_cpu.normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv,count,"[OpenMetrics v1] Number of KV requests admitted +Shown as request" +cockroachdb,admission.admitted.kv.bulk_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv.count,count,"[OpenMetrics v2] Number of KV requests admitted +Shown as request" +cockroachdb,admission.admitted.kv.high_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv.locking_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv.normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv_stores,count,"[OpenMetrics v1] Number of KV stores requests admitted +Shown as request" +cockroachdb,admission.admitted.kv_stores.bulk_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv_stores.count,count,"[OpenMetrics v2] Number of KV stores requests admitted +Shown as request" +cockroachdb,admission.admitted.kv_stores.high_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv_stores.locking_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv_stores.normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv_stores.ttl_low_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql.leaf.start,count,"[OpenMetrics v1] Number of SQL leaf start requests admitted +Shown as request" +cockroachdb,admission.admitted.sql.leaf.start.count,count,"[OpenMetrics v2] Number of SQL leaf start requests admitted +Shown as request" +cockroachdb,admission.admitted.sql.root.start,count,"[OpenMetrics v1] Number of SQL root start requests admitted +Shown as request" +cockroachdb,admission.admitted.sql.root.start.count,count,"[OpenMetrics v2] Number of SQL root start requests admitted +Shown as request" +cockroachdb,admission.admitted.sql_kv.response,count,"[OpenMetrics v1] Number of SQL KV response requests admitted +Shown as request" +cockroachdb,admission.admitted.sql_kv.response.count,count,"[OpenMetrics v2] Number of SQL KV response requests admitted +Shown as request" +cockroachdb,admission.admitted.sql_kv_response.locking_normal_pri.count,count,Number of requests admitted 
+cockroachdb,admission.admitted.sql_kv_response.normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_leaf_start.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_leaf_start.locking_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_leaf_start.normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_root_start.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_root_start.locking_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_root_start.normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_sql.response,count,"[OpenMetrics v1] Number of Distributed SQL response requests admitted +Shown as request" +cockroachdb,admission.admitted.sql_sql.response.count,count,"[OpenMetrics v2] Number of Distributed SQL response requests admitted +Shown as request" +cockroachdb,admission.admitted.sql_sql_response.locking_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_sql_response.normal_pri.count,count,Number of requests admitted +cockroachdb,admission.elastic_cpu.acquired_nanos.count,count,"Total CPU nanoseconds acquired by elastic work +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.available_nanos,gauge,"Instantaneous available CPU nanoseconds per second ignoring utilization limit +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.max_available_nanos.count,count,"Maximum available CPU nanoseconds per second ignoring utilization limit +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.nanos_exhausted_duration,gauge,"Total duration when elastic CPU nanoseconds were exhausted, in micros" +cockroachdb,admission.elastic_cpu.over_limit_durations.bucket,count,"Measurement of how much over the prescribed limit elastic requests ran (not recorded if requests don’t run over) +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.over_limit_durations.count,count,"Measurement of how much over the prescribed limit elastic requests ran (not recorded if requests don’t run over) +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.over_limit_durations.sum,count,"Measurement of how much over the prescribed limit elastic requests ran (not recorded if requests don’t run over) +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.pre_work_nanos.count,count,"Total CPU nanoseconds spent doing pre-work, before doing elastic work +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.returned_nanos.count,count,"Total CPU nanoseconds returned by elastic work +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.utilization,gauge,"CPU utilization by elastic work +Shown as percent" +cockroachdb,admission.elastic_cpu.utilization_limit,gauge,"Utilization limit set for the elastic CPU work +Shown as percent" +cockroachdb,admission.errored.elastic_cpu.bulk_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.elastic_cpu.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.elastic_cpu.normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv.bulk_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.errored.kv.count,count,"[OpenMetrics v1] Number of KV requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.kv.countt,count,"[OpenMetrics v2] Number of KV requests not admitted 
due to error +Shown as request" +cockroachdb,admission.errored.kv.high_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv.locking_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv.normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv_stores.bulk_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv_stores.count,count,"[OpenMetrics v1] Number of KV stores requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.kv_stores.countt,count,"[OpenMetrics v2] Number of KV stores requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.kv_stores.high_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv_stores.locking_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv_stores.normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv_stores.ttl_low_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql.leaf.start,count,"[OpenMetrics v1] Number of SQL leaf start requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql.leaf.start.count,count,"[OpenMetrics v2] Number of SQL leaf start requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql.root.start,count,"[OpenMetrics v1] Number of SQL root start requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql.root.start.count,count,"[OpenMetrics v2] Number of SQL root start requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql_kv.response,count,"[OpenMetrics v1] Number of SQL KV requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql_kv.response.count,count,"[OpenMetrics v2] Number of SQL KV requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql_kv_response.locking_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_kv_response.normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_leaf_start.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_leaf_start.locking_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_leaf_start.normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_root_start.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_root_start.locking_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_root_start.normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_sql.response,count,"[OpenMetrics v1] Number of Distributed SQL requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql_sql.response.count,count,"[OpenMetrics v2] Number of Distributed SQL start requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql_sql_response.locking_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_sql_response.normal_pri.count,count,Number of requests not admitted due 
to error +cockroachdb,admission.granter.cpu_load_long_period_duration.kv.count,count,"Total duration when CPULoad was being called with a long period, in micros" +cockroachdb,admission.granter.cpu_load_short_period_duration.kv.count,count,"Total duration when CPULoad was being called with a short period, in micros" +cockroachdb,admission.granter.elastic_io_tokens_available.kv,gauge,Number of tokens available +cockroachdb,admission.granter.io.tokens.exhausted.duration.kv,count,"[OpenMetrics v1] Total duration when IO tokens were exhausted, in micros +Shown as microsecond" +cockroachdb,admission.granter.io.tokens.exhausted.duration.kv.count,count,"[OpenMetrics v2] Total duration when IO tokens were exhausted, in micros +Shown as microsecond" +cockroachdb,admission.granter.io_tokens_available.kv,gauge,Number of tokens available +cockroachdb,admission.granter.io_tokens_bypassed.kv.count,count,"Total number of tokens taken by work bypassing admission control (for example, follower writes without flow control)" +cockroachdb,admission.granter.io_tokens_exhausted_duration.kv.count,count,"Total duration when IO tokens were exhausted, in micros" +cockroachdb,admission.granter.io_tokens_returned.kv.count,count,Total number of tokens returned +cockroachdb,admission.granter.io_tokens_taken.kv.count,count,Total number of tokens taken +cockroachdb,admission.granter.slot_adjuster_decrements.kv.count,count,Number of decrements of the total KV slots +cockroachdb,admission.granter.slot_adjuster_increments.kv.count,count,Number of increments of the total KV slots +cockroachdb,admission.granter.slots_exhausted_duration.kv.count,count,"Total duration when KV slots were exhausted, in micros" +cockroachdb,admission.granter.total.slots.kv,gauge,[OpenMetrics v1 & v2] Total slots for KV work +cockroachdb,admission.granter.total_slots.kv,gauge,Total slots for kv work +cockroachdb,admission.granter.used.slots.kv,gauge,[OpenMetrics v1 & v2] Used slots for KV work +cockroachdb,admission.granter.used.slots.sql.leaf.start,gauge,[OpenMetrics v1 & v2] Used slots for SQL leaf start work +cockroachdb,admission.granter.used.slots.sql.root.start,gauge,[OpenMetrics v1 & v2] Used slots for SQL root start work +cockroachdb,admission.granter.used_slots.kv,gauge,Used slots +cockroachdb,admission.granter.used_slots.sql_leaf_start,gauge,Used slots +cockroachdb,admission.granter.used_slots.sql_root_start,gauge,Used slots +cockroachdb,admission.io.overload,gauge,1-normalized float indicating whether IO admission control considers the store as overloaded with respect to compaction out of L0 (considers sub-level and file counts). +cockroachdb,admission.l0_compacted_bytes.kv.count,count,Total bytes compacted out of L0 (used to generate IO tokens) +cockroachdb,admission.l0_tokens_produced.kv.count,count,Total number of tokens generated for L0 +cockroachdb,admission.raft.paused_replicas,gauge,"Number of followers (i.e. Replicas) to which replication is currently paused to help them recover from I/O overload. Such Replicas will be ignored for the purposes of proposal quota, and will not receive replication traffic. They are essentially treated as offline for the purpose of replication. This serves as a crude form of admission control. The count is emitted by the leaseholder of each range." +cockroachdb,admission.raft.paused_replicas_dropped_msgs.count,count,Number of messages dropped instead of being sent to paused replicas. The messages are dropped to help these replicas to recover from I/O overload.
+cockroachdb,admission.requested.elastic_cpu.bulk_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.elastic_cpu.count,count,Number of requests +cockroachdb,admission.requested.elastic_cpu.normal_pri.count,count,Number of requests +cockroachdb,admission.requested.kv,count,"[OpenMetrics v1] Number of KV admission requests +Shown as request" +cockroachdb,admission.requested.kv.bulk_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.requested.kv.count,count,"[OpenMetrics v2] Number of KV admission requests +Shown as request" +cockroachdb,admission.requested.kv.high_pri.count,count,Number of requests +cockroachdb,admission.requested.kv.locking_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.kv.normal_pri.count,count,Number of requests +cockroachdb,admission.requested.kv_stores.bulk_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.kv_stores.count,count,"[OpenMetrics v2] Number of KV stores admission requests +Shown as request" +cockroachdb,admission.requested.kv_stores.high_pri.count,count,Number of requests +cockroachdb,admission.requested.kv_stores.locking_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.kv_stores.normal_pri.count,count,Number of requests +cockroachdb,admission.requested.kv_stores.ttl_low_pri.count,count,Number of requests +cockroachdb,admission.requested.sql.leaf.start,count,"[OpenMetrics v1] Number of SQL leaf start admission requests +Shown as request" +cockroachdb,admission.requested.sql.leaf.start.count,count,"[OpenMetrics v2] Number of SQL leaf start admission requests +Shown as request" +cockroachdb,admission.requested.sql_kv.response,count,"[OpenMetrics v1] Number of SQL KV admission requests +Shown as request" +cockroachdb,admission.requested.sql_kv.response.count,count,"[OpenMetrics v2] Number of SQL KV admission requests +Shown as request" +cockroachdb,admission.requested.sql_kv_response.locking_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.sql_kv_response.normal_pri.count,count,Number of requests +cockroachdb,admission.requested.sql_leaf_start.locking_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.sql_leaf_start.normal_pri.count,count,Number of requests +cockroachdb,admission.requested.sql_root_start.count,count,Number of requests +cockroachdb,admission.requested.sql_root_start.locking_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.sql_root_start.normal_pri.count,count,Number of requests +cockroachdb,admission.requested.sql_sql.response,count,"[OpenMetrics v1] Number of Distributed SQL admission requests +Shown as request" +cockroachdb,admission.requested.sql_sql.response.count,count,"[OpenMetrics v2] Number of Distributed SQL admission requests +Shown as request" +cockroachdb,admission.requested.sql_sql_response.locking_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.sql_sql_response.normal_pri.count,count,Number of requests +cockroachdb,admission.scheduler_latency_listener.p99_nanos,gauge,"The scheduling latency at p99 as observed by the scheduler latency listener +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv,gauge,"[OpenMetrics v1] Wait time durations for KV requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv.bucket,count,"[OpenMetrics v2] Wait time durations for KV requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv.count,count,"[OpenMetrics v2] 
Wait time durations for KV requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv.sum,count,"[OpenMetrics v2] Wait time durations for KV requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv_stores,gauge,"[OpenMetrics v1] Wait time durations for KV stores requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv_stores.bucket,count,"[OpenMetrics v2] Wait time durations for KV stores requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv_stores.count,count,"[OpenMetrics v2] Wait time durations for KV stores requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv_stores.sum,count,"[OpenMetrics v2] Wait time durations for KV stores requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql.leaf.start,gauge,"[OpenMetrics v1] Wait time durations for SQL leaf start requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql.leaf.start.bucket,count,"[OpenMetrics v2] Wait time durations for SQL leaf start requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql.leaf.start.count,count,"[OpenMetrics v2] Wait time durations for SQL leaf start requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql.leaf.start.sum,count,"[OpenMetrics v2] Wait time durations for SQL leaf start requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_kv.response,gauge,"[OpenMetrics v1] Wait time durations for SQL KV response requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_kv.response.bucket,count,"[OpenMetrics v2] Wait time durations for SQL KV response requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_kv.response.count,count,"[OpenMetrics v2] Wait time durations for SQL KV response requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_kv.response.sum,count,"[OpenMetrics v2] Wait time durations for SQL KV response requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_sql.response,gauge,"[OpenMetrics v1] Wait time durations for Distributed SQL requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_sql.response.bucket,count,"[OpenMetrics v2] Wait time durations for Distributed SQL requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_sql.response.count,count,"[OpenMetrics v2] Wait time durations for Distributed SQL requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_sql.response.sum,count,"[OpenMetrics v2] Wait time durations for Distributed SQL requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.queue.length.kv,gauge,[OpenMetrics v1 & v2] Length of KV wait queue +cockroachdb,admission.wait.queue.length.kv_stores,gauge,[OpenMetrics v1 & v2] Length of KV stores wait queue +cockroachdb,admission.wait.queue.length.sql.leaf.start,gauge,[OpenMetrics v1 & v2] Length of SQL leaf start wait queue +cockroachdb,admission.wait.queue.length.sql_kv.response,gauge,[OpenMetrics v1 & v2] Length of SQL KV wait queue +cockroachdb,admission.wait.queue.length.sql_sql.response,gauge,[OpenMetrics v1 & v2] Length of Distributed SQL wait queue +cockroachdb,admission.wait.queue.lengths.sql.root.start,gauge,[OpenMetrics v1 & v2] Length of SQL root start wait queue +cockroachdb,admission.wait.sum.kv,count,"[OpenMetrics v1] Total KV 
wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.kv.count,count,"[OpenMetrics v2] Total KV wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.kv_stores,count,"[OpenMetrics v1] Total KV stores wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.kv_stores.count,count,"[OpenMetrics v2] Total KV stores wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.sql.root.start,count,"[OpenMetrics v1] Total SQL root start wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.sql.root.start.count,count,"[OpenMetrics v2] Total SQL root start wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.sql_kv.response,count,"[OpenMetrics v1] Total SQL KV wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.sql_kv.response.count,count,"[OpenMetrics v2] Total SQL KV wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.sql_sql.response,count,"[OpenMetrics v1] Total Distributed SQL wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.sql_sql.response.count,count,"[OpenMetrics v2] Total Distributed SQL wait time in micros +Shown as microsecond" +cockroachdb,admission.wait_durations.elastic_cpu.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.bulk_normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.bulk_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.bulk_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.bulk_normal_pri,count,Number of requests admitted +cockroachdb,admission.wait_durations.kv.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.high_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.high_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.high_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.locking_normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.locking_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" 
+cockroachdb,admission.wait_durations.kv.locking_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.bulk_normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.bulk_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.bulk_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.high_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.high_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.high_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.locking_normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.locking_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.locking_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.ttl_low_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.ttl_low_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.ttl_low_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.locking_normal_pri.bucket,count,"Wait time durations for requests that waited 
+Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.locking_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.locking_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.locking_normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.locking_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.locking_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.locking_normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.locking_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.locking_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.bucket,count,"Wait time durations for 
requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.locking_normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.locking_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.locking_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_queue_length.elastic_cpu,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.elastic_cpu.bulk_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.elastic_cpu.normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv.bulk_normal_pri,count,Number of requests admitted +cockroachdb,admission.wait_queue_length.kv.high_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv.locking_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv.normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv_stores,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv_stores.bulk_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv_stores.high_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv_stores.locking_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv_stores.normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv_stores.ttl_low_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_kv_response,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_kv_response.locking_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_kv_response.normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_leaf_start,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_leaf_start.locking_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_leaf_start.normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_root_start,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_root_start.locking_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_root_start.normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_sql_response,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_sql_response.locking_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_sql_response.normal_pri,gauge,Length of wait queue 
+cockroachdb,backup.last_failed_time.kms_inaccessible,gauge,The unix timestamp of the most recent failure of backup due to errKMSInaccessible by a backup specified as maintaining this metric +cockroachdb,batch_requests.bytes.count,count,"Total byte count of batch requests processed +Shown as byte" +cockroachdb,batch_requests.cross_region.bytes.count,count,"Total byte count of batch requests processed cross region when region tiers are configured +Shown as byte" +cockroachdb,batch_requests.cross_zone.bytes.count,count,"Total byte count of batch requests processed cross zone within the same region when region and zone tiers are configured. However, if the region tiers are not configured, this count may also include batch data sent between different regions. Ensuring consistent configuration of region and zone tiers across nodes helps to accurately monitor the data transmitted. +Shown as byte" +cockroachdb,batch_responses.bytes.count,count,"Total byte count of batch responses received +Shown as byte" +cockroachdb,batch_responses.cross_region.bytes.count,count,"Total byte count of batch responses received cross region when region tiers are configured +Shown as byte" +cockroachdb,batch_responses.cross_zone.bytes.count,count,"Total byte count of batch responses received cross zone within the same region when region and zone tiers are configured. However, if the region tiers are not configured, this count may also include batch data received between different regions. Ensuring consistent configuration of region and zone tiers across nodes helps to accurately monitor the data transmitted. +Shown as byte" +cockroachdb,build.timestamp,gauge,"[OpenMetrics v1 & v2] Build information +Shown as time" +cockroachdb,capacity.available,gauge,"[OpenMetrics v1 & v2] Available storage capacity +Shown as byte" +cockroachdb,capacity.reserved,gauge,"[OpenMetrics v1 & v2] Capacity reserved for snapshots +Shown as byte" +cockroachdb,capacity.total,gauge,"[OpenMetrics v1 & v2] Total storage capacity +Shown as byte" +cockroachdb,capacity.used,gauge,"[OpenMetrics v1 & v2] Used storage capacity +Shown as byte" +cockroachdb,changefeed.admit.latency,gauge,"[OpenMetrics v1] Event admission latency: a difference between event MVCC timestamp and the time it was admitted into changefeed pipeline +Shown as nanosecond" +cockroachdb,changefeed.admit.latency.bucket,count,"[OpenMetrics v2] Event admission latency: a difference between event MVCC timestamp and the time it was admitted into changefeed pipeline +Shown as nanosecond" +cockroachdb,changefeed.admit.latency.count,count,"[OpenMetrics v2] Event admission latency: a difference between event MVCC timestamp and the time it was admitted into changefeed pipeline +Shown as nanosecond" +cockroachdb,changefeed.admit.latency.sum,count,"[OpenMetrics v2] Event admission latency: a difference between event MVCC timestamp and the time it was admitted into changefeed pipeline +Shown as nanosecond" +cockroachdb,changefeed.aggregator_progress,gauge,The earliest timestamp up to which any aggregator is guaranteed to have emitted all values for +cockroachdb,changefeed.backfill,gauge,[OpenMetrics v1 & v2] Number of changefeeds currently executing backfill +cockroachdb,changefeed.backfill.pending.ranges,gauge,[OpenMetrics v1 & v2] Number of ranges in an ongoing backfill that are yet to be fully emitted +cockroachdb,changefeed.backfill_count,gauge,Number of changefeeds currently executing backfill +cockroachdb,changefeed.batch_reduction_count,gauge,Number of times a changefeed aggregator node 
attempted to reduce the size of message batches it emitted to the sink +cockroachdb,changefeed.buffer_entries.allocated_mem,gauge,"Current quota pool memory allocation +Shown as byte" +cockroachdb,changefeed.buffer_entries.flush.count,count,Number of flush elements added to the buffer +cockroachdb,changefeed.buffer_entries.in.count,count,Total entries entering the buffer between raft and changefeed sinks +cockroachdb,changefeed.buffer_entries.kv.count,count,Number of kv elements added to the buffer +cockroachdb,changefeed.buffer_entries.out.count,count,Total entries leaving the buffer between raft and changefeed sinks +cockroachdb,changefeed.buffer_entries.released.count,count,"Total entries processed, emitted and acknowledged by the sinks" +cockroachdb,changefeed.buffer_entries.resolved.count,count,Number of resolved elements added to the buffer +cockroachdb,changefeed.buffer_entries_mem.acquired.count,count,Total amount of memory acquired for entries as they enter the system +cockroachdb,changefeed.buffer_entries_mem.released.count,count,Total amount of memory released by the entries after they have been emitted +cockroachdb,changefeed.buffer_pushback.count,count,"Total time spent waiting while the buffer was full +Shown as nanosecond" +cockroachdb,changefeed.bytes.messages_pushback.count,count,"Total time spent throttled for bytes quota +Shown as nanosecond" +cockroachdb,changefeed.checkpoint_hist_nanos.bucket,count,"Time spent checkpointing changefeed progress +Shown as nanosecond" +cockroachdb,changefeed.checkpoint_hist_nanos.count,count,"Time spent checkpointing changefeed progress +Shown as nanosecond" +cockroachdb,changefeed.checkpoint_hist_nanos.sum,count,"Time spent checkpointing changefeed progress +Shown as nanosecond" +cockroachdb,changefeed.checkpoint_progress,gauge,The earliest timestamp of any changefeed’s persisted checkpoint (values prior to this timestamp will never need to be re-emitted) +cockroachdb,changefeed.cloudstorage_buffered_bytes,gauge,The number of bytes buffered in cloudstorage sink files which have not been emitted yet +cockroachdb,changefeed.commit.latency,gauge,"[OpenMetrics v1] Event commit latency: a difference between event MVCC timestamp and the time it was acknowledged by the downstream sink +Shown as nanosecond" +cockroachdb,changefeed.commit.latency.bucket,count,"[OpenMetrics v2] Event commit latency: a difference between event MVCC timestamp and the time it was acknowledged by the downstream sink +Shown as nanosecond" +cockroachdb,changefeed.commit.latency.count,count,"[OpenMetrics v2] Event commit latency: a difference between event MVCC timestamp and the time it was acknowledged by the downstream sink +Shown as nanosecond" +cockroachdb,changefeed.commit.latency.sum,count,"[OpenMetrics v2] Event commit latency: a difference between event MVCC timestamp and the time it was acknowledged by the downstream sink +Shown as nanosecond" +cockroachdb,changefeed.emitted.messages,count,[OpenMetrics v1] Messages emitted by all feeds +cockroachdb,changefeed.emitted.messages.count,count,[OpenMetrics v2] Messages emitted by all feeds +cockroachdb,changefeed.emitted_bytes,count,"Bytes emitted by all feeds +Shown as byte" +cockroachdb,changefeed.emitted_bytes.count,count,Bytes emitted by all feeds +cockroachdb,changefeed.emitted_messages,count,Messages emitted by all feeds +cockroachdb,changefeed.error.retries,count,[OpenMetrics v1] Total retryable errors encountered by all changefeeds +cockroachdb,changefeed.error.retries.count,count,[OpenMetrics v2] Total 
retryable errors encountered by all changefeeds +cockroachdb,changefeed.error_retries,count,Total retryable errors encountered by all changefeeds +cockroachdb,changefeed.failures,count,[OpenMetrics v1] Total number of changefeed jobs which have failed +cockroachdb,changefeed.failures.count,count,[OpenMetrics v2] Total number of changefeed jobs which have failed +cockroachdb,changefeed.filtered_messages.count,count,Messages filtered out by all feeds. This count does not include the number of messages that may be filtered due to the range constraints. +cockroachdb,changefeed.flush.messages_pushback.count,count,"Total time spent throttled for flush quota +Shown as nanosecond" +cockroachdb,changefeed.flush_hist_nanos.bucket,count,"Time spent flushing messages across all changefeeds +Shown as nanosecond" +cockroachdb,changefeed.flush_hist_nanos.count,count,"Time spent flushing messages across all changefeeds +Shown as nanosecond" +cockroachdb,changefeed.flush_hist_nanos.sum,count,"Time spent flushing messages across all changefeeds +Shown as nanosecond" +cockroachdb,changefeed.flushed_bytes.count,count,"Bytes emitted by all feeds; may be different from changefeed.emitted_bytes when compression is enabled +Shown as byte" +cockroachdb,changefeed.flushes.count,count,Total flushes across all feeds +cockroachdb,changefeed.forwarded_resolved_messages.count,count,Resolved timestamps forwarded from the change aggregator to the change frontier +cockroachdb,changefeed.frontier_updates.count,count,Number of change frontier updates across all feeds +cockroachdb,changefeed.internal_retry_message,gauge,Number of messages for which an attempt to retry them within an aggregator node was made +cockroachdb,changefeed.lagging_ranges,gauge,The number of ranges considered to be lagging behind +cockroachdb,changefeed.max.behind.nanos,gauge,[OpenMetrics v1 & v2] Largest commit-to-emit duration of any running feed +cockroachdb,changefeed.max_behind_nanos,gauge,"(Deprecated in favor of checkpoint_progress) The most any changefeed’s persisted checkpoint is behind the present +Shown as nanosecond" +cockroachdb,changefeed.message.size.hist,gauge,[OpenMetrics v1] Message size histogram +cockroachdb,changefeed.message.size.hist.bucket,count,[OpenMetrics v2] Message size histogram +cockroachdb,changefeed.message.size.hist.count,count,[OpenMetrics v2] Message size histogram +cockroachdb,changefeed.message.size.hist.sum,count,[OpenMetrics v2] Message size histogram +cockroachdb,changefeed.message_size_hist.bucket,count,"Message size histogram +Shown as byte" +cockroachdb,changefeed.message_size_hist.count,count,"Message size histogram +Shown as byte" +cockroachdb,changefeed.message_size_hist.sum,count,"Message size histogram +Shown as byte" +cockroachdb,changefeed.messages.messages_pushback.count,count,"Total time spent throttled for messages quota +Shown as nanosecond" +cockroachdb,changefeed.nprocs_consume_event_nanos.bucket,count,"Total time spent waiting to add an event to the parallel consumer +Shown as nanosecond" +cockroachdb,changefeed.nprocs_consume_event_nanos.count,count,"Total time spent waiting to add an event to the parallel consumer +Shown as nanosecond" +cockroachdb,changefeed.nprocs_consume_event_nanos.sum,count,"Total time spent waiting to add an event to the parallel consumer +Shown as nanosecond" +cockroachdb,changefeed.nprocs_flush_nanos.bucket,count,"Total time spent idle waiting for the parallel consumer to flush +Shown as nanosecond" +cockroachdb,changefeed.nprocs_flush_nanos.count,count,"Total time
spent idle waiting for the parallel consumer to flush +Shown as nanosecond" +cockroachdb,changefeed.nprocs_flush_nanos.sum,count,"Total time spent idle waiting for the parallel consumer to flush +Shown as nanosecond" +cockroachdb,changefeed.nprocs_in_flight,gauge,Number of buffered events in the parallel consumer +cockroachdb,changefeed.parallel_io_queue_nanos.bucket,count,"Time spent with outgoing requests to the sink waiting in queue due to inflight requests with conflicting keys +Shown as nanosecond" +cockroachdb,changefeed.parallel_io_queue_nanos.count,count,"Time spent with outgoing requests to the sink waiting in queue due to inflight requests with conflicting keys +Shown as nanosecond" +cockroachdb,changefeed.parallel_io_queue_nanos.sum,count,"Time spent with outgoing requests to the sink waiting in queue due to inflight requests with conflicting keys +Shown as nanosecond" +cockroachdb,changefeed.queue_time.count,count,"Time KV event spent waiting to be processed +Shown as nanosecond" +cockroachdb,changefeed.running,gauge,"[OpenMetrics v1 & v2] Number of currently running changefeeds, including sinkless" +cockroachdb,changefeed.schema_registry.registrations.count,count,Number of registration attempts with the schema registry +cockroachdb,changefeed.schema_registry.retry.count,count,Number of retries encountered when sending requests to the schema registry +cockroachdb,changefeed.schemafeed.table_history_scans.count,count,The number of table history scans during polling +cockroachdb,changefeed.schemafeed.table_metadata.count,count,"Time blocked while verifying table metadata histories +Shown as nanosecond" +cockroachdb,changefeed.sink_batch_hist_nanos.bucket,count,"Time spent batched in the sink buffer before being flushed and acknowledged +Shown as nanosecond" +cockroachdb,changefeed.sink_batch_hist_nanos.count,count,"Time spent batched in the sink buffer before being flushed and acknowledged +Shown as nanosecond" +cockroachdb,changefeed.sink_batch_hist_nanos.sum,count,"Time spent batched in the sink buffer before being flushed and acknowledged +Shown as nanosecond" +cockroachdb,changefeed.sink_io_inflight,gauge,The number of keys currently inflight as IO requests being sent to the sink +cockroachdb,changefeed.size_based_flushes.count,count,Total size based flushes across all feeds +cockroachdb,clock.offset.meannanos,gauge,"[OpenMetrics v1 & v2] Mean clock offset with other nodes in nanoseconds +Shown as nanosecond" +cockroachdb,clock.offset.stddevnanos,gauge,"[OpenMetrics v1 & v2] Stddev clock offset with other nodes in nanoseconds +Shown as nanosecond" +cockroachdb,cloud.read_bytes.count,count,"Number of bytes read +Shown as byte" +cockroachdb,cloud.write_bytes.count,count,"Number of bytes written +Shown as byte" +cockroachdb,cluster.preserve_downgrade_option.last_updated,gauge,Timestamp of the last time the preserve_downgrade_option was updated +cockroachdb,compactor.compactingnanos,count,"[OpenMetrics v1] Number of nanoseconds spent compacting ranges +Shown as nanosecond" +cockroachdb,compactor.compactingnanos.count,count,"[OpenMetrics v2] Number of nanoseconds spent compacting ranges +Shown as nanosecond" +cockroachdb,compactor.compactions.failure,count,"[OpenMetrics v1] Number of failed compaction requests sent to the storage engine +Shown as request" +cockroachdb,compactor.compactions.failure.count,count,"[OpenMetrics v2] Number of failed compaction requests sent to the storage engine +Shown as request" +cockroachdb,compactor.compactions.success,count,"[OpenMetrics v1] Number
of successful compaction requests sent to the storage engine +Shown as request" +cockroachdb,compactor.compactions.success.count,count,"[OpenMetrics v2] Number of successful compaction requests sent to the storage engine +Shown as request" +cockroachdb,compactor.suggestionbytes.compacted,count,"[OpenMetrics v1] Number of logical bytes compacted from suggested compactions +Shown as byte" +cockroachdb,compactor.suggestionbytes.compacted.count,count,"[OpenMetrics v2] Number of logical bytes compacted from suggested compactions +Shown as byte" +cockroachdb,compactor.suggestionbytes.queued,gauge,"[OpenMetrics v1 & v2] Number of logical bytes in suggested compactions in the queue +Shown as byte" +cockroachdb,compactor.suggestionbytes.skipped,count,"[OpenMetrics v1] Number of logical bytes in suggested compactions which were not compacted +Shown as byte" +cockroachdb,compactor.suggestionbytes.skipped.count,count,"[OpenMetrics v2] Number of logical bytes in suggested compactions which were not compacted +Shown as byte" +cockroachdb,distsender.batch_requests.cross_region.bytes.count,count,"Total byte count of replica-addressed batch requests processed cross region when region tiers are configured +Shown as byte" +cockroachdb,distsender.batch_requests.cross_zone.bytes.count,count,"Total byte count of replica-addressed batch requests processed cross zone within the same region when region and zone tiers are configured. However, if the region tiers are not configured, this count may also include batch data sent between different regions. Ensuring consistent configuration of region and zone tiers across nodes helps to accurately monitor the data transmitted. +Shown as byte" +cockroachdb,distsender.batch_requests.replica_addressed.bytes.count,count,"Total byte count of replica-addressed batch requests processed +Shown as byte" +cockroachdb,distsender.batch_responses.cross_region.bytes.count,count,"Total byte count of replica-addressed batch responses received cross region when region tiers are configured +Shown as byte" +cockroachdb,distsender.batch_responses.cross_zone.bytes.count,count,"Total byte count of replica-addressed batch responses received cross zone within the same region when region and zone tiers are configured. However, if the region tiers are not configured, this count may also include batch data received between different regions. Ensuring consistent configuration of region and zone tiers across nodes helps to accurately monitor the data transmitted. 
+Shown as byte" +cockroachdb,distsender.batch_responses.replica_addressed.bytes.count,count,"Total byte count of replica-addressed batch responses received +Shown as byte" +cockroachdb,distsender.batches.async.sent.count,count,Number of partial batches sent asynchronously +cockroachdb,distsender.batches.async.throttled.count,count,Number of partial batches not sent asynchronously due to throttling +cockroachdb,distsender.batches.count,count,Number of batches processed +cockroachdb,distsender.batches.partial,count,[OpenMetrics v1] Number of partial batches processed +cockroachdb,distsender.batches.partial.count,count,[OpenMetrics v2] Number of partial batches processed +cockroachdb,distsender.batches.total,count,[OpenMetrics v1] Number of batches processed +cockroachdb,distsender.batches.total.count,count,[OpenMetrics v2] Number of batches processed +cockroachdb,distsender.errors.inleasetransferbackoffs.count,count,Number of times backed off due to NotLeaseHolderErrors during lease transfer +cockroachdb,distsender.errors.notleaseholder,count,"[OpenMetrics v1] Number of NotLeaseHolderErrors encountered +Shown as error" +cockroachdb,distsender.errors.notleaseholder.count,count,"[OpenMetrics v2] Number of NotLeaseHolderErrors encountered +Shown as error" +cockroachdb,distsender.rangefeed.catchup_ranges,gauge,Number of ranges in catchup mode. This counts the number of ranges with an active rangefeed that are performing a catchup scan. +cockroachdb,distsender.rangefeed.error_catchup_ranges.count,count,Number of ranges in catchup mode which experienced an error +cockroachdb,distsender.rangefeed.restart_ranges.count,count,Number of ranges that were restarted due to transient errors +cockroachdb,distsender.rangefeed.retry.logical_ops_missing.count,count,Number of ranges that encountered retryable LOGICAL_OPS_MISSING error +cockroachdb,distsender.rangefeed.retry.no_leaseholder.count,count,Number of ranges that encountered retryable NO_LEASEHOLDER error +cockroachdb,distsender.rangefeed.retry.node_not_found.count,count,Number of ranges that encountered retryable node not found error +cockroachdb,distsender.rangefeed.retry.raft_snapshot.count,count,Number of ranges that encountered retryable RAFT_SNAPSHOT error +cockroachdb,distsender.rangefeed.retry.range_key_mismatch.count,count,Number of ranges that encountered retryable range key mismatch error +cockroachdb,distsender.rangefeed.retry.range_merged.count,count,Number of ranges that encountered retryable RANGE_MERGED error +cockroachdb,distsender.rangefeed.retry.range_not_found.count,count,Number of ranges that encountered retryable range not found error +cockroachdb,distsender.rangefeed.retry.range_split.count,count,Number of ranges that encountered retryable RANGE_SPLIT error +cockroachdb,distsender.rangefeed.retry.rangefeed_closed.count,count,Number of ranges that encountered retryable RANGEFEED_CLOSED error +cockroachdb,distsender.rangefeed.retry.replica_removed.count,count,Number of ranges that encountered retryable REPLICA_REMOVED error +cockroachdb,distsender.rangefeed.retry.send.count,count,Number of ranges that encountered retryable send error +cockroachdb,distsender.rangefeed.retry.slow_consumer.count,count,Number of ranges that encountered retryable SLOW_CONSUMER error +cockroachdb,distsender.rangefeed.retry.store_not_found.count,count,Number of ranges that encountered retryable store not found error +cockroachdb,distsender.rangefeed.retry.stuck.count,count,Number of ranges that encountered retryable stuck error
+cockroachdb,distsender.rangefeed.total_ranges,gauge,Number of ranges executing rangefeed. This counts the number of ranges with an active rangefeed. +cockroachdb,distsender.rangelookups.count,count,Number of range lookups +cockroachdb,distsender.rpc.addsstable.sent.count,count,"Number of AddSSTable requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.adminchangereplicas.sent.count,count,"Number of AdminChangeReplicas requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.adminmerge.sent.count,count,"Number of AdminMerge requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.adminrelocaterange.sent.count,count,"Number of AdminRelocateRange requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.adminscatter.sent.count,count,"Number of AdminScatter requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.adminsplit.sent.count,count,"Number of AdminSplit requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.admintransferlease.sent.count,count,"Number of AdminTransferLease requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.adminunsplit.sent.count,count,"Number of AdminUnsplit requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.adminverifyprotectedtimestamp.sent.count,count,"Number of AdminVerifyProtectedTimestamp requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.barrier.sent.count,count,"Number of Barrier requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.checkconsistency.sent.count,count,"Number of CheckConsistency requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.clearrange.sent.count,count,"Number of ClearRange requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.computechecksum.sent.count,count,"Number of ComputeChecksum requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.conditionalput.sent.count,count,"Number of ConditionalPut requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.delete.sent.count,count,"Number of Delete requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result."
+cockroachdb,distsender.rpc.deleterange.sent.count,count,"Number of DeleteRange requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.endtxn.sent.count,count,"Number of EndTxn requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.err.ambiguousresulterrtype.count,count,Number of AmbiguousResultErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.batchtimestampbeforegcerrtype.count,count,Number of BatchTimestampBeforeGCErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.communicationerrtype.count,count,Number of CommunicationErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.conditionfailederrtype.count,count,Number of ConditionFailedErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.errordetailtype.count,count,Number of ErrorDetailType (tagged by their number) errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.indeterminatecommiterrtype.count,count,Number of IndeterminateCommitErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.integeroverflowerrtype.count,count,Number of IntegerOverflowErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’.
+cockroachdb,distsender.rpc.err.intentmissingerrtype.count,count,Number of IntentMissingErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.internalerrtype.count,count,Number of InternalErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.invalidleaseerrtype.count,count,Number of InvalidLeaseErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.leaserejectederrtype.count,count,Number of LeaseRejectedErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.lockconflicterrtype.count,count,Number of LockConflictErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.mergeinprogresserrtype.count,count,Number of MergeInProgressErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.mintimestampboundunsatisfiableerrtype.count,count,Number of MinTimestampBoundUnsatisfiableErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.mvcchistorymutationerrtype.count,count,Number of MVCCHistoryMutationErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’.
+cockroachdb,distsender.rpc.err.nodeunavailableerrtype.count,count,Number of NodeUnavailableErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.notleaseholdererrtype.count,count,Number of NotLeaseHolderErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.oprequirestxnerrtype.count,count,Number of OpRequiresTxnErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.optimisticevalconflictserrtype.count,count,Number of OptimisticEvalConflictsErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.raftgroupdeletederrtype.count,count,Number of RaftGroupDeletedErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.rangefeedretryerrtype.count,count,Number of RangeFeedRetryErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.rangekeymismatcherrtype.count,count,Number of RangeKeyMismatchErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.rangenotfounderrtype.count,count,Number of RangeNotFoundErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’.
+cockroachdb,distsender.rpc.err.readwithinuncertaintyintervalerrtype.count,count,Number of ReadWithinUncertaintyIntervalErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.refreshfailederrtype.count,count,Number of RefreshFailedErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.replicacorruptionerrtype.count,count,Number of ReplicaCorruptionErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.replicatooolderrtype.count,count,Number of ReplicaTooOldErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.storenotfounderrtype.count,count,Number of StoreNotFoundErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.transactionabortederrtype.count,count,Number of TransactionAbortedErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.transactionpusherrtype.count,count,Number of TransactionPushErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.transactionretryerrtype.count,count,Number of TransactionRetryErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’.
+cockroachdb,distsender.rpc.err.transactionretrywithprotorefresherrtype.count,count,Number of TransactionRetryWithProtoRefreshErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.transactionstatuserrtype.count,count,Number of TransactionStatusErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.txnalreadyencounterederrtype.count,count,Number of TxnAlreadyEncounteredErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.unsupportedrequesterrtype.count,count,Number of UnsupportedRequestErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.writeintenterrtype.count,count,Number of WriteIntentErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.writetooolderrtype.count,count,Number of WriteTooOldErrType errors received by replica-bound RPCs. This counts how often an error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.export.sent.count,count,"Number of Export requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.gc.sent.count,count,"Number of GC requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.get.sent.count,count,"Number of Get requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.heartbeattxn.sent.count,count,"Number of HeartbeatTxn requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.increment.sent.count,count,"Number of Increment requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result."
+cockroachdb,distsender.rpc.initput.sent.count,count,"Number of InitPut requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.isspanempty.sent.count,count,"Number of IsSpanEmpty requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.leaseinfo.sent.count,count,"Number of LeaseInfo requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.merge.sent.count,count,"Number of Merge requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.migrate.sent.count,count,"Number of Migrate requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.probe.sent.count,count,"Number of Probe requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.pushtxn.sent.count,count,"Number of PushTxn requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.put.sent.count,count,"Number of Put requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.queryintent.sent.count,count,"Number of QueryIntent requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.querylocks.sent.count,count,"Number of QueryLocks requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.queryresolvedtimestamp.sent.count,count,"Number of QueryResolvedTimestamp requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.querytxn.sent.count,count,"Number of QueryTxn requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.rangestats.sent.count,count,"Number of RangeStats requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.recomputestats.sent.count,count,"Number of RecomputeStats requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.recovertxn.sent.count,count,"Number of RecoverTxn requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.refresh.sent.count,count,"Number of Refresh requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.refreshrange.sent.count,count,"Number of RefreshRange requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result."
+cockroachdb,distsender.rpc.requestlease.sent.count,count,"Number of RequestLease requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.resolveintent.sent.count,count,"Number of ResolveIntent requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.resolveintentrange.sent.count,count,"Number of ResolveIntentRange requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.reversescan.sent.count,count,"Number of ReverseScan requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.revertrange.sent.count,count,"Number of RevertRange requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.scan.sent.count,count,"Number of Scan requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.sent.count,count,Number of replica-addressed RPCs sent +cockroachdb,distsender.rpc.sent.local,count,[OpenMetrics v1] Number of local RPCs sent +cockroachdb,distsender.rpc.sent.local.count,count,[OpenMetrics v2] Number of local RPCs sent +cockroachdb,distsender.rpc.sent.nextreplicaerror,count,"[OpenMetrics v1] Number of RPCs sent due to per-replica errors +Shown as error" +cockroachdb,distsender.rpc.sent.nextreplicaerror.count,count,"[OpenMetrics v2] Number of RPCs sent due to per-replica errors +Shown as error" +cockroachdb,distsender.rpc.sent.total,count,[OpenMetrics v1] Number of RPCs sent +cockroachdb,distsender.rpc.sent.total.count,count,[OpenMetrics v2] Number of replica-addressed RPCs sent +cockroachdb,distsender.rpc.subsume.sent.count,count,"Number of Subsume requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.transferlease.sent.count,count,"Number of TransferLease requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.truncatelog.sent.count,count,"Number of TruncateLog requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.writebatch.sent.count,count,"Number of WriteBatch requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,exec.error,count,"[OpenMetrics v1] Number of batch KV requests that failed to execute on this node. These are warnings denoting cleanup rather than errors, and can be disregarded as part of operation. +Shown as request" +cockroachdb,exec.error.count,count,"[OpenMetrics v2] Number of batch KV requests that failed to execute on this node. These are warnings denoting cleanup rather than errors, and can be disregarded as part of operation.
+Shown as request" +cockroachdb,exec.latency,gauge,"[OpenMetrics v1] Latency in nanoseconds of batch KV requests executed on this node +Shown as nanosecond" +cockroachdb,exec.latency.bucket,count,"[OpenMetrics v2] Latency in nanoseconds of batch KV requests executed on this node +Shown as nanosecond" +cockroachdb,exec.latency.count,count,"[OpenMetrics v2] Latency in nanoseconds of batch KV requests executed on this node +Shown as nanosecond" +cockroachdb,exec.latency.sum,count,"[OpenMetrics v2] Latency in nanoseconds of batch KV requests executed on this node +Shown as nanosecond" +cockroachdb,exec.success,count,"[OpenMetrics v1] Number of batch KV requests executed successfully on this node +Shown as request" +cockroachdb,exec.success.count,count,"[OpenMetrics v2] Number of batch KV requests executed successfully on this node +Shown as request" +cockroachdb,exportrequest.delay.count,count,Number of Export requests delayed due to concurrent requests. +cockroachdb,follower_reads.success_count.count,count,Number of successful follower reads +cockroachdb,gcbytesage,gauge,"[OpenMetrics v1 & v2] Cumulative age of non-live data in seconds +Shown as second" +cockroachdb,gossip.bytes.received,count,"[OpenMetrics v1] Number of received gossip bytes +Shown as byte" +cockroachdb,gossip.bytes.received.count,count,"[OpenMetrics v2] Number of received gossip bytes +Shown as byte" +cockroachdb,gossip.bytes.sent,count,"[OpenMetrics v1] Number of sent gossip bytes +Shown as byte" +cockroachdb,gossip.bytes.sent.count,count,"[OpenMetrics v2] Number of sent gossip bytes +Shown as byte" +cockroachdb,gossip.connections.incoming,gauge,"[OpenMetrics v1 & v2] Number of active incoming gossip connections +Shown as connection" +cockroachdb,gossip.connections.outgoing,gauge,"[OpenMetrics v1 & v2] Number of active outgoing gossip connections +Shown as connection" +cockroachdb,gossip.connections.refused,count,"[OpenMetrics v1] Number of refused incoming gossip connections +Shown as connection" +cockroachdb,gossip.connections.refused.count,count,"[OpenMetrics v2] Number of refused incoming gossip connections +Shown as connection" +cockroachdb,gossip.infos.received,count,[OpenMetrics v1] Number of received gossip Info objects +cockroachdb,gossip.infos.received.count,count,[OpenMetrics v2] Number of received gossip Info objects +cockroachdb,gossip.infos.sent,count,[OpenMetrics v1] Number of sent gossip Info objects +cockroachdb,gossip.infos.sent.count,count,[OpenMetrics v2] Number of sent gossip Info objects +cockroachdb,intentage,gauge,"[OpenMetrics v1 & v2] Cumulative age of intents in seconds +Shown as second" +cockroachdb,intentbytes,gauge,"[OpenMetrics v1 & v2] Number of bytes in intent KV pairs +Shown as byte" +cockroachdb,intentcount,gauge,"[OpenMetrics v1 & v2] Count of intent keys +Shown as key" +cockroachdb,intentresolver.async.throttled,count,Number of intent resolution attempts not run asynchronously due to throttling +cockroachdb,intentresolver.async.throttled.count,count,Number of intent resolution attempts not run asynchronously due to throttling +cockroachdb,intentresolver.finalized_txns.failed,count,Number of finalized transaction cleanup failures. Transaction cleanup refers to the process of resolving all of a transaction’s intents and then garbage collecting its transaction record. +cockroachdb,intentresolver.finalized_txns.failed.count,count,Number of finalized transaction cleanup failures.
Transaction cleanup refers to the process of resolving all of a transaction’s intents and then garbage collecting its transaction record. +cockroachdb,intentresolver.intents.failed,count,"Number of intent resolution failures. The unit of measurement is a single intent, so if a batch of intent resolution requests fails, the metric will be incremented for each request in the batch." +cockroachdb,intentresolver.intents.failed.count,count,"Number of intent resolution failures. The unit of measurement is a single intent, so if a batch of intent resolution requests fails, the metric will be incremented for each request in the batch." +cockroachdb,intents.abort_attempts,count,Count of (point or range) non-poisoning intent abort evaluation attempts +cockroachdb,intents.abort_attempts.count,count,Count of (point or range) non-poisoning intent abort evaluation attempts +cockroachdb,intents.poison_attempts,count,Count of (point or range) poisoning intent abort evaluation attempts +cockroachdb,intents.poison_attempts.count,count,Count of (point or range) poisoning intent abort evaluation attempts +cockroachdb,intents.resolve_attempts,count,Count of (point or range) intent commit evaluation attempts +cockroachdb,intents.resolve_attempts.count,count,Count of (point or range) intent commit evaluation attempts +cockroachdb,jobs.adopt_iterations.count,count,Number of job-adopt iterations performed by the registry +cockroachdb,jobs.auto.create.stats.currently_paused,gauge,Number of auto_create_stats jobs currently considered Paused +cockroachdb,jobs.auto.create.stats.currently_running,gauge,Number of auto_create_stats jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto.create.stats.resume_failed.count,count,Number of auto_create_stats jobs which failed with a non-retriable error +cockroachdb,jobs.auto_config_env_runner.currently_idle,gauge,Number of auto_config_env_runner jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_config_env_runner.currently_paused,gauge,Number of auto_config_env_runner jobs currently considered Paused +cockroachdb,jobs.auto_config_env_runner.currently_running,gauge,Number of auto_config_env_runner jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_config_env_runner.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_config_env_runner jobs +cockroachdb,jobs.auto_config_env_runner.fail_or_cancel_completed.count,count,Number of auto_config_env_runner jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_config_env_runner.fail_or_cancel_failed.count,count,Number of auto_config_env_runner jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_config_env_runner.fail_or_cancel_retry_error.count,count,Number of auto_config_env_runner jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_config_env_runner.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_config_env_runner jobs +Shown as second" +cockroachdb,jobs.auto_config_env_runner.protected_record_count,gauge,Number of protected timestamp records held by auto_config_env_runner jobs +cockroachdb,jobs.auto_config_env_runner.resume_completed.count,count,Number of auto_config_env_runner jobs which successfully resumed to completion +cockroachdb,jobs.auto_config_env_runner.resume_failed.count,count,Number of auto_config_env_runner jobs which failed
with a non-retriable error +cockroachdb,jobs.auto_config_env_runner.resume_retry_error.count,count,Number of auto_config_env_runner jobs which failed with a retriable error +cockroachdb,jobs.auto_config_runner.currently_idle,gauge,Number of auto_config_runner jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_config_runner.currently_paused,gauge,Number of auto_config_runner jobs currently considered Paused +cockroachdb,jobs.auto_config_runner.currently_running,gauge,Number of auto_config_runner jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_config_runner.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_config_runner jobs +cockroachdb,jobs.auto_config_runner.fail_or_cancel_completed.count,count,Number of auto_config_runner jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_config_runner.fail_or_cancel_failed.count,count,Number of auto_config_runner jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_config_runner.fail_or_cancel_retry_error.count,count,Number of auto_config_runner jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_config_runner.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_config_runner jobs +Shown as second" +cockroachdb,jobs.auto_config_runner.protected_record_count,gauge,Number of protected timestamp records held by auto_config_runner jobs +cockroachdb,jobs.auto_config_runner.resume_completed.count,count,Number of auto_config_runner jobs which successfully resumed to completion +cockroachdb,jobs.auto_config_runner.resume_failed.count,count,Number of auto_config_runner jobs which failed with a non-retriable error +cockroachdb,jobs.auto_config_runner.resume_retry_error.count,count,Number of auto_config_runner jobs which failed with a retriable error +cockroachdb,jobs.auto_config_task.currently_idle,gauge,Number of auto_config_task jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_config_task.currently_paused,gauge,Number of auto_config_task jobs currently considered Paused +cockroachdb,jobs.auto_config_task.currently_running,gauge,Number of auto_config_task jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_config_task.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_config_task jobs +cockroachdb,jobs.auto_config_task.fail_or_cancel_completed.count,count,Number of auto_config_task jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_config_task.fail_or_cancel_failed.count,count,Number of auto_config_task jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_config_task.fail_or_cancel_retry_error.count,count,Number of auto_config_task jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_config_task.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_config_task jobs +Shown as second" +cockroachdb,jobs.auto_config_task.protected_record_count,gauge,Number of protected timestamp records held by auto_config_task jobs +cockroachdb,jobs.auto_config_task.resume_completed.count,count,Number of auto_config_task jobs which successfully resumed to completion +cockroachdb,jobs.auto_config_task.resume_failed.count,count,Number of 
auto_config_task jobs which failed with a non-retriable error +cockroachdb,jobs.auto_config_task.resume_retry_error.count,count,Number of auto_config_task jobs which failed with a retriable error +cockroachdb,jobs.auto_create_stats.currently_idle,gauge,Number of auto_create_stats jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_create_stats.currently_paused,gauge,Number of auto_create_stats jobs currently considered Paused +cockroachdb,jobs.auto_create_stats.currently_running,gauge,Number of auto_create_stats jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_create_stats.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_create_stats jobs +cockroachdb,jobs.auto_create_stats.fail_or_cancel_completed.count,count,Number of auto_create_stats jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_create_stats.fail_or_cancel_failed.count,count,Number of auto_create_stats jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_create_stats.fail_or_cancel_retry_error.count,count,Number of auto_create_stats jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_create_stats.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_create_stats jobs +Shown as second" +cockroachdb,jobs.auto_create_stats.protected_record_count,gauge,Number of protected timestamp records held by auto_create_stats jobs +cockroachdb,jobs.auto_create_stats.resume_completed.count,count,Number of auto_create_stats jobs which successfully resumed to completion +cockroachdb,jobs.auto_create_stats.resume_failed.count,count,Number of auto_create_stats jobs which failed with a non-retriable error +cockroachdb,jobs.auto_create_stats.resume_retry_error.count,count,Number of auto_create_stats jobs which failed with a retriable error +cockroachdb,jobs.auto_schema_telemetry.currently_idle,gauge,Number of auto_schema_telemetry jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_schema_telemetry.currently_paused,gauge,Number of auto_schema_telemetry jobs currently considered Paused +cockroachdb,jobs.auto_schema_telemetry.currently_running,gauge,Number of auto_schema_telemetry jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_schema_telemetry.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_schema_telemetry jobs +cockroachdb,jobs.auto_schema_telemetry.fail_or_cancel_completed.count,count,Number of auto_schema_telemetry jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_schema_telemetry.fail_or_cancel_failed.count,count,Number of auto_schema_telemetry jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_schema_telemetry.fail_or_cancel_retry_error.count,count,Number of auto_schema_telemetry jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_schema_telemetry.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_schema_telemetry jobs +Shown as second" +cockroachdb,jobs.auto_schema_telemetry.protected_record_count,gauge,Number of protected timestamp records held by auto_schema_telemetry jobs +cockroachdb,jobs.auto_schema_telemetry.resume_completed.count,count,Number of auto_schema_telemetry jobs which successfully 
resumed to completion +cockroachdb,jobs.auto_schema_telemetry.resume_failed.count,count,Number of auto_schema_telemetry jobs which failed with a non-retriable error +cockroachdb,jobs.auto_schema_telemetry.resume_retry_error.count,count,Number of auto_schema_telemetry jobs which failed with a retriable error +cockroachdb,jobs.auto_span_config_reconciliation.currently_idle,gauge,Number of auto_span_config_reconciliation jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_span_config_reconciliation.currently_paused,gauge,Number of auto_span_config_reconciliation jobs currently considered Paused +cockroachdb,jobs.auto_span_config_reconciliation.currently_running,gauge,Number of auto_span_config_reconciliation jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_span_config_reconciliation.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_span_config_reconciliation jobs +cockroachdb,jobs.auto_span_config_reconciliation.fail_or_cancel_completed.count,count,Number of auto_span_config_reconciliation jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_span_config_reconciliation.fail_or_cancel_failed.count,count,Number of auto_span_config_reconciliation jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_span_config_reconciliation.fail_or_cancel_retry_error.count,count,Number of auto_span_config_reconciliation jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_span_config_reconciliation.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_span_config_reconciliation jobs +Shown as second" +cockroachdb,jobs.auto_span_config_reconciliation.protected_record_count,gauge,Number of protected timestamp records held by auto_span_config_reconciliation jobs +cockroachdb,jobs.auto_span_config_reconciliation.resume_completed.count,count,Number of auto_span_config_reconciliation jobs which successfully resumed to completion +cockroachdb,jobs.auto_span_config_reconciliation.resume_failed.count,count,Number of auto_span_config_reconciliation jobs which failed with a non-retriable error +cockroachdb,jobs.auto_span_config_reconciliation.resume_retry_error.count,count,Number of auto_span_config_reconciliation jobs which failed with a retriable error +cockroachdb,jobs.auto_sql_stats_compaction.currently_idle,gauge,Number of auto_sql_stats_compaction jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_sql_stats_compaction.currently_paused,gauge,Number of auto_sql_stats_compaction jobs currently considered Paused +cockroachdb,jobs.auto_sql_stats_compaction.currently_running,gauge,Number of auto_sql_stats_compaction jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_sql_stats_compaction.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_sql_stats_compaction jobs +cockroachdb,jobs.auto_sql_stats_compaction.fail_or_cancel_completed.count,count,Number of auto_sql_stats_compaction jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_sql_stats_compaction.fail_or_cancel_failed.count,count,Number of auto_sql_stats_compaction jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_sql_stats_compaction.fail_or_cancel_retry_error.count,count,Number of 
auto_sql_stats_compaction jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_sql_stats_compaction.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_sql_stats_compaction jobs +Shown as second" +cockroachdb,jobs.auto_sql_stats_compaction.protected_record_count,gauge,Number of protected timestamp records held by auto_sql_stats_compaction jobs +cockroachdb,jobs.auto_sql_stats_compaction.resume_completed.count,count,Number of auto_sql_stats_compaction jobs which successfully resumed to completion +cockroachdb,jobs.auto_sql_stats_compaction.resume_failed.count,count,Number of auto_sql_stats_compaction jobs which failed with a non-retriable error +cockroachdb,jobs.auto_sql_stats_compaction.resume_retry_error.count,count,Number of auto_sql_stats_compaction jobs which failed with a retriable error +cockroachdb,jobs.auto_update_sql_activity.currently_idle,gauge,Number of auto_update_sql_activity jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_update_sql_activity.currently_paused,gauge,Number of auto_update_sql_activity jobs currently considered Paused +cockroachdb,jobs.auto_update_sql_activity.currently_running,gauge,Number of auto_update_sql_activity jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_update_sql_activity.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_update_sql_activity jobs +cockroachdb,jobs.auto_update_sql_activity.fail_or_cancel_completed.count,count,Number of auto_update_sql_activity jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_update_sql_activity.fail_or_cancel_failed.count,count,Number of auto_update_sql_activity jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_update_sql_activity.fail_or_cancel_retry_error.count,count,Number of auto_update_sql_activity jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_update_sql_activity.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_update_sql_activity jobs +Shown as second" +cockroachdb,jobs.auto_update_sql_activity.protected_record_count,gauge,Number of protected timestamp records held by auto_update_sql_activity jobs +cockroachdb,jobs.auto_update_sql_activity.resume_completed.count,count,Number of auto_update_sql_activity jobs which successfully resumed to completion +cockroachdb,jobs.auto_update_sql_activity.resume_failed.count,count,Number of auto_update_sql_activity jobs which failed with a non-retriable error +cockroachdb,jobs.auto_update_sql_activity.resume_retry_error.count,count,Number of auto_update_sql_activity jobs which failed with a retriable error +cockroachdb,jobs.backup.currently_idle,gauge,"[OpenMetrics v1 & v2] Number of backup jobs currently considered Idle and can be freely shut down +Shown as job" +cockroachdb,jobs.backup.currently_paused,gauge,Number of backup jobs currently considered Paused +cockroachdb,jobs.backup.currently_running,gauge,"[OpenMetrics v1 & v2] Number of backup jobs currently running in Resume or OnFailOrCancel state +Shown as job" +cockroachdb,jobs.backup.expired_pts_records.count,count,Number of expired protected timestamp records owned by backup jobs +cockroachdb,jobs.backup.fail_or_cancel_completed.count,count,Number of backup jobs which successfully completed their failure or cancelation process 
+cockroachdb,jobs.backup.fail_or_cancel_failed,count,"[OpenMetrics v1] Number of backup jobs which failed with a non-retriable error on their failure or cancelation process +Shown as job" +cockroachdb,jobs.backup.fail_or_cancel_failed.count,count,"[OpenMetrics v2] Number of backup jobs which failed with a non-retriable error on their failure or cancelation process +Shown as job" +cockroachdb,jobs.backup.fail_or_cancel_retry_error,count,"[OpenMetrics v1] Number of backup jobs which failed with a retriable error on their failure or cancelation process +Shown as job" +cockroachdb,jobs.backup.fail_or_cancel_retry_error.count,count,"[OpenMetrics v2] Number of backup jobs which failed with a retriable error on their failure or cancelation process +Shown as job" +cockroachdb,jobs.backup.protected_age_sec,gauge,"The age of the oldest PTS record protected by backup jobs +Shown as second" +cockroachdb,jobs.backup.protected_record_count,gauge,Number of protected timestamp records held by backup jobs +cockroachdb,jobs.backup.resume_completed.count,count,Number of backup jobs which successfully resumed to completion +cockroachdb,jobs.backup.resume_failed,count,"[OpenMetrics v1] Number of backup jobs which failed with a non-retriable error +Shown as job" +cockroachdb,jobs.backup.resume_failed.count,count,"[OpenMetrics v2] Number of backup jobs which failed with a non-retriable error +Shown as job" +cockroachdb,jobs.backup.resume_retry_error,count,"[OpenMetrics v1] Number of backup jobs which failed with a retriable error +Shown as job" +cockroachdb,jobs.backup.resume_retry_error.count,count,"[OpenMetrics v2] Number of backup jobs which failed with a retriable error +Shown as job" +cockroachdb,jobs.changefeed.currently_idle,gauge,Number of changefeed jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.changefeed.currently_paused,gauge,Number of changefeed jobs currently considered Paused +cockroachdb,jobs.changefeed.currently_running,gauge,Number of changefeed jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.changefeed.expired_pts_records.count,count,Number of expired protected timestamp records owned by changefeed jobs +cockroachdb,jobs.changefeed.fail_or_cancel_completed.count,count,Number of changefeed jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.changefeed.fail_or_cancel_failed.count,count,Number of changefeed jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.changefeed.fail_or_cancel_retry_error.count,count,Number of changefeed jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.changefeed.protected_age_sec,gauge,"The age of the oldest PTS record protected by changefeed jobs +Shown as second" +cockroachdb,jobs.changefeed.protected_record_count,gauge,Number of protected timestamp records held by changefeed jobs +cockroachdb,jobs.changefeed.resume.retry.error,count,[OpenMetrics v1] Number of changefeed jobs which failed with a retriable error +cockroachdb,jobs.changefeed.resume.retry.error.count,count,[OpenMetrics v2] Number of changefeed jobs which failed with a retriable error +cockroachdb,jobs.changefeed.resume_completed.count,count,Number of changefeed jobs which successfully resumed to completion +cockroachdb,jobs.changefeed.resume_failed.count,count,Number of changefeed jobs which failed with a non-retriable error +cockroachdb,jobs.changefeed.resume_retry_error.count,count,Number of changefeed jobs which 
failed with a retriable error +cockroachdb,jobs.claimed_jobs.count,count,number of jobs claimed in job-adopt iterations +cockroachdb,jobs.create.stats.currently_running,gauge,Number of create_stats jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.create_stats.currently_idle,gauge,Number of create_stats jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.create_stats.currently_paused,gauge,Number of create_stats jobs currently considered Paused +cockroachdb,jobs.create_stats.currently_running,gauge,Number of create_stats jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.create_stats.expired_pts_records.count,count,Number of expired protected timestamp records owned by create_stats jobs +cockroachdb,jobs.create_stats.fail_or_cancel_completed.count,count,Number of create_stats jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.create_stats.fail_or_cancel_failed.count,count,Number of create_stats jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.create_stats.fail_or_cancel_retry_error.count,count,Number of create_stats jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.create_stats.protected_age_sec,gauge,"The age of the oldest PTS record protected by create_stats jobs +Shown as second" +cockroachdb,jobs.create_stats.protected_record_count,gauge,Number of protected timestamp records held by create_stats jobs +cockroachdb,jobs.create_stats.resume_completed.count,count,Number of create_stats jobs which successfully resumed to completion +cockroachdb,jobs.create_stats.resume_failed.count,count,Number of create_stats jobs which failed with a non-retriable error +cockroachdb,jobs.create_stats.resume_retry_error.count,count,Number of create_stats jobs which failed with a retriable error +cockroachdb,jobs.import.currently_idle,gauge,Number of import jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.import.currently_paused,gauge,Number of import jobs currently considered Paused +cockroachdb,jobs.import.currently_running,gauge,Number of import jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.import.expired_pts_records.count,count,Number of expired protected timestamp records owned by import jobs +cockroachdb,jobs.import.fail_or_cancel_completed.count,count,Number of import jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.import.fail_or_cancel_failed.count,count,Number of import jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.import.fail_or_cancel_retry_error.count,count,Number of import jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.import.protected_age_sec,gauge,"The age of the oldest PTS record protected by import jobs +Shown as second" +cockroachdb,jobs.import.protected_record_count,gauge,Number of protected timestamp records held by import jobs +cockroachdb,jobs.import.resume_completed.count,count,Number of import jobs which successfully resumed to completion +cockroachdb,jobs.import.resume_failed.count,count,Number of import jobs which failed with a non-retriable error +cockroachdb,jobs.import.resume_retry_error.count,count,Number of import jobs which failed with a retriable error +cockroachdb,jobs.key_visualizer.currently_idle,gauge,Number of key_visualizer jobs currently considered Idle 
and can be freely shut down +cockroachdb,jobs.key_visualizer.currently_paused,gauge,Number of key_visualizer jobs currently considered Paused +cockroachdb,jobs.key_visualizer.currently_running,gauge,Number of key_visualizer jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.key_visualizer.expired_pts_records.count,count,Number of expired protected timestamp records owned by key_visualizer jobs +cockroachdb,jobs.key_visualizer.fail_or_cancel_completed.count,count,Number of key_visualizer jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.key_visualizer.fail_or_cancel_failed.count,count,Number of key_visualizer jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.key_visualizer.fail_or_cancel_retry_error.count,count,Number of key_visualizer jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.key_visualizer.protected_age_sec,gauge,"The age of the oldest PTS record protected by key_visualizer jobs +Shown as second" +cockroachdb,jobs.key_visualizer.protected_record_count,gauge,Number of protected timestamp records held by key_visualizer jobs +cockroachdb,jobs.key_visualizer.resume_completed.count,count,Number of key_visualizer jobs which successfully resumed to completion +cockroachdb,jobs.key_visualizer.resume_failed.count,count,Number of key_visualizer jobs which failed with a non-retriable error +cockroachdb,jobs.key_visualizer.resume_retry_error.count,count,Number of key_visualizer jobs which failed with a retriable error +cockroachdb,jobs.metrics.task_failed.count,count,Number of metrics sql activity updater tasks that failed +cockroachdb,jobs.migration.currently_idle,gauge,Number of migration jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.migration.currently_paused,gauge,Number of migration jobs currently considered Paused +cockroachdb,jobs.migration.currently_running,gauge,Number of migration jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.migration.expired_pts_records.count,count,Number of expired protected timestamp records owned by migration jobs +cockroachdb,jobs.migration.fail_or_cancel_completed.count,count,Number of migration jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.migration.fail_or_cancel_failed.count,count,Number of migration jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.migration.fail_or_cancel_retry_error.count,count,Number of migration jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.migration.protected_age_sec,gauge,"The age of the oldest PTS record protected by migration jobs +Shown as second" +cockroachdb,jobs.migration.protected_record_count,gauge,Number of protected timestamp records held by migration jobs +cockroachdb,jobs.migration.resume_completed.count,count,Number of migration jobs which successfully resumed to completion +cockroachdb,jobs.migration.resume_failed.count,count,Number of migration jobs which failed with a non-retriable error +cockroachdb,jobs.migration.resume_retry_error.count,count,Number of migration jobs which failed with a retriable error +cockroachdb,jobs.mvcc_statistics_update.currently_idle,gauge,Number of mvcc_statistics_update jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.mvcc_statistics_update.currently_paused,gauge,Number of 
mvcc_statistics_update jobs currently considered Paused +cockroachdb,jobs.mvcc_statistics_update.currently_running,gauge,Number of mvcc_statistics_update jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.mvcc_statistics_update.expired_pts_records.count,count,Number of expired protected timestamp records owned by mvcc_statistics_update jobs +cockroachdb,jobs.mvcc_statistics_update.fail_or_cancel_completed.count,count,Number of mvcc_statistics_update jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.mvcc_statistics_update.fail_or_cancel_failed.count,count,Number of mvcc_statistics_update jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.mvcc_statistics_update.fail_or_cancel_retry_error.count,count,Number of mvcc_statistics_update jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.mvcc_statistics_update.protected_age_sec,gauge,"The age of the oldest PTS record protected by mvcc_statistics_update jobs +Shown as second" +cockroachdb,jobs.mvcc_statistics_update.protected_record_count,gauge,Number of protected timestamp records held by mvcc_statistics_update jobs +cockroachdb,jobs.mvcc_statistics_update.resume_completed.count,count,Number of mvcc_statistics_update jobs which successfully resumed to completion +cockroachdb,jobs.mvcc_statistics_update.resume_failed.count,count,Number of mvcc_statistics_update jobs which failed with a non-retriable error +cockroachdb,jobs.mvcc_statistics_update.resume_retry_error.count,count,Number of mvcc_statistics_update jobs which failed with a retriable error +cockroachdb,jobs.new_schema_change.currently_idle,gauge,Number of new_schema_change jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.new_schema_change.currently_paused,gauge,Number of new_schema_change jobs currently considered Paused +cockroachdb,jobs.new_schema_change.currently_running,gauge,Number of new_schema_change jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.new_schema_change.expired_pts_records.count,count,Number of expired protected timestamp records owned by new_schema_change jobs +cockroachdb,jobs.new_schema_change.fail_or_cancel_completed.count,count,Number of new_schema_change jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.new_schema_change.fail_or_cancel_failed.count,count,Number of new_schema_change jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.new_schema_change.fail_or_cancel_retry_error.count,count,Number of new_schema_change jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.new_schema_change.protected_age_sec,gauge,"The age of the oldest PTS record protected by new_schema_change jobs +Shown as second" +cockroachdb,jobs.new_schema_change.protected_record_count,gauge,Number of protected timestamp records held by new_schema_change jobs +cockroachdb,jobs.new_schema_change.resume_completed.count,count,Number of new_schema_change jobs which successfully resumed to completion +cockroachdb,jobs.new_schema_change.resume_failed.count,count,Number of new_schema_change jobs which failed with a non-retriable error +cockroachdb,jobs.new_schema_change.resume_retry_error.count,count,Number of new_schema_change jobs which failed with a retriable error +cockroachdb,jobs.poll_jobs_stats.currently_idle,gauge,Number of poll_jobs_stats jobs 
currently considered Idle and can be freely shut down +cockroachdb,jobs.poll_jobs_stats.currently_paused,gauge,Number of poll_jobs_stats jobs currently considered Paused +cockroachdb,jobs.poll_jobs_stats.currently_running,gauge,Number of poll_jobs_stats jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.poll_jobs_stats.expired_pts_records.count,count,Number of expired protected timestamp records owned by poll_jobs_stats jobs +cockroachdb,jobs.poll_jobs_stats.fail_or_cancel_completed.count,count,Number of poll_jobs_stats jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.poll_jobs_stats.fail_or_cancel_failed.count,count,Number of poll_jobs_stats jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.poll_jobs_stats.fail_or_cancel_retry_error.count,count,Number of poll_jobs_stats jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.poll_jobs_stats.protected_age_sec,gauge,"The age of the oldest PTS record protected by poll_jobs_stats jobs +Shown as second" +cockroachdb,jobs.poll_jobs_stats.protected_record_count,gauge,Number of protected timestamp records held by poll_jobs_stats jobs +cockroachdb,jobs.poll_jobs_stats.resume_completed.count,count,Number of poll_jobs_stats jobs which successfully resumed to completion +cockroachdb,jobs.poll_jobs_stats.resume_failed.count,count,Number of poll_jobs_stats jobs which failed with a non-retriable error +cockroachdb,jobs.poll_jobs_stats.resume_retry_error.count,count,Number of poll_jobs_stats jobs which failed with a retriable error +cockroachdb,jobs.replication_stream_ingestion.currently_idle,gauge,Number of replication_stream_ingestion jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.replication_stream_ingestion.currently_paused,gauge,Number of replication_stream_ingestion jobs currently considered Paused +cockroachdb,jobs.replication_stream_ingestion.currently_running,gauge,Number of replication_stream_ingestion jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.replication_stream_ingestion.expired_pts_records.count,count,Number of expired protected timestamp records owned by replication_stream_ingestion jobs +cockroachdb,jobs.replication_stream_ingestion.fail_or_cancel_completed.count,count,Number of replication_stream_ingestion jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.replication_stream_ingestion.fail_or_cancel_failed.count,count,Number of replication_stream_ingestion jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.replication_stream_ingestion.fail_or_cancel_retry_error.count,count,Number of replication_stream_ingestion jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.replication_stream_ingestion.protected_age_sec,gauge,"The age of the oldest PTS record protected by replication_stream_ingestion jobs +Shown as second" +cockroachdb,jobs.replication_stream_ingestion.protected_record_count,gauge,Number of protected timestamp records held by replication_stream_ingestion jobs +cockroachdb,jobs.replication_stream_ingestion.resume_completed.count,count,Number of replication_stream_ingestion jobs which successfully resumed to completion +cockroachdb,jobs.replication_stream_ingestion.resume_failed.count,count,Number of replication_stream_ingestion jobs which failed with a non-retriable error 
+cockroachdb,jobs.replication_stream_ingestion.resume_retry_error.count,count,Number of replication_stream_ingestion jobs which failed with a retriable error +cockroachdb,jobs.replication_stream_producer.currently_idle,gauge,Number of replication_stream_producer jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.replication_stream_producer.currently_paused,gauge,Number of replication_stream_producer jobs currently considered Paused +cockroachdb,jobs.replication_stream_producer.currently_running,gauge,Number of replication_stream_producer jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.replication_stream_producer.expired_pts_records.count,count,Number of expired protected timestamp records owned by replication_stream_producer jobs +cockroachdb,jobs.replication_stream_producer.fail_or_cancel_completed.count,count,Number of replication_stream_producer jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.replication_stream_producer.fail_or_cancel_failed.count,count,Number of replication_stream_producer jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.replication_stream_producer.fail_or_cancel_retry_error.count,count,Number of replication_stream_producer jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.replication_stream_producer.protected_age_sec,gauge,"The age of the oldest PTS record protected by replication_stream_producer jobs +Shown as second" +cockroachdb,jobs.replication_stream_producer.protected_record_count,gauge,Number of protected timestamp records held by replication_stream_producer jobs +cockroachdb,jobs.replication_stream_producer.resume_completed.count,count,Number of replication_stream_producer jobs which successfully resumed to completion +cockroachdb,jobs.replication_stream_producer.resume_failed.count,count,Number of replication_stream_producer jobs which failed with a non-retriable error +cockroachdb,jobs.replication_stream_producer.resume_retry_error.count,count,Number of replication_stream_producer jobs which failed with a retriable error +cockroachdb,jobs.restore.currently_idle,gauge,Number of restore jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.restore.currently_paused,gauge,Number of restore jobs currently considered Paused +cockroachdb,jobs.restore.currently_running,gauge,Number of restore jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.restore.expired_pts_records.count,count,Number of expired protected timestamp records owned by restore jobs +cockroachdb,jobs.restore.fail_or_cancel_completed.count,count,Number of restore jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.restore.fail_or_cancel_failed.count,count,Number of restore jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.restore.fail_or_cancel_retry_error.count,count,Number of restore jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.restore.protected_age_sec,gauge,"The age of the oldest PTS record protected by restore jobs +Shown as second" +cockroachdb,jobs.restore.protected_record_count,gauge,Number of protected timestamp records held by restore jobs +cockroachdb,jobs.restore.resume_completed.count,count,Number of restore jobs which successfully resumed to completion 
+cockroachdb,jobs.restore.resume_failed.count,count,Number of restore jobs which failed with a non-retriable error +cockroachdb,jobs.restore.resume_retry_error.count,count,Number of restore jobs which failed with a retriable error +cockroachdb,jobs.resumed_claimed_jobs.count,count,number of claimed-jobs resumed in job-adopt iterations +cockroachdb,jobs.row.level.ttl.currently_paused,gauge,Number of row_level_ttl jobs currently considered Paused +cockroachdb,jobs.row.level.ttl.currently_running,gauge,Number of row_level_ttl jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.row.level.ttl.resume_completed.count,count,Number of row_level_ttl jobs which successfully resumed to completion +cockroachdb,jobs.row.level.ttl.resume_failed.count,count,Number of row_level_ttl jobs which failed with a non-retriable error +cockroachdb,jobs.row.level.ttl.rows_deleted.count,count,Number of rows deleted by the row level TTL job. +cockroachdb,jobs.row.level.ttl.rows_selected.count,count,Number of rows selected for deletion by the row level TTL job. +cockroachdb,jobs.row_level_ttl.currently_idle,gauge,Number of row_level_ttl jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.row_level_ttl.currently_paused,gauge,Number of row_level_ttl jobs currently considered Paused +cockroachdb,jobs.row_level_ttl.currently_running,gauge,Number of row_level_ttl jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.row_level_ttl.delete_duration.bucket,count,"Duration for delete requests during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.delete_duration.count,count,"Duration for delete requests during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.delete_duration.sum,count,"Duration for delete requests during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.expired_pts_records.count,count,Number of expired protected timestamp records owned by row_level_ttl jobs +cockroachdb,jobs.row_level_ttl.fail_or_cancel_completed.count,count,Number of row_level_ttl jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.row_level_ttl.fail_or_cancel_failed.count,count,Number of row_level_ttl jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.row_level_ttl.fail_or_cancel_retry_error.count,count,Number of row_level_ttl jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.row_level_ttl.num_active_spans,gauge,Number of active spans the TTL job is deleting from. +cockroachdb,jobs.row_level_ttl.protected_age_sec,gauge,"The age of the oldest PTS record protected by row_level_ttl jobs +Shown as second" +cockroachdb,jobs.row_level_ttl.protected_record_count,gauge,Number of protected timestamp records held by row_level_ttl jobs +cockroachdb,jobs.row_level_ttl.resume_completed.count,count,Number of row_level_ttl jobs which successfully resumed to completion +cockroachdb,jobs.row_level_ttl.resume_failed.count,count,Number of row_level_ttl jobs which failed with a non-retriable error +cockroachdb,jobs.row_level_ttl.resume_retry_error.count,count,Number of row_level_ttl jobs which failed with a retriable error +cockroachdb,jobs.row_level_ttl.rows_deleted.count,count,Number of rows deleted by the row level TTL job. +cockroachdb,jobs.row_level_ttl.rows_selected.count,count,Number of rows selected for deletion by the row level TTL job. 
+cockroachdb,jobs.row_level_ttl.select_duration.bucket,count,"Duration for select requests during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.select_duration.count,count,"Duration for select requests during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.select_duration.sum,count,"Duration for select requests during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.span_total_duration.bucket,count,"Duration for processing a span during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.span_total_duration.count,count,"Duration for processing a span during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.span_total_duration.sum,count,"Duration for processing a span during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.total_expired_rows,gauge,Approximate number of rows that have expired the TTL on the TTL table. +cockroachdb,jobs.row_level_ttl.total_rows,gauge,Approximate number of rows on the TTL table. +cockroachdb,jobs.running_non_idle,gauge,number of running jobs that are not idle +cockroachdb,jobs.schema_change.currently_idle,gauge,Number of schema_change jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.schema_change.currently_paused,gauge,Number of schema_change jobs currently considered Paused +cockroachdb,jobs.schema_change.currently_running,gauge,Number of schema_change jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.schema_change.expired_pts_records.count,count,Number of expired protected timestamp records owned by schema_change jobs +cockroachdb,jobs.schema_change.fail_or_cancel_completed.count,count,Number of schema_change jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.schema_change.fail_or_cancel_failed.count,count,Number of schema_change jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.schema_change.fail_or_cancel_retry_error.count,count,Number of schema_change jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.schema_change.protected_age_sec,gauge,"The age of the oldest PTS record protected by schema_change jobs +Shown as second" +cockroachdb,jobs.schema_change.protected_record_count,gauge,Number of protected timestamp records held by schema_change jobs +cockroachdb,jobs.schema_change.resume_completed.count,count,Number of schema_change jobs which successfully resumed to completion +cockroachdb,jobs.schema_change.resume_failed.count,count,Number of schema_change jobs which failed with a non-retriable error +cockroachdb,jobs.schema_change.resume_retry_error.count,count,Number of schema_change jobs which failed with a retriable error +cockroachdb,jobs.schema_change_gc.currently_idle,gauge,Number of schema_change_gc jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.schema_change_gc.currently_paused,gauge,Number of schema_change_gc jobs currently considered Paused +cockroachdb,jobs.schema_change_gc.currently_running,gauge,Number of schema_change_gc jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.schema_change_gc.expired_pts_records.count,count,Number of expired protected timestamp records owned by schema_change_gc jobs +cockroachdb,jobs.schema_change_gc.fail_or_cancel_completed.count,count,Number of schema_change_gc jobs which successfully completed their failure or cancelation process 
+cockroachdb,jobs.schema_change_gc.fail_or_cancel_failed.count,count,Number of schema_change_gc jobs which failed with a non-retriable error on their failure or cancelation process
+cockroachdb,jobs.schema_change_gc.fail_or_cancel_retry_error.count,count,Number of schema_change_gc jobs which failed with a retriable error on their failure or cancelation process
+cockroachdb,jobs.schema_change_gc.protected_age_sec,gauge,"The age of the oldest PTS record protected by schema_change_gc jobs
+Shown as second"
+cockroachdb,jobs.schema_change_gc.protected_record_count,gauge,Number of protected timestamp records held by schema_change_gc jobs
+cockroachdb,jobs.schema_change_gc.resume_completed.count,count,Number of schema_change_gc jobs which successfully resumed to completion
+cockroachdb,jobs.schema_change_gc.resume_failed.count,count,Number of schema_change_gc jobs which failed with a non-retriable error
+cockroachdb,jobs.schema_change_gc.resume_retry_error.count,count,Number of schema_change_gc jobs which failed with a retriable error
+cockroachdb,jobs.typedesc_schema_change.currently_idle,gauge,Number of typedesc_schema_change jobs currently considered Idle and can be freely shut down
+cockroachdb,jobs.typedesc_schema_change.currently_paused,gauge,Number of typedesc_schema_change jobs currently considered Paused
+cockroachdb,jobs.typedesc_schema_change.currently_running,gauge,Number of typedesc_schema_change jobs currently running in Resume or OnFailOrCancel state
+cockroachdb,jobs.typedesc_schema_change.expired_pts_records.count,count,Number of expired protected timestamp records owned by typedesc_schema_change jobs
+cockroachdb,jobs.typedesc_schema_change.fail_or_cancel_completed.count,count,Number of typedesc_schema_change jobs which successfully completed their failure or cancelation process
+cockroachdb,jobs.typedesc_schema_change.fail_or_cancel_failed.count,count,Number of typedesc_schema_change jobs which failed with a non-retriable error on their failure or cancelation process
+cockroachdb,jobs.typedesc_schema_change.fail_or_cancel_retry_error.count,count,Number of typedesc_schema_change jobs which failed with a retriable error on their failure or cancelation process
+cockroachdb,jobs.typedesc_schema_change.protected_age_sec,gauge,"The age of the oldest PTS record protected by typedesc_schema_change jobs
+Shown as second"
+cockroachdb,jobs.typedesc_schema_change.protected_record_count,gauge,Number of protected timestamp records held by typedesc_schema_change jobs
+cockroachdb,jobs.typedesc_schema_change.resume_completed.count,count,Number of typedesc_schema_change jobs which successfully resumed to completion
+cockroachdb,jobs.typedesc_schema_change.resume_failed.count,count,Number of typedesc_schema_change jobs which failed with a non-retriable error
+cockroachdb,jobs.typedesc_schema_change.resume_retry_error.count,count,Number of typedesc_schema_change jobs which failed with a retriable error
+cockroachdb,keybytes,gauge,"[OpenMetrics v1 & v2] Number of bytes taken up by keys
+Shown as byte"
+cockroachdb,keycount,gauge,"[OpenMetrics v1 & v2] Count of all keys
+Shown as key"
+cockroachdb,kv.allocator.load_based_lease_transfers.cannot_find_better_candidate.count,count,The number of times the allocator determined that the lease was on the best possible replica
+cockroachdb,kv.allocator.load_based_lease_transfers.delta_not_significant.count,count,The number of times the allocator determined that the delta between the existing store and the best candidate was not significant
+cockroachdb,kv.allocator.load_based_lease_transfers.existing_not_overfull.count,count,The number of times the allocator determined that the lease was not on an overfull store
+cockroachdb,kv.allocator.load_based_lease_transfers.follow_the_workload.count,count,The number of times the allocator determined that the lease should be transferred to another replica for locality.
+cockroachdb,kv.allocator.load_based_lease_transfers.missing_stats_for_existing_stores.count,count,The number of times the allocator was missing qps stats for the leaseholder
+cockroachdb,kv.allocator.load_based_lease_transfers.should_transfer.count,count,The number of times the allocator determined that the lease should be transferred to another replica for better load distribution
+cockroachdb,kv.allocator.load_based_replica_rebalancing.cannot_find_better_candidate.count,count,The number of times the allocator determined that the range was on the best possible stores
+cockroachdb,kv.allocator.load_based_replica_rebalancing.delta_not_significant.count,count,The number of times the allocator determined that the delta between an existing store and the best replacement candidate was not high enough
+cockroachdb,kv.allocator.load_based_replica_rebalancing.existing_not_overfull.count,count,The number of times the allocator determined that none of the range’s replicas were on overfull stores
+cockroachdb,kv.allocator.load_based_replica_rebalancing.missing_stats_for_existing_store.count,count,The number of times the allocator was missing the qps stats for the existing store
+cockroachdb,kv.allocator.load_based_replica_rebalancing.should_transfer.count,count,The number of times the allocator determined that the replica should be rebalanced to another store for better load distribution
+cockroachdb,kv.closed_timestamp.max_behind_nanos,gauge,"Largest latency between realtime and replica max closed timestamp
+Shown as nanosecond"
+cockroachdb,kv.concurrency.avg_lock_hold_duration_nanos,gauge,"Average lock hold duration across locks currently held in lock tables. Does not include replicated locks (intents) that are not held in memory
+Shown as nanosecond"
+cockroachdb,kv.concurrency.avg_lock_wait_duration_nanos,gauge,"Average lock wait duration across requests currently waiting in lock wait-queues
+Shown as nanosecond"
+cockroachdb,kv.concurrency.lock_wait_queue_waiters,gauge,Number of requests actively waiting in a lock wait-queue
+cockroachdb,kv.concurrency.locks,gauge,Number of active locks held in lock tables. Does not include replicated locks (intents) that are not held in memory
+cockroachdb,kv.concurrency.locks_with_wait_queues,gauge,Number of active locks held in lock tables with active wait-queues
+cockroachdb,kv.concurrency.max_lock_hold_duration_nanos,gauge,"Maximum length of time any lock in a lock table is held. Does not include replicated locks (intents) that are not held in memory
+Shown as nanosecond"
+cockroachdb,kv.concurrency.max_lock_wait_duration_nanos,gauge,"Maximum lock wait duration across requests currently waiting in lock wait-queues
+Shown as nanosecond"
+cockroachdb,kv.concurrency.max_lock_wait_queue_waiters_for_lock,gauge,Maximum number of requests actively waiting in any single lock wait-queue
+cockroachdb,kv.loadsplitter.nosplitkey.count,count,Load-based splitter could not find a split key.
+cockroachdb,kv.loadsplitter.popularkey.count,count,Load-based splitter could not find a split key and the most popular sampled split key occurs in >= 25% of the samples.
+cockroachdb,kv.prober.planning_attempts.count,count,Number of attempts at planning out probes made; in order to probe KV we need to plan out which ranges to probe.
+cockroachdb,kv.prober.planning_failures.count,count,"Number of attempts at planning out probes that failed; in order to probe KV we need to plan out which ranges to probe; if planning fails, then kvprober is not able to send probes to all ranges; consider alerting on this metric as a result"
+cockroachdb,kv.prober.read.attempts.count,count,"Number of attempts made to read probe KV, regardless of outcome"
+cockroachdb,kv.prober.read.failures.count,count,"Number of attempts made to read probe KV that failed, whether due to error or timeout"
+cockroachdb,kv.prober.read.latency.bucket,count,"Latency of successful KV read probes
+Shown as nanosecond"
+cockroachdb,kv.prober.read.latency.count,count,"Latency of successful KV read probes
+Shown as nanosecond"
+cockroachdb,kv.prober.read.latency.sum,count,"Latency of successful KV read probes
+Shown as nanosecond"
+cockroachdb,kv.prober.write.attempts.count,count,"Number of attempts made to write probe KV, regardless of outcome"
+cockroachdb,kv.prober.write.failures.count,count,"Number of attempts made to write probe KV that failed, whether due to error or timeout"
+cockroachdb,kv.prober.write.latency.bucket,count,"Latency of successful KV write probes
+Shown as nanosecond"
+cockroachdb,kv.prober.write.latency.count,count,"Latency of successful KV write probes
+Shown as nanosecond"
+cockroachdb,kv.prober.write.latency.sum,count,"Latency of successful KV write probes
+Shown as nanosecond"
+cockroachdb,kv.prober.write.quarantine.oldest_duration,gauge,"The duration that the oldest range in the write quarantine pool has remained
+Shown as second"
+cockroachdb,kv.protectedts.reconciliation.errors.count,count,number of errors encountered during reconciliation runs on this node
+cockroachdb,kv.protectedts.reconciliation.num_runs.count,count,number of successful reconciliation runs on this node
+cockroachdb,kv.protectedts.reconciliation.records_processed.count,count,number of records processed without error during reconciliation on this node
+cockroachdb,kv.protectedts.reconciliation.records_removed.count,count,number of records removed during reconciliation runs on this node
+cockroachdb,kv.rangefeed.budget_allocation_blocked.count,count,Number of times RangeFeed waited for budget availability
+cockroachdb,kv.rangefeed.budget_allocation_failed.count,count,Number of times RangeFeed failed because memory budget was exceeded
+cockroachdb,kv.rangefeed.catchup_scan_nanos.count,count,"Time spent in RangeFeed catchup scan
+Shown as nanosecond"
+cockroachdb,kv.rangefeed.mem_shared,gauge,"Memory usage by rangefeeds
+Shown as byte"
+cockroachdb,kv.rangefeed.mem_system,gauge,"Memory usage by rangefeeds on system ranges
+Shown as byte"
+cockroachdb,kv.rangefeed.processors_goroutine,gauge,Number of active RangeFeed processors using goroutines
+cockroachdb,kv.rangefeed.processors_scheduler,gauge,Number of active RangeFeed processors using scheduler
+cockroachdb,kv.rangefeed.registrations,gauge,Number of active RangeFeed registrations
+cockroachdb,kv.rangefeed.scheduler.normal.latency.bucket,count,"KV RangeFeed normal scheduler latency
+Shown as nanosecond"
+cockroachdb,kv.rangefeed.scheduler.normal.latency.count,count,"KV RangeFeed normal scheduler latency
+Shown as nanosecond"
+cockroachdb,kv.rangefeed.scheduler.normal.latency.sum,count,"KV RangeFeed normal scheduler latency
+Shown as nanosecond"
+cockroachdb,kv.rangefeed.scheduler.normal.queue_size,gauge,Number of entries in the KV RangeFeed normal scheduler queue
+cockroachdb,kv.rangefeed.scheduler.system.latency.bucket,count,"KV RangeFeed system scheduler latency
+Shown as nanosecond"
+cockroachdb,kv.rangefeed.scheduler.system.latency.count,count,"KV RangeFeed system scheduler latency
+Shown as nanosecond"
+cockroachdb,kv.rangefeed.scheduler.system.latency.sum,count,"KV RangeFeed system scheduler latency
+Shown as nanosecond"
+cockroachdb,kv.rangefeed.scheduler.system.queue_size,gauge,Number of entries in the KV RangeFeed system scheduler queue
+cockroachdb,kv.replica_circuit_breaker.num_tripped_events.count,count,Number of times the per-Replica circuit breakers tripped since process start.
+cockroachdb,kv.replica_circuit_breaker.num_tripped_replicas,gauge,"Number of Replicas for which the per-Replica circuit breaker is currently tripped. A nonzero value indicates range or replica unavailability, and should be investigated. Replicas in this state will fail-fast all inbound requests."
+cockroachdb,kv.replica_read_batch_evaluate.dropped_latches_before_eval.count,count,Number of times read-only batches dropped latches before evaluation.
+cockroachdb,kv.replica_read_batch_evaluate.latency.bucket,count,"Execution duration for evaluating a BatchRequest on the read-only path after latches have been acquired. A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately.
+Shown as nanosecond"
+cockroachdb,kv.replica_read_batch_evaluate.latency.count,count,"Execution duration for evaluating a BatchRequest on the read-only path after latches have been acquired. A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately.
+Shown as nanosecond"
+cockroachdb,kv.replica_read_batch_evaluate.latency.sum,count,"Execution duration for evaluating a BatchRequest on the read-only path after latches have been acquired. A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately.
+Shown as nanosecond"
+cockroachdb,kv.replica_read_batch_evaluate.without_interleaving_iter.count,count,Number of read-only batches evaluated without an intent interleaving iter.
+cockroachdb,kv.replica_write_batch_evaluate.latency.bucket,count,"Execution duration for evaluating a BatchRequest on the read-write path after latches have been acquired. A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately. Note that the measurement does not include the duration for replicating the evaluated command.
+Shown as nanosecond"
+cockroachdb,kv.replica_write_batch_evaluate.latency.count,count,"Execution duration for evaluating a BatchRequest on the read-write path after latches have been acquired. A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately. Note that the measurement does not include the duration for replicating the evaluated command.
+Shown as nanosecond"
+cockroachdb,kv.replica_write_batch_evaluate.latency.sum,count,"Execution duration for evaluating a BatchRequest on the read-write path after latches have been acquired. A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately. Note that the measurement does not include the duration for replicating the evaluated command.
+Shown as nanosecond"
+cockroachdb,kv.tenant_rate_limit.current_blocked,gauge,Number of requests currently blocked by the rate limiter
+cockroachdb,kv.tenant_rate_limit.num_tenants,gauge,Number of tenants currently being tracked
+cockroachdb,kv.tenant_rate_limit.read_batches_admitted.count,count,Number of read batches admitted by the rate limiter
+cockroachdb,kv.tenant_rate_limit.read_bytes_admitted.count,count,"Number of read bytes admitted by the rate limiter
+Shown as byte"
+cockroachdb,kv.tenant_rate_limit.read_requests_admitted.count,count,Number of read requests admitted by the rate limiter
+cockroachdb,kv.tenant_rate_limit.write_batches_admitted.count,count,Number of write batches admitted by the rate limiter
+cockroachdb,kv.tenant_rate_limit.write_bytes_admitted.count,count,"Number of write bytes admitted by the rate limiter
+Shown as byte"
+cockroachdb,kv.tenant_rate_limit.write_requests_admitted.count,count,Number of write requests admitted by the rate limiter
+cockroachdb,kvadmission.flow_controller.elastic_blocked_stream_count,gauge,Number of replication streams with no flow tokens available for elastic requests
+cockroachdb,kvadmission.flow_controller.elastic_requests_admitted.count,count,Number of elastic requests admitted by the flow controller
+cockroachdb,kvadmission.flow_controller.elastic_requests_bypassed.count,count,Number of elastic waiting requests that bypassed the flow controller due to disconnecting streams
+cockroachdb,kvadmission.flow_controller.elastic_requests_errored.count,count,Number of elastic requests that errored out while waiting for flow tokens
+cockroachdb,kvadmission.flow_controller.elastic_requests_waiting,gauge,Number of elastic requests waiting for flow tokens
+cockroachdb,kvadmission.flow_controller.elastic_stream_count,gauge,Total number of replication streams for elastic requests
+cockroachdb,kvadmission.flow_controller.elastic_tokens_available,gauge,"Flow tokens available for elastic requests, across all replication streams
+Shown as byte"
+cockroachdb,kvadmission.flow_controller.elastic_tokens_deducted.count,count,"Flow tokens deducted by elastic requests, across all replication streams
+Shown as byte"
+cockroachdb,kvadmission.flow_controller.elastic_tokens_returned.count,count,"Flow tokens returned by elastic requests, across all replication streams
+Shown as byte"
+cockroachdb,kvadmission.flow_controller.elastic_tokens_unaccounted.count,count,"Flow tokens returned by elastic requests that were unaccounted for, across all replication streams
+Shown as byte"
+cockroachdb,kvadmission.flow_controller.elastic_wait_duration.bucket,count,"Latency histogram for time elastic requests spent waiting for flow tokens
+Shown as nanosecond"
+cockroachdb,kvadmission.flow_controller.elastic_wait_duration.count,count,"Latency histogram for time elastic requests spent waiting for flow tokens
+Shown as nanosecond"
+cockroachdb,kvadmission.flow_controller.elastic_wait_duration.sum,count,"Latency histogram for time elastic requests spent waiting for flow tokens
+Shown as nanosecond"
+cockroachdb,kvadmission.flow_controller.regular_blocked_stream_count,gauge,Number of replication streams with no flow tokens available for regular requests
+cockroachdb,kvadmission.flow_controller.regular_requests_admitted.count,count,Number of regular requests admitted by the flow controller
+cockroachdb,kvadmission.flow_controller.regular_requests_bypassed.count,count,Number of regular waiting requests that bypassed the flow controller due to disconnecting streams +cockroachdb,kvadmission.flow_controller.regular_requests_errored.count,count,Number of regular requests that errored out while waiting for flow tokens +cockroachdb,kvadmission.flow_controller.regular_requests_waiting,gauge,Number of regular requests waiting for flow tokens +cockroachdb,kvadmission.flow_controller.regular_stream_count,gauge,Total number of replication streams for regular requests +cockroachdb,kvadmission.flow_controller.regular_tokens_available,gauge,"Flow tokens available for regular requests, across all replication streams +Shown as byte" +cockroachdb,kvadmission.flow_controller.regular_tokens_deducted.count,count,"Flow tokens deducted by regular requests, across all replication streams +Shown as byte" +cockroachdb,kvadmission.flow_controller.regular_tokens_returned.count,count,"Flow tokens returned by regular requests, across all replication streams +Shown as byte" +cockroachdb,kvadmission.flow_controller.regular_tokens_unaccounted.count,count,"Flow tokens returned by regular requests that were unaccounted for, across all replication streams +Shown as byte" +cockroachdb,kvadmission.flow_controller.regular_wait_duration.bucket,count,"Latency histogram for time regular requests spent waiting for flow tokens +Shown as nanosecond" +cockroachdb,kvadmission.flow_controller.regular_wait_duration.count,count,"Latency histogram for time regular requests spent waiting for flow tokens +Shown as nanosecond" +cockroachdb,kvadmission.flow_controller.regular_wait_duration.sum,count,"Latency histogram for time regular requests spent waiting for flow tokens +Shown as nanosecond" +cockroachdb,kvadmission.flow_handle.elastic_requests_admitted.count,count,Number of elastic requests admitted by the flow handle +cockroachdb,kvadmission.flow_handle.elastic_requests_errored.count,count,"Number of elastic requests that errored out while waiting for flow tokens, at the handle level" +cockroachdb,kvadmission.flow_handle.elastic_requests_waiting,gauge,"Number of elastic requests waiting for flow tokens, at the handle level" +cockroachdb,kvadmission.flow_handle.elastic_wait_duration.bucket,count,"Latency histogram for time elastic requests spent waiting for flow tokens, at the handle level +Shown as nanosecond" +cockroachdb,kvadmission.flow_handle.elastic_wait_duration.count,count,"Latency histogram for time elastic requests spent waiting for flow tokens, at the handle level +Shown as nanosecond" +cockroachdb,kvadmission.flow_handle.elastic_wait_duration.sum,count,"Latency histogram for time elastic requests spent waiting for flow tokens, at the handle level +Shown as nanosecond" +cockroachdb,kvadmission.flow_handle.regular_requests_admitted.count,count,Number of regular requests admitted by the flow handle +cockroachdb,kvadmission.flow_handle.regular_requests_errored.count,count,"Number of regular requests that errored out while waiting for flow tokens, at the handle level" +cockroachdb,kvadmission.flow_handle.regular_requests_waiting,gauge,"Number of regular requests waiting for flow tokens, at the handle level" +cockroachdb,kvadmission.flow_handle.regular_wait_duration.bucket,count,"Latency histogram for time regular requests spent waiting for flow tokens, at the handle level +Shown as nanosecond" +cockroachdb,kvadmission.flow_handle.regular_wait_duration.count,count,"Latency histogram for time regular requests spent 
waiting for flow tokens, at the handle level +Shown as nanosecond" +cockroachdb,kvadmission.flow_handle.regular_wait_duration.sum,count,"Latency histogram for time regular requests spent waiting for flow tokens, at the handle level +Shown as nanosecond" +cockroachdb,kvadmission.flow_handle.streams_connected.count,count,"Number of times we’ve connected to a stream, at the handle level" +cockroachdb,kvadmission.flow_handle.streams_disconnected.count,count,"Number of times we’ve disconnected from a stream, at the handle level" +cockroachdb,kvadmission.flow_token_dispatch.coalesced_elastic.count,count,Number of coalesced elastic flow token dispatches (where we’re informing the sender of a higher log entry being admitted) +cockroachdb,kvadmission.flow_token_dispatch.coalesced_regular.count,count,Number of coalesced regular flow token dispatches (where we’re informing the sender of a higher log entry being admitted) +cockroachdb,kvadmission.flow_token_dispatch.local_elastic.count,count,Number of local elastic flow token dispatches +cockroachdb,kvadmission.flow_token_dispatch.local_regular.count,count,Number of local regular flow token dispatches +cockroachdb,kvadmission.flow_token_dispatch.pending_elastic,gauge,Number of pending elastic flow token dispatches +cockroachdb,kvadmission.flow_token_dispatch.pending_nodes,gauge,Number of nodes pending flow token dispatches +cockroachdb,kvadmission.flow_token_dispatch.pending_regular,gauge,Number of pending regular flow token dispatches +cockroachdb,kvadmission.flow_token_dispatch.remote_elastic.count,count,Number of remote elastic flow token dispatches +cockroachdb,kvadmission.flow_token_dispatch.remote_regular.count,count,Number of remote regular flow token dispatches +cockroachdb,lastupdatenanos,gauge,"[OpenMetrics v1 & v2] Time in nanoseconds since Unix epoch at which bytes/keys/intents metrics were last updated +Shown as nanosecond" +cockroachdb,leases.epoch,gauge,[OpenMetrics v1 & v2] Number of replica leaseholders using epoch-based leases +cockroachdb,leases.error,count,"[OpenMetrics v1] Number of failed lease requests +Shown as request" +cockroachdb,leases.error.count,count,"[OpenMetrics v2] Number of failed lease requests +Shown as request" +cockroachdb,leases.expiration,gauge,[OpenMetrics v1 & v2] Number of replica leaseholders using expiration-based leases +cockroachdb,leases.liveness,gauge,Number of replica leaseholders for the liveness range(s) +cockroachdb,leases.preferences.less_preferred,gauge,Number of replica leaseholders which satisfy a lease preference which is not the most preferred +cockroachdb,leases.preferences.violating,gauge,Number of replica leaseholders which violate lease preferences +cockroachdb,leases.requests.latency.bucket,count,"Lease request latency (all types and outcomes, coalesced) +Shown as nanosecond" +cockroachdb,leases.requests.latency.count,count,"Lease request latency (all types and outcomes, coalesced) +Shown as nanosecond" +cockroachdb,leases.requests.latency.sum,count,"Lease request latency (all types and outcomes, coalesced) +Shown as nanosecond" +cockroachdb,leases.success,count,"[OpenMetrics v1] Number of successful lease requests +Shown as request" +cockroachdb,leases.success.count,count,"[OpenMetrics v2] Number of successful lease requests +Shown as request" +cockroachdb,leases.transfers.error,count,[OpenMetrics v1] Number of failed lease transfers +cockroachdb,leases.transfers.error.count,count,[OpenMetrics v2] Number of failed lease transfers +cockroachdb,leases.transfers.success,count,[OpenMetrics 
v1] Number of successful lease transfers +cockroachdb,leases.transfers.success.count,count,[OpenMetrics v2] Number of successful lease transfers +cockroachdb,livebytes,gauge,"[OpenMetrics v1 & v2] Number of bytes of live data (keys plus values) +Shown as byte" +cockroachdb,livecount,gauge,"[OpenMetrics v1 & v2] Count of live keys +Shown as key" +cockroachdb,liveness.epochincrements,count,[OpenMetrics v1] Number of times this node has incremented its liveness epoch +cockroachdb,liveness.epochincrements.count,count,[OpenMetrics v2] Number of times this node has incremented its liveness epoch +cockroachdb,liveness.heartbeatfailures,count,[OpenMetrics v1] Number of failed node liveness heartbeats from this node +cockroachdb,liveness.heartbeatfailures.count,count,[OpenMetrics v2] Number of failed node liveness heartbeats from this node +cockroachdb,liveness.heartbeatlatency,gauge,"[OpenMetrics v1] Node liveness heartbeat latency in nanoseconds +Shown as nanosecond" +cockroachdb,liveness.heartbeatlatency.bucket,count,"[OpenMetrics v2] Node liveness heartbeat latency in nanoseconds +Shown as nanosecond" +cockroachdb,liveness.heartbeatlatency.count,count,"[OpenMetrics v2] Node liveness heartbeat latency in nanoseconds +Shown as nanosecond" +cockroachdb,liveness.heartbeatlatency.sum,count,"[OpenMetrics v2] Node liveness heartbeat latency in nanoseconds +Shown as nanosecond" +cockroachdb,liveness.heartbeatsinflight,gauge,Number of in-flight liveness heartbeats from this node +cockroachdb,liveness.heartbeatsuccesses,count,[OpenMetrics v1] Number of successful node liveness heartbeats from this node +cockroachdb,liveness.heartbeatsuccesses.count,count,[OpenMetrics v2] Number of successful node liveness heartbeats from this node +cockroachdb,liveness.livenodes,gauge,[OpenMetrics v1 & v2] Number of live nodes in the cluster (will be 0 if this node is not itself live) +cockroachdb,lockbytes,gauge,"Number of bytes taken up by replicated lock key-values (shared and exclusive strength, not intent strength) +Shown as byte" +cockroachdb,lockcount,gauge,"Count of replicated locks (shared, exclusive, and intent strength)" +cockroachdb,log.buffered.messages.dropped.count,count,"Count of log messages that are dropped by buffered log sinks. When CRDB attempts to buffer a log message in a buffered log sink whose buffer is already full, it drops the oldest buffered messages to make space for the new message" +cockroachdb,log.fluent.sink.conn.errors.count,count,Number of connection errors experienced by fluent-server logging sinks +cockroachdb,log.messages.count,count,Count of messages logged on the node since startup. Note that this does not measure the fan-out of single log messages to the various configured logging sinks. 
+cockroachdb,node_id,gauge,[OpenMetrics v1 & v2] node ID with labels for advertised RPC and HTTP addresses +cockroachdb,physical_replication.admit_latency.bucket,count,"Event admission latency: a difference between event MVCC timestamp and the time it was admitted into ingestion processor +Shown as nanosecond" +cockroachdb,physical_replication.admit_latency.count,count,"Event admission latency: a difference between event MVCC timestamp and the time it was admitted into ingestion processor +Shown as nanosecond" +cockroachdb,physical_replication.admit_latency.sum,count,"Event admission latency: a difference between event MVCC timestamp and the time it was admitted into ingestion processor +Shown as nanosecond" +cockroachdb,physical_replication.commit_latency.bucket,count,"Event commit latency: a difference between event MVCC timestamp and the time it was flushed into disk. If we batch events, then the difference between the oldest event in the batch and flush is recorded +Shown as nanosecond" +cockroachdb,physical_replication.commit_latency.count,count,"Event commit latency: a difference between event MVCC timestamp and the time it was flushed into disk. If we batch events, then the difference between the oldest event in the batch and flush is recorded +Shown as nanosecond" +cockroachdb,physical_replication.commit_latency.sum,count,"Event commit latency: a difference between event MVCC timestamp and the time it was flushed into disk. If we batch events, then the difference between the oldest event in the batch and flush is recorded +Shown as nanosecond" +cockroachdb,physical_replication.cutover_progress,gauge,The number of ranges left to revert in order to complete an inflight cutover +cockroachdb,physical_replication.distsql_replan_count.count,count,Total number of dist sql replanning events +cockroachdb,physical_replication.earliest_data_checkpoint_span,gauge,The earliest timestamp of the last checkpoint forwarded by an ingestion data processor +cockroachdb,physical_replication.events_ingested.count,count,Events ingested by all replication jobs +cockroachdb,physical_replication.flush_hist_nanos.bucket,count,"Time spent flushing messages across all replication streams +Shown as nanosecond" +cockroachdb,physical_replication.flush_hist_nanos.count,count,"Time spent flushing messages across all replication streams +Shown as nanosecond" +cockroachdb,physical_replication.flush_hist_nanos.sum,count,"Time spent flushing messages across all replication streams +Shown as nanosecond" +cockroachdb,physical_replication.flushes.count,count,Total flushes across all replication jobs +cockroachdb,physical_replication.job_progress_updates.count,count,Total number of updates to the ingestion job progress +cockroachdb,physical_replication.latest_data_checkpoint_span,gauge,The latest timestamp of the last checkpoint forwarded by an ingestion data processor +cockroachdb,physical_replication.logical_bytes.count,count,"Logical bytes (sum of keys + values) ingested by all replication jobs +Shown as byte" +cockroachdb,physical_replication.replicated_time_seconds,gauge,"The replicated time of the physical replication stream in seconds since the unix epoch. 
+Shown as second" +cockroachdb,physical_replication.resolved_events_ingested.count,count,Resolved events ingested by all replication jobs +cockroachdb,physical_replication.running,gauge,Number of currently running replication streams +cockroachdb,physical_replication.sst_bytes.count,count,"SST bytes (compressed) sent to KV by all replication jobs +Shown as byte" +cockroachdb,queue.consistency.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the consistency checker queue +cockroachdb,queue.consistency.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the consistency checker queue +cockroachdb,queue.consistency.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the consistency checker queue +cockroachdb,queue.consistency.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the consistency checker queue +cockroachdb,queue.consistency.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the consistency checker queue +cockroachdb,queue.consistency.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the consistency checker queue +Shown as nanosecond" +cockroachdb,queue.consistency.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the consistency checker queue +Shown as nanosecond" +cockroachdb,queue.gc.info.abortspanconsidered,count,[OpenMetrics v1] Number of AbortSpan entries old enough to be considered for removal +cockroachdb,queue.gc.info.abortspanconsidered.count,count,[OpenMetrics v2] Number of AbortSpan entries old enough to be considered for removal +cockroachdb,queue.gc.info.abortspangcnum,count,[OpenMetrics v1] Number of AbortSpan entries fit for removal +cockroachdb,queue.gc.info.abortspangcnum.count,count,[OpenMetrics v2] Number of AbortSpan entries fit for removal +cockroachdb,queue.gc.info.abortspanscanned,count,"[OpenMetrics v1] Number of transactions present in the AbortSpan scanned from the engine +Shown as transaction" +cockroachdb,queue.gc.info.abortspanscanned.count,count,"[OpenMetrics v2] Number of transactions present in the AbortSpan scanned from the engine +Shown as transaction" +cockroachdb,queue.gc.info.clearrangefailed.count,count,Number of failed ClearRange operations during GC +cockroachdb,queue.gc.info.clearrangesuccess.count,count,Number of successful ClearRange operations during GC +cockroachdb,queue.gc.info.enqueuehighpriority.count,count,Number of replicas enqueued for GC with high priority +cockroachdb,queue.gc.info.intentsconsidered,count,[OpenMetrics v1] Number of ‘old’ intents +cockroachdb,queue.gc.info.intentsconsidered.count,count,[OpenMetrics v2] Number of ‘old’ intents +cockroachdb,queue.gc.info.intenttxns,count,"[OpenMetrics v1] Number of associated distinct transactions +Shown as transaction" +cockroachdb,queue.gc.info.intenttxns.count,count,"[OpenMetrics v2] Number of associated distinct transactions +Shown as transaction" +cockroachdb,queue.gc.info.numkeysaffected,count,"[OpenMetrics v1] Number of keys with GC’able data +Shown as key" +cockroachdb,queue.gc.info.numkeysaffected.count,count,"[OpenMetrics v2] Number of keys with GC’able data +Shown as key" +cockroachdb,queue.gc.info.numrangekeysaffected.count,count,Number of range keys GC’able +cockroachdb,queue.gc.info.pushtxn,count,[OpenMetrics v1] Number of attempted pushes +cockroachdb,queue.gc.info.pushtxn.count,count,[OpenMetrics v2] Number of attempted pushes 
+cockroachdb,queue.gc.info.resolvefailed.count,count,Number of cleanup intent failures during GC +cockroachdb,queue.gc.info.resolvesuccess,count,[OpenMetrics v1] Number of successful intent resolutions +cockroachdb,queue.gc.info.resolvesuccess.count,count,[OpenMetrics v2] Number of successful intent resolutions +cockroachdb,queue.gc.info.resolvetotal,count,[OpenMetrics v1] Number of attempted intent resolutions +cockroachdb,queue.gc.info.resolvetotal.count,count,[OpenMetrics v2] Number of attempted intent resolutions +cockroachdb,queue.gc.info.transactionresolvefailed.count,count,Number of intent cleanup failures for local transactions during GC +cockroachdb,queue.gc.info.transactionspangcaborted,count,[OpenMetrics v1] Number of GC’able entries corresponding to aborted txns +cockroachdb,queue.gc.info.transactionspangcaborted.count,count,[OpenMetrics v2] Number of GC’able entries corresponding to aborted txns +cockroachdb,queue.gc.info.transactionspangccommitted,count,[OpenMetrics v1] Number of GC’able entries corresponding to committed txns +cockroachdb,queue.gc.info.transactionspangccommitted.count,count,[OpenMetrics v2] Number of GC’able entries corresponding to committed txns +cockroachdb,queue.gc.info.transactionspangcpending,count,[OpenMetrics v1] Number of GC’able entries corresponding to pending txns +cockroachdb,queue.gc.info.transactionspangcpending.count,count,[OpenMetrics v2] Number of GC’able entries corresponding to pending txns +cockroachdb,queue.gc.info.transactionspangcstaging.count,count,Number of GC’able entries corresponding to staging txns +cockroachdb,queue.gc.info.transactionspanscanned,count,[OpenMetrics v1] Number of entries in transaction spans scanned from the engine +cockroachdb,queue.gc.info.transactionspanscanned.count,count,[OpenMetrics v2] Number of entries in transaction spans scanned from the engine +cockroachdb,queue.gc.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the GC queue +cockroachdb,queue.gc.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the GC queue +cockroachdb,queue.gc.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the GC queue +cockroachdb,queue.gc.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the GC queue +cockroachdb,queue.gc.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the GC queue +cockroachdb,queue.gc.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the GC queue +Shown as nanosecond" +cockroachdb,queue.gc.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the GC queue +Shown as nanosecond" +cockroachdb,queue.merge.pending,gauge,Number of pending replicas in the merge queue +cockroachdb,queue.merge.process.failure.count,count,Number of replicas which failed processing in the merge queue +cockroachdb,queue.merge.process.success.count,count,Number of replicas successfully processed by the merge queue +cockroachdb,queue.merge.processingnanos.count,count,"Nanoseconds spent processing replicas in the merge queue +Shown as nanosecond" +cockroachdb,queue.merge.purgatory,gauge,"Number of replicas in the merge queue’s purgatory, waiting to become mergeable" +cockroachdb,queue.raftlog.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the Raft log queue +cockroachdb,queue.raftlog.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the Raft log queue 
+cockroachdb,queue.raftlog.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the Raft log queue +cockroachdb,queue.raftlog.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the Raft log queue +cockroachdb,queue.raftlog.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the Raft log queue +cockroachdb,queue.raftlog.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the Raft log queue +Shown as nanosecond" +cockroachdb,queue.raftlog.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the Raft log queue +Shown as nanosecond" +cockroachdb,queue.raftsnapshot.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the Raft repair queue +cockroachdb,queue.raftsnapshot.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the Raft repair queue +cockroachdb,queue.raftsnapshot.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the Raft repair queue +cockroachdb,queue.raftsnapshot.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the Raft repair queue +cockroachdb,queue.raftsnapshot.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the Raft repair queue +cockroachdb,queue.raftsnapshot.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the Raft repair queue +Shown as nanosecond" +cockroachdb,queue.raftsnapshot.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the Raft repair queue +Shown as nanosecond" +cockroachdb,queue.replicagc.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the replica GC queue +cockroachdb,queue.replicagc.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the replica GC queue +cockroachdb,queue.replicagc.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the replica GC queue +cockroachdb,queue.replicagc.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the replica GC queue +cockroachdb,queue.replicagc.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the replica GC queue +cockroachdb,queue.replicagc.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the replica GC queue +Shown as nanosecond" +cockroachdb,queue.replicagc.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the replica GC queue +Shown as nanosecond" +cockroachdb,queue.replicagc.removereplica,count,[OpenMetrics v1] Number of replica removals attempted by the replica gc queue +cockroachdb,queue.replicagc.removereplica.count,count,[OpenMetrics v2] Number of replica removals attempted by the replica gc queue +cockroachdb,queue.replicate.addnonvoterreplica.count,count,Number of non-voter replica additions attempted by the replicate queue +cockroachdb,queue.replicate.addreplica,count,[OpenMetrics v1] Number of replica additions attempted by the replicate queue +cockroachdb,queue.replicate.addreplica.count,count,[OpenMetrics v2] Number of replica additions attempted by the replicate queue +cockroachdb,queue.replicate.addreplica.error.count,count,Number of failed replica additions processed by the replicate queue +cockroachdb,queue.replicate.addreplica.success.count,count,Number of successful replica 
additions processed by the replicate queue +cockroachdb,queue.replicate.addvoterreplica.count,count,Number of voter replica additions attempted by the replicate queue +cockroachdb,queue.replicate.nonvoterpromotions.count,count,Number of non-voters promoted to voters by the replicate queue +cockroachdb,queue.replicate.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the replicate queue +cockroachdb,queue.replicate.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the replicate queue +cockroachdb,queue.replicate.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the replicate queue +cockroachdb,queue.replicate.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the replicate queue +cockroachdb,queue.replicate.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the replicate queue +cockroachdb,queue.replicate.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the replicate queue +Shown as nanosecond" +cockroachdb,queue.replicate.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the replicate queue +Shown as nanosecond" +cockroachdb,queue.replicate.purgatory,gauge,"[OpenMetrics v1 & v2] Number of replicas in the replicate queue’s purgatory, awaiting allocation options" +cockroachdb,queue.replicate.rebalancenonvoterreplica.count,count,Number of non-voter replica rebalancer-initiated additions attempted by the replicate queue +cockroachdb,queue.replicate.rebalancereplica,count,[OpenMetrics v1] Number of replica rebalancer-initiated additions attempted by the replicate queue +cockroachdb,queue.replicate.rebalancereplica.count,count,[OpenMetrics v2] Number of replica rebalancer-initiated additions attempted by the replicate queue +cockroachdb,queue.replicate.rebalancevoterreplica.count,count,Number of voter replica rebalancer-initiated additions attempted by the replicate queue +cockroachdb,queue.replicate.removedeadnonvoterreplica.count,count,Number of dead non-voter replica removals attempted by the replicate queue (typically in response to a node outage) +cockroachdb,queue.replicate.removedeadreplica,count,[OpenMetrics v1] Number of dead replica removals attempted by the replicate queue (typically in response to a node outage) +cockroachdb,queue.replicate.removedeadreplica.count,count,[OpenMetrics v2] Number of dead replica removals attempted by the replicate queue (typically in response to a node outage) +cockroachdb,queue.replicate.removedeadreplica.error.count,count,Number of failed dead replica removals processed by the replicate queue +cockroachdb,queue.replicate.removedeadreplica.success.count,count,Number of successful dead replica removals processed by the replicate queue +cockroachdb,queue.replicate.removedeadvoterreplica.count,count,Number of dead voter replica removals attempted by the replicate queue (typically in response to a node outage) +cockroachdb,queue.replicate.removedecommissioningnonvoterreplica.count,count,Number of decommissioning non-voter replica removals attempted by the replicate queue (typically in response to a node outage) +cockroachdb,queue.replicate.removedecommissioningreplica.count,count,Number of decommissioning replica removals attempted by the replicate queue (typically in response to a node outage) +cockroachdb,queue.replicate.removedecommissioningreplica.error.count,count,Number of failed decommissioning replica removals processed 
by the replicate queue +cockroachdb,queue.replicate.removedecommissioningreplica.success.count,count,Number of successful decommissioning replica removals processed by the replicate queue +cockroachdb,queue.replicate.removedecommissioningvoterreplica.count,count,Number of decommissioning voter replica removals attempted by the replicate queue (typically in response to a node outage) +cockroachdb,queue.replicate.removelearnerreplica.count,count,Number of learner replica removals attempted by the replicate queue (typically due to internal race conditions) +cockroachdb,queue.replicate.removenonvoterreplica.count,count,Number of non-voter replica removals attempted by the replicate queue (typically in response to a rebalancer-initiated addition) +cockroachdb,queue.replicate.removereplica,count,[OpenMetrics v1] Number of replica removals attempted by the replicate queue (typically in response to a rebalancer-initiated addition) +cockroachdb,queue.replicate.removereplica.count,count,[OpenMetrics v2] Number of replica removals attempted by the replicate queue (typically in response to a rebalancer-initiated addition) +cockroachdb,queue.replicate.removereplica.error.count,count,Number of failed replica removals processed by the replicate queue +cockroachdb,queue.replicate.removereplica.success.count,count,Number of successful replica removals processed by the replicate queue +cockroachdb,queue.replicate.removevoterreplica.count,count,Number of voter replica removals attempted by the replicate queue (typically in response to a rebalancer-initiated addition) +cockroachdb,queue.replicate.replacedeadreplica.error.count,count,Number of failed dead replica replacements processed by the replicate queue +cockroachdb,queue.replicate.replacedeadreplica.success.count,count,Number of successful dead replica replacements processed by the replicate queue +cockroachdb,queue.replicate.replacedecommissioningreplica.error.count,count,Number of failed decommissioning replica replacements processed by the replicate queue +cockroachdb,queue.replicate.replacedecommissioningreplica.success.count,count,Number of successful decommissioning replica replacements processed by the replicate queue +cockroachdb,queue.replicate.transferlease,count,[OpenMetrics v1] Number of range lease transfers attempted by the replicate queue +cockroachdb,queue.replicate.transferlease.count,count,[OpenMetrics v2] Number of range lease transfers attempted by the replicate queue +cockroachdb,queue.replicate.voterdemotions.count,count,Number of voters demoted to non-voters by the replicate queue +cockroachdb,queue.split.load_based.count,count,Number of range splits due to a range being greater than the configured max range load +cockroachdb,queue.split.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the split queue +cockroachdb,queue.split.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the split queue +cockroachdb,queue.split.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the split queue +cockroachdb,queue.split.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the split queue +cockroachdb,queue.split.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the split queue +cockroachdb,queue.split.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the split queue +Shown as nanosecond"
+cockroachdb,queue.split.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the split queue +Shown as nanosecond" +cockroachdb,queue.split.purgatory,gauge,"Number of replicas in the split queue’s purgatory, waiting to become splittable" +cockroachdb,queue.split.size_based.count,count,Number of range splits due to a range being greater than the configured max range size +cockroachdb,queue.split.span_config_based.count,count,Number of range splits due to span configuration +cockroachdb,queue.tsmaintenance.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the timeseries maintenance queue +cockroachdb,queue.tsmaintenance.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the timeseries maintenance queue +cockroachdb,queue.tsmaintenance.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the timeseries maintenance queue +cockroachdb,queue.tsmaintenance.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the timeseries maintenance queue +cockroachdb,queue.tsmaintenance.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the timeseries maintenance queue +cockroachdb,queue.tsmaintenance.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the timeseries maintenance queue +Shown as nanosecond" +cockroachdb,queue.tsmaintenance.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the timeseries maintenance queue +Shown as nanosecond" +cockroachdb,raft.commands.proposed.count,count,Number of Raft commands proposed. The number of proposals and all kinds of reproposals made by leaseholders. This metric approximates the number of commands submitted through Raft. +cockroachdb,raft.commands.reproposed.new_lai.count,count,"Number of Raft commands re-proposed with a newer LAI. The number of Raft commands that leaseholders re-proposed with a modified LAI. Such re-proposals happen for commands that are committed to Raft out of intended order, and hence cannot be applied as is." +cockroachdb,raft.commands.reproposed.unchanged.count,count,"Number of Raft commands re-proposed without modification. The number of Raft commands that leaseholders re-proposed without modification. Such re-proposals happen for commands that are not committed/applied within a timeout, and have a high chance of being dropped."
+cockroachdb,raft.commandsapplied,count,"[OpenMetrics v1] Count of Raft commands applied +Shown as command" +cockroachdb,raft.commandsapplied.count,count,"[OpenMetrics v2] Count of Raft commands applied +Shown as command" +cockroachdb,raft.dropped.count,count,"Number of Raft proposals dropped (this counts individual raftpb.Entry, not raftpb.MsgProp)" +cockroachdb,raft.dropped_leader.count,count,"Number of Raft proposals dropped by a Replica that believes itself to be the leader; each update also increments raft.dropped (this counts individual raftpb.Entry, not raftpb.MsgProp)" +cockroachdb,raft.enqueued.pending,gauge,[OpenMetrics v1 & v2] Number of pending outgoing messages in the Raft Transport queue +cockroachdb,raft.entrycache.accesses.count,count,Number of cache lookups in the Raft entry cache +cockroachdb,raft.entrycache.bytes,gauge,"Aggregate size of all Raft entries in the Raft entry cache +Shown as byte" +cockroachdb,raft.entrycache.hits.count,count,Number of successful cache lookups in the Raft entry cache +cockroachdb,raft.entrycache.read_bytes.count,count,"Counter of bytes in entries returned from the Raft entry cache +Shown as byte" +cockroachdb,raft.entrycache.size,gauge,Number of Raft entries in the Raft entry cache +cockroachdb,raft.heartbeats.pending,gauge,[OpenMetrics v1 & v2] Number of pending heartbeats and responses waiting to be coalesced +cockroachdb,raft.process.applycommitted.latency.bucket,count,"Latency histogram for applying all committed Raft commands in a Raft ready. This measures the end-to-end latency of applying all commands in a Raft ready. Note that this closes over possibly multiple measurements of the ‘raft.process.commandcommit.latency’ metric, which receives datapoints for each sub-batch processed in the process. +Shown as nanosecond" +cockroachdb,raft.process.applycommitted.latency.count,count,"Latency histogram for applying all committed Raft commands in a Raft ready. This measures the end-to-end latency of applying all commands in a Raft ready. Note that this closes over possibly multiple measurements of the ‘raft.process.commandcommit.latency’ metric, which receives datapoints for each sub-batch processed in the process. +Shown as nanosecond" +cockroachdb,raft.process.applycommitted.latency.sum,count,"Latency histogram for applying all committed Raft commands in a Raft ready. This measures the end-to-end latency of applying all commands in a Raft ready. Note that this closes over possibly multiple measurements of the ‘raft.process.commandcommit.latency’ metric, which receives datapoints for each sub-batch processed in the process. +Shown as nanosecond" +cockroachdb,raft.process.commandcommit.latency,gauge,"[OpenMetrics v1] Latency histogram in nanoseconds for committing Raft commands +Shown as nanosecond" +cockroachdb,raft.process.commandcommit.latency.bucket,count,"[OpenMetrics v2] Latency histogram in nanoseconds for committing Raft commands +Shown as nanosecond" +cockroachdb,raft.process.commandcommit.latency.count,count,"[OpenMetrics v2] Latency histogram in nanoseconds for committing Raft commands +Shown as nanosecond" +cockroachdb,raft.process.commandcommit.latency.sum,count,"[OpenMetrics v2] Latency histogram in nanoseconds for committing Raft commands +Shown as nanosecond" +cockroachdb,raft.process.handleready.latency.bucket,count,"Latency histogram for handling a Raft ready. This measures the end-to-end latency of the Raft state advancement loop, including: snapshot application, SST ingestion, durably appending to the Raft log (i.e.
includes fsync), and entry application (incl. replicated side effects, notably log truncation). +Shown as nanosecond" +cockroachdb,raft.process.handleready.latency.count,count,"Latency histogram for handling a Raft ready. This measures the end-to-end latency of the Raft state advancement loop, including: snapshot application, SST ingestion, durably appending to the Raft log (i.e. includes fsync), and entry application (incl. replicated side effects, notably log truncation). +Shown as nanosecond" +cockroachdb,raft.process.handleready.latency.sum,count,"Latency histogram for handling a Raft ready. This measures the end-to-end latency of the Raft state advancement loop, including: snapshot application, SST ingestion, durably appending to the Raft log (i.e. includes fsync), and entry application (incl. replicated side effects, notably log truncation). +Shown as nanosecond" +cockroachdb,raft.process.logcommit.latency,gauge,"[OpenMetrics v1] Latency histogram in nanoseconds for committing Raft log entries +Shown as nanosecond" +cockroachdb,raft.process.logcommit.latency.bucket,count,"[OpenMetrics v2] Latency histogram in nanoseconds for committing Raft log entries +Shown as nanosecond" +cockroachdb,raft.process.logcommit.latency.count,count,"[OpenMetrics v2] Latency histogram in nanoseconds for committing Raft log entries +Shown as nanosecond" +cockroachdb,raft.process.logcommit.latency.sum,count,"[OpenMetrics v2] Latency histogram in nanoseconds for committing Raft log entries +Shown as nanosecond" +cockroachdb,raft.process.tickingnanos,count,"[OpenMetrics v1] Nanoseconds spent in store.processRaft() processing replica.Tick() +Shown as nanosecond" +cockroachdb,raft.process.tickingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent in store.processRaft() processing replica.Tick() +Shown as nanosecond" +cockroachdb,raft.process.workingnanos,count,"[OpenMetrics v1] Nanoseconds spent in store.processRaft() working +Shown as nanosecond" +cockroachdb,raft.process.workingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent in store.processRaft() working +Shown as nanosecond" +cockroachdb,raft.quota_pool.percent_used.bucket,count,Histogram of proposal quota pool utilization (0-100) per leaseholder per metrics interval +cockroachdb,raft.quota_pool.percent_used.count,count,Histogram of proposal quota pool utilization (0-100) per leaseholder per metrics interval +cockroachdb,raft.quota_pool.percent_used.sum,count,Histogram of proposal quota pool utilization (0-100) per leaseholder per metrics interval +cockroachdb,raft.rcvd.app,count,[OpenMetrics v1] Number of MsgApp messages received by this store +cockroachdb,raft.rcvd.app.count,count,[OpenMetrics v2] Number of MsgApp messages received by this store +cockroachdb,raft.rcvd.appresp,count,[OpenMetrics v1] Number of MsgAppResp messages received by this store +cockroachdb,raft.rcvd.appresp.count,count,[OpenMetrics v2] Number of MsgAppResp messages received by this store +cockroachdb,raft.rcvd.bytes.count,count,"Number of bytes in Raft messages received by this store. Note that this does not include raft snapshot received. +Shown as byte" +cockroachdb,raft.rcvd.cross_region.bytes.count,count,"Number of bytes received by this store for cross region Raft messages (when region tiers are configured). Note that this does not include raft snapshot received. +Shown as byte" +cockroachdb,raft.rcvd.cross_zone.bytes.count,count,"Number of bytes received by this store for cross zone, same region Raft messages (when region and zone tiers are configured).
If region tiers are not configured, this count may include data sent between different regions. +Shown as byte" +cockroachdb,raft.rcvd.dropped,count,[OpenMetrics v1] Number of dropped incoming Raft messages +cockroachdb,raft.rcvd.dropped.count,count,[OpenMetrics v2] Number of dropped incoming Raft messages +cockroachdb,raft.rcvd.dropped_bytes.count,count,"Bytes of dropped incoming Raft messages +Shown as byte" +cockroachdb,raft.rcvd.heartbeat,count,"[OpenMetrics v1] Number of (coalesced, if enabled) MsgHeartbeat messages received by this store" +cockroachdb,raft.rcvd.heartbeat.count,count,"[OpenMetrics v2] Number of (coalesced, if enabled) MsgHeartbeat messages received by this store" +cockroachdb,raft.rcvd.heartbeatresp,count,"[OpenMetrics v1] Number of (coalesced, if enabled) MsgHeartbeatResp messages received by this store" +cockroachdb,raft.rcvd.heartbeatresp.count,count,"[OpenMetrics v2] Number of (coalesced, if enabled) MsgHeartbeatResp messages received by this store" +cockroachdb,raft.rcvd.prevote,count,[OpenMetrics v1] Number of MsgPreVote messages received by this store +cockroachdb,raft.rcvd.prevote.count,count,[OpenMetrics v2] Number of MsgPreVote messages received by this store +cockroachdb,raft.rcvd.prevoteresp,count,[OpenMetrics v1] Number of MsgPreVoteResp messages received by this store +cockroachdb,raft.rcvd.prevoteresp.count,count,[OpenMetrics v2] Number of MsgPreVoteResp messages received by this store +cockroachdb,raft.rcvd.prop,count,[OpenMetrics v1] Number of MsgProp messages received by this store +cockroachdb,raft.rcvd.prop.count,count,[OpenMetrics v2] Number of MsgProp messages received by this store +cockroachdb,raft.rcvd.queued_bytes,gauge,"Number of bytes in messages currently waiting for raft processing +Shown as byte" +cockroachdb,raft.rcvd.snap,count,[OpenMetrics v1] Number of MsgSnap messages received by this store +cockroachdb,raft.rcvd.snap.count,count,[OpenMetrics v2] Number of MsgSnap messages received by this store +cockroachdb,raft.rcvd.stepped_bytes.count,count,"Number of bytes in messages processed by Raft. Messages reflected here have been handed to Raft (via RawNode.Step). This does not imply that the messages are no longer held in memory or that IO has been performed. +Shown as byte" +cockroachdb,raft.rcvd.timeoutnow,count,[OpenMetrics v1] Number of MsgTimeoutNow messages received by this store +cockroachdb,raft.rcvd.timeoutnow.count,count,[OpenMetrics v2] Number of MsgTimeoutNow messages received by this store +cockroachdb,raft.rcvd.transferleader,count,[OpenMetrics v1] Number of MsgTransferLeader messages received by this store +cockroachdb,raft.rcvd.transferleader.count,count,[OpenMetrics v2] Number of MsgTransferLeader messages received by this store +cockroachdb,raft.rcvd.vote,count,[OpenMetrics v1] Number of MsgVote messages received by this store +cockroachdb,raft.rcvd.vote.count,count,[OpenMetrics v2] Number of MsgVote messages received by this store +cockroachdb,raft.rcvd.voteresp,count,[OpenMetrics v1] Number of MsgVoteResp messages received by this store +cockroachdb,raft.rcvd.voteresp.count,count,[OpenMetrics v2] Number of MsgVoteResp messages received by this store +cockroachdb,raft.replication.latency.bucket,count,"The duration elapsed between having evaluated a BatchRequest and it being reflected in the proposer’s state machine (i.e. having applied fully). This encompasses time spent in the quota pool, in replication (including reproposals), and application, but notably not sequencing latency (i.e. contention and latch acquisition)."
+cockroachdb,raft.replication.latency.count,count,"The duration elapsed between having evaluated a BatchRequest and it being reflected in the proposer’s state machine (i.e. having applied fully). This encompasses time spent in the quota pool, in replication (including reproposals), and application, but notably not sequencing latency (i.e. contention and latch acquisition)." +cockroachdb,raft.replication.latency.sum,count,"The duration elapsed between having evaluated a BatchRequest and it being reflected in the proposer’s state machine (i.e. having applied fully). This encompasses time spent in the quota pool, in replication (including reproposals), and application, but notably not sequencing latency (i.e. contention and latch acquisition)." +cockroachdb,raft.scheduler.latency,count,"Queueing durations for ranges waiting to be processed by the Raft scheduler. This histogram measures the delay from when a range is registered with the scheduler for processing to when it is actually processed. This does not include the duration of processing. +Shown as nanosecond" +cockroachdb,raft.scheduler.latency.bucket,count,"Queueing durations for ranges waiting to be processed by the Raft scheduler. This histogram measures the delay from when a range is registered with the scheduler for processing to when it is actually processed. This does not include the duration of processing. +Shown as nanosecond" +cockroachdb,raft.scheduler.latency.count,count,"Queueing durations for ranges waiting to be processed by the Raft scheduler. This histogram measures the delay from when a range is registered with the scheduler for processing to when it is actually processed. This does not include the duration of processing. +Shown as nanosecond" +cockroachdb,raft.scheduler.latency.sum,count,"Queueing durations for ranges waiting to be processed by the Raft scheduler. This histogram measures the delay from when a range is registered with the scheduler for processing to when it is actually processed. This does not include the duration of processing. +Shown as nanosecond" +cockroachdb,raft.sent.bytes.count,count,"Number of bytes in Raft messages sent by this store. Note that this does not include raft snapshot sent. +Shown as byte" +cockroachdb,raft.sent.cross_region.bytes.count,count,"Number of bytes sent by this store for cross region Raft messages (when region tiers are configured). Note that this does not include raft snapshot sent. +Shown as byte" +cockroachdb,raft.sent.cross_zone.bytes.count,count,"Number of bytes sent by this store for cross zone, same region Raft messages (when region and zone tiers are configured). If region tiers are not configured, this count may include data sent between different regions. To ensure accurate monitoring of transmitted data, it is important to set up a consistent locality configuration across nodes. Note that this does not include raft snapshot sent. +Shown as byte" +cockroachdb,raft.storage.read_bytes.count,count,"Counter of raftpb.Entry.Size() read from pebble for raft log entries. These are the bytes returned from the (raft.Storage).Entries method that were not returned via the raft entry cache. This metric plus the raft.entrycache.read_bytes metric represent the total bytes returned from the Entries method.
+Shown as byte" +cockroachdb,raft.ticks,count,[OpenMetrics v1] Number of Raft ticks queued +cockroachdb,raft.ticks.count,count,[OpenMetrics v2] Number of Raft ticks queued +cockroachdb,raft.timeoutcampaign.count,count,Number of Raft replicas campaigning after missed heartbeats from leader +cockroachdb,raft.transport.flow_token_dispatches_dropped.count,count,Number of flow token dispatches dropped by the Raft Transport +cockroachdb,raft.transport.rcvd.count,count,Number of Raft messages received by the Raft Transport +cockroachdb,raft.transport.reverse_rcvd.count,count,"Messages received from the reverse direction of a stream.These messages should be rare. They are mostly informational, and are not actualresponses to Raft messages. Responses are received over another stream." +cockroachdb,raft.transport.reverse_sent.count,count,"Messages sent in the reverse direction of a stream.These messages should be rare. They are mostly informational, and are not actualresponses to Raft messages. Responses are sent over another stream." +cockroachdb,raft.transport.send_queue_bytes,gauge,"The total byte size of pending outgoing messages in the queue.The queue is composed of multiple bounded channels associated with differentpeers. A size higher than the average baseline could indicate issues streamingmessages to at least one peer. Use this metric together with send-queue-size, tohave a fuller picture. +Shown as byte" +cockroachdb,raft.transport.send_queue_size,gauge,Number of pending outgoing messages in the Raft Transport queue.The queue is composed of multiple bounded channels associated with differentpeers. The overall size of tens of thousands could indicate issues streamingmessages to at least one peer. Use this metric in conjunction withsend-queue-bytes. +cockroachdb,raft.transport.sends_dropped.count,count,Number of Raft message sends dropped by the Raft Transport +cockroachdb,raft.transport.sent.count,count,Number of Raft messages sent by the Raft Transport +cockroachdb,raftlog.behind,gauge,"[OpenMetrics v1 & v2] Number of Raft log entries followers on other stores are behind +Shown as entry" +cockroachdb,raftlog.truncated,count,"[OpenMetrics v1] Number of Raft log entries truncated +Shown as entry" +cockroachdb,raftlog.truncated.count,count,"[OpenMetrics v2] Number of Raft log entries truncated +Shown as entry" +cockroachdb,range.adds,count,[OpenMetrics v1] Number of range additions +cockroachdb,range.adds.count,count,[OpenMetrics v2] Number of range additions +cockroachdb,range.merges.count,count,Number of range merges +cockroachdb,range.raftleaderremovals.count,count,Number of times the current Raft leader was removed from a range +cockroachdb,range.raftleadertransfers,count,[OpenMetrics v1] Number of raft leader transfers +cockroachdb,range.raftleadertransfers.count,count,[OpenMetrics v2] Number of raft leader transfers +cockroachdb,range.recoveries.count,count,Count of offline loss of quorum recovery operations performed on ranges.This count increments for every range recovered in offline loss of quorumrecovery operation. Metric is updated when node on which survivor replicais located starts following the recovery. 
+cockroachdb,range.removes,count,[OpenMetrics v1] Number of range removals +cockroachdb,range.removes.count,count,[OpenMetrics v2] Number of range removals +cockroachdb,range.snapshots.applied_initial.count,count,Number of snapshots applied for initial upreplication +cockroachdb,range.snapshots.applied_non_voter.count,count,Number of snapshots applied by non-voter replicas +cockroachdb,range.snapshots.applied_voter.count,count,Number of snapshots applied by voter replicas +cockroachdb,range.snapshots.cross_region.rcvd_bytes.count,count,"Number of snapshot bytes received cross region +Shown as byte" +cockroachdb,range.snapshots.cross_region.sent_bytes.count,count,"Number of snapshot bytes sent cross region +Shown as byte" +cockroachdb,range.snapshots.cross_zone.rcvd_bytes.count,count,"Number of snapshot bytes received cross zone within same region or if region tiers are not configured. This count increases for each snapshot received between different zones within the same region. However, if the region tiers are not configured, this count may also include snapshot data received between different regions. +Shown as byte" +cockroachdb,range.snapshots.cross_zone.sent_bytes.count,count,"Number of snapshot bytes sent cross zone within same region or if region tiers are not configured. This count increases for each snapshot sent between different zones within the same region. However, if the region tiers are not configured, this count may also include snapshot data sent between different regions. +Shown as byte" +cockroachdb,range.snapshots.delegate.failures.count,count,"Number of snapshots that were delegated to a different node and resulted in failure on that delegate. There are numerous reasons a failure can occur on a delegate, such as a timeout, the delegate Raft log being too far behind, or the delegate being too busy to send." +cockroachdb,range.snapshots.delegate.in_progress,gauge,Number of delegated snapshots that are currently in-flight. +cockroachdb,range.snapshots.delegate.sent_bytes.count,count,"Bytes sent using a delegate. The number of bytes sent as a result of a delegate snapshot request that originated from a different node. This metric is useful in evaluating the network savings of not sending cross region traffic. +Shown as byte" +cockroachdb,range.snapshots.delegate.successes.count,count,Number of snapshots that were delegated to a different node and resulted in success on that delegate. This does not count self-delegated snapshots.
+cockroachdb,range.snapshots.generated,count,[OpenMetrics v1] Number of generated snapshots +cockroachdb,range.snapshots.generated.count,count,[OpenMetrics v2] Number of generated snapshots +cockroachdb,range.snapshots.normal_applied,count,[OpenMetrics v1] Number of applied snapshots +cockroachdb,range.snapshots.normal_applied.count,count,[OpenMetrics v2] Number of applied snapshots +cockroachdb,range.snapshots.preemptive_applied,count,[OpenMetrics v1] Number of applied pre-emptive snapshots +cockroachdb,range.snapshots.preemptive_applied.count,count,[OpenMetrics v2] Number of applied pre-emptive snapshots +cockroachdb,range.snapshots.rcvd_bytes.count,count,"Number of snapshot bytes received +Shown as byte" +cockroachdb,range.snapshots.rebalancing.rcvd_bytes.count,count,"Number of rebalancing snapshot bytes received +Shown as byte" +cockroachdb,range.snapshots.rebalancing.sent_bytes.count,count,"Number of rebalancing snapshot bytes sent +Shown as byte" +cockroachdb,range.snapshots.recovery.rcvd_bytes.count,count,"Number of recovery snapshot bytes received +Shown as byte" +cockroachdb,range.snapshots.recovery.sent_bytes.count,count,"Number of recovery snapshot bytes sent +Shown as byte" +cockroachdb,range.snapshots.recv_failed.count,count,"Number of range snapshot initialization messages that errored out on the recipient, typically before any data is transferred" +cockroachdb,range.snapshots.recv_in_progress,gauge,Number of non-empty snapshots being received +cockroachdb,range.snapshots.recv_queue,gauge,Number of snapshots queued to receive +cockroachdb,range.snapshots.recv_queue_bytes,gauge,"Total size of all snapshots in the snapshot receive queue +Shown as byte" +cockroachdb,range.snapshots.recv_total_in_progress,gauge,Number of total snapshots being received +cockroachdb,range.snapshots.recv_unusable.count,count,Number of range snapshots that were fully transmitted but determined to be unnecessary or unusable +cockroachdb,range.snapshots.send_in_progress,gauge,Number of non-empty snapshots being sent +cockroachdb,range.snapshots.send_queue,gauge,Number of snapshots queued to send +cockroachdb,range.snapshots.send_queue_bytes,gauge,"Total size of all snapshots in the snapshot send queue +Shown as byte" +cockroachdb,range.snapshots.send_total_in_progress,gauge,Number of total snapshots being sent +cockroachdb,range.snapshots.sent_bytes.count,count,"Number of snapshot bytes sent +Shown as byte" +cockroachdb,range.snapshots.unknown.rcvd_bytes.count,count,"Number of unknown snapshot bytes received +Shown as byte" +cockroachdb,range.snapshots.unknown.sent_bytes.count,count,"Number of unknown snapshot bytes sent +Shown as byte" +cockroachdb,range.splits.count,count,Number of range splits +cockroachdb,range.splits.total,count,[OpenMetrics v1] Number of range splits +cockroachdb,range.splits.total.count,count,[OpenMetrics v2] Number of range splits +cockroachdb,rangekeybytes,gauge,"Number of bytes taken up by range keys (e.g. MVCC range tombstones) +Shown as byte" +cockroachdb,rangekeycount,gauge,Count of all range keys (e.g.
MVCC range tombstones) +cockroachdb,ranges,gauge,[OpenMetrics v1 & v2] Number of ranges +cockroachdb,ranges.overreplicated,gauge,[OpenMetrics v1 & v2] Number of ranges with more live replicas than the replication target +cockroachdb,ranges.unavailable,gauge,[OpenMetrics v1 & v2] Number of ranges with fewer live replicas than needed for quorum +cockroachdb,ranges.underreplicated,gauge,[OpenMetrics v1 & v2] Number of ranges with fewer live replicas than the replication target +cockroachdb,rangevalbytes,gauge,"Number of bytes taken up by range key values (e.g. MVCC range tombstones) +Shown as byte" +cockroachdb,rangevalcount,gauge,Count of all range key values (e.g. MVCC range tombstones) +cockroachdb,rebalancing.cpunanospersecond,gauge,"Average CPU nanoseconds spent on processing replica operations in the last 30 minutes. +Shown as nanosecond" +cockroachdb,rebalancing.lease.transfers.count,count,Number of lease transfers motivated by store-level load imbalances +cockroachdb,rebalancing.queriespersecond,gauge,"Number of kv-level requests received per second by the store, averaged over a large time period as used in rebalancing decisions" +cockroachdb,rebalancing.range.rebalances.count,count,Number of range rebalance operations motivated by store-level load imbalances +cockroachdb,rebalancing.readbytespersecond,gauge,"Number of bytes read recently per second, considering the last 30 minutes. +Shown as byte" +cockroachdb,rebalancing.readspersecond,gauge,"Number of keys read recently per second, considering the last 30 minutes." +cockroachdb,rebalancing.replicas.cpunanospersecond.bucket,count,"Histogram of average CPU nanoseconds spent on processing replica operations in the last 30 minutes. +Shown as nanosecond" +cockroachdb,rebalancing.replicas.cpunanospersecond.count,count,"Histogram of average CPU nanoseconds spent on processing replica operations in the last 30 minutes. +Shown as nanosecond" +cockroachdb,rebalancing.replicas.cpunanospersecond.sum,count,"Histogram of average CPU nanoseconds spent on processing replica operations in the last 30 minutes. +Shown as nanosecond" +cockroachdb,rebalancing.replicas.queriespersecond.bucket,count,Histogram of average kv-level requests received per second by replicas on the store in the last 30 minutes. +cockroachdb,rebalancing.replicas.queriespersecond.count,count,Histogram of average kv-level requests received per second by replicas on the store in the last 30 minutes. +cockroachdb,rebalancing.replicas.queriespersecond.sum,count,Histogram of average kv-level requests received per second by replicas on the store in the last 30 minutes. +cockroachdb,rebalancing.requestspersecond,gauge,"Number of requests received recently per second, considering the last 30 minutes." +cockroachdb,rebalancing.state.imbalanced_overfull_options_exhausted.count,count,Number of occurrences where this store was overfull but failed to shed load after exhausting available rebalance options +cockroachdb,rebalancing.writebytespersecond,gauge,"Number of bytes written recently per second, considering the last 30 minutes. +Shown as byte" +cockroachdb,rebalancing.writespersecond,gauge,"[OpenMetrics v1 & v2] Number of keys written (i.e. 
applied by raft) per second to the store, averaged over a large time period as used in rebalancing decisions +Shown as key" +cockroachdb,replicas,gauge,Number of replicas +cockroachdb,replicas.commandqueue.combinedqueuesize,gauge,"[OpenMetrics v1 & v2] Number of commands in all CommandQueues combined +Shown as command" +cockroachdb,replicas.commandqueue.combinedreadcount,gauge,"[OpenMetrics v1 & v2] Number of read-only commands in all CommandQueues combined +Shown as command" +cockroachdb,replicas.commandqueue.combinedwritecount,gauge,"[OpenMetrics v1 & v2] Number of read-write commands in all CommandQueues combined +Shown as command" +cockroachdb,replicas.commandqueue.maxoverlaps,gauge,"[OpenMetrics v1 & v2] Largest number of overlapping commands seen when adding to any CommandQueue +Shown as command" +cockroachdb,replicas.commandqueue.maxreadcount,gauge,"[OpenMetrics v1 & v2] Largest number of read-only commands in any CommandQueue +Shown as command" +cockroachdb,replicas.commandqueue.maxsize,gauge,"[OpenMetrics v1 & v2] Largest number of commands in any CommandQueue +Shown as command" +cockroachdb,replicas.commandqueue.maxtreesize,gauge,[OpenMetrics v1 & v2] Largest number of intervals in any CommandQueue’s interval tree +cockroachdb,replicas.commandqueue.maxwritecount,gauge,"[OpenMetrics v1 & v2] Largest number of read-write commands in any CommandQueue +Shown as command" +cockroachdb,replicas.leaders,gauge,[OpenMetrics v1 & v2] Number of raft leaders +cockroachdb,replicas.leaders.not_leaseholders,gauge,[OpenMetrics v1 & v2] Number of replicas that are Raft leaders whose range lease is held by another store +cockroachdb,replicas.leaders_invalid_lease,gauge,Number of replicas that are Raft leaders whose lease is invalid +cockroachdb,replicas.leaders_not_leaseholders,gauge,Number of replicas that are Raft leaders whose range lease is held by another store +cockroachdb,replicas.leaseholders,gauge,[OpenMetrics v1 & v2] Number of lease holders +cockroachdb,replicas.quiescent,gauge,[OpenMetrics v1 & v2] Number of quiesced replicas +cockroachdb,replicas.reserved,gauge,[OpenMetrics v1 & v2] Number of replicas reserved for snapshots +cockroachdb,replicas.total,gauge,[OpenMetrics v1 & v2] Number of replicas +cockroachdb,replicas.uninitialized,gauge,"Number of uninitialized replicas; this does not include uninitialized replicas that can lie dormant in a persistent state." +cockroachdb,requests.backpressure.split,gauge,[OpenMetrics v1 & v2] Number of backpressured writes waiting on a Range split +cockroachdb,requests.slow.commandqueue,gauge,"[OpenMetrics v1 & v2] Number of requests that have been stuck for a long time in the command queue +Shown as request" +cockroachdb,requests.slow.distsender,gauge,"[OpenMetrics v1 & v2] Number of requests that have been stuck for a long time in the dist sender +Shown as request" +cockroachdb,requests.slow.latch,gauge,Number of requests that have been stuck for a long time acquiring latches. Latches moderate access to the KV keyspace for the purpose of evaluating and replicating commands. A slow latch acquisition attempt is often caused by another request holding and not releasing its latches in a timely manner.
+cockroachdb,requests.slow.lease,gauge,"[OpenMetrics v1 & v2] Number of requests that have been stuck for a long time acquiring a lease +Shown as request" +cockroachdb,requests.slow.raft,gauge,"[OpenMetrics v1 & v2] Number of requests that have been stuck for a long time in raft +Shown as request" +cockroachdb,rocksdb.block.cache.hits,gauge,[OpenMetrics v1 & v2] Count of block cache hits +cockroachdb,rocksdb.block.cache.misses,gauge,[OpenMetrics v1 & v2] Count of block cache misses +cockroachdb,rocksdb.block.cache.pinned.usage,gauge,"[OpenMetrics v1 & v2] Bytes pinned by the block cache +Shown as byte" +cockroachdb,rocksdb.block.cache.usage,gauge,"[OpenMetrics v1 & v2] Bytes used by the block cache +Shown as byte" +cockroachdb,rocksdb.bloom.filter.prefix.checked,gauge,Number of times the bloom filter was checked +cockroachdb,rocksdb.bloom.filter.prefix.useful,gauge,Number of times the bloom filter helped avoid iterator creation +cockroachdb,rocksdb.bloom_filter.prefix.checked,gauge,[OpenMetrics v1 & v2] Number of times the bloom filter was checked +cockroachdb,rocksdb.bloom_filter.prefix.useful,gauge,[OpenMetrics v1 & v2] Number of times the bloom filter helped avoid iterator creation +cockroachdb,rocksdb.compacted_bytes_read,gauge,"Bytes read during compaction +Shown as byte" +cockroachdb,rocksdb.compacted_bytes_written,gauge,"Bytes written during compaction +Shown as byte" +cockroachdb,rocksdb.compactions,gauge,Number of table compactions +cockroachdb,rocksdb.compactions.total,gauge,[OpenMetrics v1 & v2] Number of table compactions +cockroachdb,rocksdb.encryption.algorithm,gauge,"Algorithm in use for encryption-at-rest, see ccl/storageccl/engineccl/enginepbccl/key_registry.proto" +cockroachdb,rocksdb.estimated_pending_compaction,gauge,"Estimated pending compaction bytes +Shown as byte" +cockroachdb,rocksdb.flushed_bytes,gauge,"Bytes written during flush +Shown as byte" +cockroachdb,rocksdb.flushes,gauge,Number of table flushes +cockroachdb,rocksdb.flushes.total,gauge,[OpenMetrics v1 & v2] Number of table flushes +cockroachdb,rocksdb.ingested_bytes,gauge,"Bytes ingested +Shown as byte" +cockroachdb,rocksdb.memtable.total.size,gauge,"[OpenMetrics v1 & v2] Current size of memtable in bytes +Shown as byte" +cockroachdb,rocksdb.num_sstables,gauge,"[OpenMetrics v1 & v2] Number of rocksdb SSTables +Shown as table" +cockroachdb,rocksdb.read.amplification,gauge,"[OpenMetrics v1 & v2] Number of disk reads per query +Shown as read" +cockroachdb,rocksdb.table.readers.mem.estimate,gauge,[OpenMetrics v1 & v2] Memory used by index and filter blocks +cockroachdb,round_trip.latency,gauge,"[OpenMetrics v1] Distribution of round-trip latencies with other nodes in nanoseconds +Shown as nanosecond" +cockroachdb,round_trip.latency.bucket,count,"[OpenMetrics v2] Distribution of round-trip latencies with other nodes in nanoseconds +Shown as nanosecond" +cockroachdb,round_trip.latency.count,count,"[OpenMetrics v2] Distribution of round-trip latencies with other nodes in nanoseconds +Shown as nanosecond" +cockroachdb,round_trip.latency.sum,count,"[OpenMetrics v2] Distribution of round-trip latencies with other nodes in nanoseconds +Shown as nanosecond" +cockroachdb,round_trip_latency.bucket,count,"Distribution of round-trip latencies with other nodes. This only reflects successful heartbeats and measures gRPC overhead as well as possible head-of-line blocking. Elevated values in this metric may hint at network issues and/or saturation, but they are no proof of them.
CPU overload can similarly elevate this metric. +Shown as nanosecond" +cockroachdb,round_trip_latency.count,count,"Distribution of round-trip latencies with other nodes. This only reflects successful heartbeats and measures gRPC overhead as well as possible head-of-line blocking. Elevated values in this metric may hint at network issues and/or saturation, but they are no proof of them. CPU overload can similarly elevate this metric. +Shown as nanosecond" +cockroachdb,round_trip_latency.sum,count,"Distribution of round-trip latencies with other nodes. This only reflects successful heartbeats and measures gRPC overhead as well as possible head-of-line blocking. Elevated values in this metric may hint at network issues and/or saturation, but they are no proof of them. CPU overload can similarly elevate this metric. +Shown as nanosecond" +cockroachdb,rpc.batches.recv.count,count,Number of batches processed +cockroachdb,rpc.connection.avg_round_trip_latency,gauge,"Sum of exponentially weighted moving average of round-trip latencies, as measured through a gRPC RPC. Dividing this Gauge by rpc.connection.healthy gives an approximation of average latency, but the top-level round-trip-latency histogram is more useful. Instead, users should consult the label families of this metric if they are available. +Shown as nanosecond" +cockroachdb,rpc.connection.failures.count,count,Counter of failed connections. This includes both the event in which a healthy connection terminates as well as unsuccessful reconnection attempts. Connections that are terminated as part of local node shutdown are excluded. Decommissioned peers are excluded. +cockroachdb,rpc.connection.healthy,gauge,Gauge of current connections in a healthy state (i.e. bidirectionally connected and heartbeating) +cockroachdb,rpc.connection.healthy_nanos,gauge,"Gauge of nanoseconds of healthy connection time. On the prometheus endpoint scraped with the cluster setting ‘server.child_metrics.enabled’ set, the constituent parts of this metric are available on a per-peer basis and one can read off for how long a given peer has been connected +Shown as nanosecond" +cockroachdb,rpc.connection.heartbeats.count,count,Counter of successful heartbeats.
+cockroachdb,rpc.connection.inactive,gauge,"Gauge of current connections in an inactive state and pending deletion; these are not healthy but are not tracked as unhealthy either because there is reason to believe that the connection is no longer relevant, for example if the node has since been seen under a new address" +cockroachdb,rpc.connection.unhealthy,gauge,Gauge of current connections in an unhealthy state (not bidirectionally connected or heartbeating) +cockroachdb,rpc.connection.unhealthy_nanos,gauge,"Gauge of nanoseconds of unhealthy connection time. On the prometheus endpoint scraped with the cluster setting ‘server.child_metrics.enabled’ set, the constituent parts of this metric are available on a per-peer basis and one can read off for how long a given peer has been unreachable +Shown as nanosecond" +cockroachdb,rpc.method.addsstable.recv.count,count,Number of AddSSTable requests processed +cockroachdb,rpc.method.adminchangereplicas.recv.count,count,Number of AdminChangeReplicas requests processed +cockroachdb,rpc.method.adminmerge.recv.count,count,Number of AdminMerge requests processed +cockroachdb,rpc.method.adminrelocaterange.recv.count,count,Number of AdminRelocateRange requests processed +cockroachdb,rpc.method.adminscatter.recv.count,count,Number of AdminScatter requests processed +cockroachdb,rpc.method.adminsplit.recv.count,count,Number of AdminSplit requests processed +cockroachdb,rpc.method.admintransferlease.recv.count,count,Number of AdminTransferLease requests processed +cockroachdb,rpc.method.adminunsplit.recv.count,count,Number of AdminUnsplit requests processed +cockroachdb,rpc.method.adminverifyprotectedtimestamp.recv.count,count,Number of AdminVerifyProtectedTimestamp requests processed +cockroachdb,rpc.method.barrier.recv.count,count,Number of Barrier requests processed +cockroachdb,rpc.method.checkconsistency.recv.count,count,Number of CheckConsistency requests processed +cockroachdb,rpc.method.clearrange.recv.count,count,Number of ClearRange requests processed +cockroachdb,rpc.method.computechecksum.recv.count,count,Number of ComputeChecksum requests processed +cockroachdb,rpc.method.conditionalput.recv.count,count,Number of ConditionalPut requests processed +cockroachdb,rpc.method.delete.recv.count,count,Number of Delete requests processed +cockroachdb,rpc.method.deleterange.recv.count,count,Number of DeleteRange requests processed +cockroachdb,rpc.method.endtxn.recv.count,count,Number of EndTxn requests processed +cockroachdb,rpc.method.export.recv.count,count,Number of Export requests processed +cockroachdb,rpc.method.gc.recv.count,count,Number of GC requests processed +cockroachdb,rpc.method.get.recv.count,count,Number of Get requests processed +cockroachdb,rpc.method.heartbeattxn.recv.count,count,Number of HeartbeatTxn requests processed +cockroachdb,rpc.method.increment.recv.count,count,Number of Increment requests processed +cockroachdb,rpc.method.initput.recv.count,count,Number of InitPut requests processed +cockroachdb,rpc.method.isspanempty.recv.count,count,Number of IsSpanEmpty requests processed +cockroachdb,rpc.method.leaseinfo.recv.count,count,Number of LeaseInfo requests processed +cockroachdb,rpc.method.merge.recv.count,count,Number of Merge requests processed +cockroachdb,rpc.method.migrate.recv.count,count,Number of Migrate requests processed +cockroachdb,rpc.method.probe.recv.count,count,Number of Probe requests processed +cockroachdb,rpc.method.pushtxn.recv.count,count,Number of PushTxn requests processed
+cockroachdb,rpc.method.put.recv.count,count,Number of Put requests processed +cockroachdb,rpc.method.queryintent.recv.count,count,Number of QueryIntent requests processed +cockroachdb,rpc.method.querylocks.recv.count,count,Number of QueryLocks requests processed +cockroachdb,rpc.method.queryresolvedtimestamp.recv.count,count,Number of QueryResolvedTimestamp requests processed +cockroachdb,rpc.method.querytxn.recv.count,count,Number of QueryTxn requests processed +cockroachdb,rpc.method.rangestats.recv.count,count,Number of RangeStats requests processed +cockroachdb,rpc.method.recomputestats.recv.count,count,Number of RecomputeStats requests processed +cockroachdb,rpc.method.recovertxn.recv.count,count,Number of RecoverTxn requests processed +cockroachdb,rpc.method.refresh.recv.count,count,Number of Refresh requests processed +cockroachdb,rpc.method.refreshrange.recv.count,count,Number of RefreshRange requests processed +cockroachdb,rpc.method.requestlease.recv.count,count,Number of RequestLease requests processed +cockroachdb,rpc.method.resolveintent.recv.count,count,Number of ResolveIntent requests processed +cockroachdb,rpc.method.resolveintentrange.recv.count,count,Number of ResolveIntentRange requests processed +cockroachdb,rpc.method.reversescan.recv.count,count,Number of ReverseScan requests processed +cockroachdb,rpc.method.revertrange.recv.count,count,Number of RevertRange requests processed +cockroachdb,rpc.method.scan.recv.count,count,Number of Scan requests processed +cockroachdb,rpc.method.subsume.recv.count,count,Number of Subsume requests processed +cockroachdb,rpc.method.transferlease.recv.count,count,Number of TransferLease requests processed +cockroachdb,rpc.method.truncatelog.recv.count,count,Number of TruncateLog requests processed +cockroachdb,rpc.method.writebatch.recv.count,count,Number of WriteBatch requests processed +cockroachdb,rpc.streams.mux_rangefeed.active,gauge,Number of currently running MuxRangeFeed streams +cockroachdb,rpc.streams.mux_rangefeed.recv.count,count,Total number of MuxRangeFeed streams +cockroachdb,rpc.streams.rangefeed.active,gauge,Number of currently running RangeFeed streams +cockroachdb,rpc.streams.rangefeed.recv.count,count,Total number of RangeFeed streams +cockroachdb,schedules.BACKUP.failed.count,count,Number of BACKUP jobs failed +cockroachdb,schedules.BACKUP.last_completed_time,gauge,The unix timestamp of the most recently completed backup by a schedule specified as maintaining this metric +cockroachdb,schedules.BACKUP.protected_age_sec,gauge,"The age of the oldest PTS record protected by BACKUP schedules +Shown as second" +cockroachdb,schedules.BACKUP.protected_record_count,gauge,Number of PTS records held by BACKUP schedules +cockroachdb,schedules.BACKUP.started.count,count,Number of BACKUP jobs started +cockroachdb,schedules.BACKUP.succeeded.count,count,Number of BACKUP jobs succeeded +cockroachdb,schedules.CHANGEFEED.failed.count,count,Number of CHANGEFEED jobs failed +cockroachdb,schedules.CHANGEFEED.started.count,count,Number of CHANGEFEED jobs started +cockroachdb,schedules.CHANGEFEED.succeeded.count,count,Number of CHANGEFEED jobs succeeded +cockroachdb,schedules.backup.failed,count,[OpenMetrics v1] Number of scheduled backup jobs failed +cockroachdb,schedules.backup.failed.count,count,[OpenMetrics v2] Number of scheduled backup jobs failed +cockroachdb,schedules.backup.last_completed_time,gauge,"[OpenMetrics v1 & v2] The unix timestamp of the most recently completed backup by a schedule specified as maintaining this metric 
+Shown as second" +cockroachdb,schedules.backup.started,count,[OpenMetrics v1] Number of scheduled backup jobs started +cockroachdb,schedules.backup.started.count,count,[OpenMetrics v2] Number of scheduled backup jobs started +cockroachdb,schedules.backup.succeeded,count,[OpenMetrics v1] Number of scheduled backup jobs succeeded +cockroachdb,schedules.backup.succeeded.count,count,[OpenMetrics v2] Number of scheduled backup jobs succeeded +cockroachdb,schedules.error,gauge,Number of schedules which did not execute successfully +cockroachdb,schedules.malformed,gauge,Number of malformed schedules +cockroachdb,schedules.round.jobs_started,gauge,The number of jobs started +cockroachdb,schedules.round.reschedule_skip,gauge,The number of schedules rescheduled due to SKIP policy +cockroachdb,schedules.round.reschedule_wait,gauge,The number of schedules rescheduled due to WAIT policy +cockroachdb,schedules.scheduled.row.level.ttl.executor_failed.count,count,Number of scheduled-row-level-ttl-executor jobs failed +cockroachdb,schedules.scheduled_row_level_ttl_executor.failed.count,count,Number of scheduled-row-level-ttl-executor jobs failed +cockroachdb,schedules.scheduled_row_level_ttl_executor.started.count,count,Number of scheduled-row-level-ttl-executor jobs started +cockroachdb,schedules.scheduled_row_level_ttl_executor.succeeded.count,count,Number of scheduled-row-level-ttl-executor jobs succeeded +cockroachdb,schedules.scheduled_schema_telemetry_executor.failed.count,count,Number of scheduled-schema-telemetry-executor jobs failed +cockroachdb,schedules.scheduled_schema_telemetry_executor.started.count,count,Number of scheduled-schema-telemetry-executor jobs started +cockroachdb,schedules.scheduled_schema_telemetry_executor.succeeded.count,count,Number of scheduled-schema-telemetry-executor jobs succeeded +cockroachdb,schedules.scheduled_sql_stats_compaction_executor.failed.count,count,Number of scheduled-sql-stats-compaction-executor jobs failed +cockroachdb,schedules.scheduled_sql_stats_compaction_executor.started.count,count,Number of scheduled-sql-stats-compaction-executor jobs started +cockroachdb,schedules.scheduled_sql_stats_compaction_executor.succeeded.count,count,Number of scheduled-sql-stats-compaction-executor jobs succeeded +cockroachdb,seconds.until.enterprise.license.expiry,gauge,Seconds until enterprise license expiry (0 if no license present or running without enterprise features) +cockroachdb,seconds_until_enterprise_license_expiry,gauge,"Seconds until enterprise license expiry (0 if no license present or running without enterprise features) +Shown as second" +cockroachdb,security.certificate.expiration.ca,gauge,Expiration for the CA certificate. 0 means no certificate or error. +cockroachdb,security.certificate.expiration.ca_client_tenant,gauge,Expiration for the Tenant Client CA certificate. 0 means no certificate or error. +cockroachdb,security.certificate.expiration.client,gauge,"Minimum expiration for client certificates, labeled by SQL user. 0 means no certificate or error." +cockroachdb,security.certificate.expiration.client_ca,gauge,Expiration for the client CA certificate. 0 means no certificate or error. +cockroachdb,security.certificate.expiration.client_tenant,gauge,Expiration for the Tenant Client certificate. 0 means no certificate or error. +cockroachdb,security.certificate.expiration.node,gauge,Expiration for the node certificate. 0 means no certificate or error. 
+cockroachdb,security.certificate.expiration.node_client,gauge,Expiration for the node's client certificate. 0 means no certificate or error.
+cockroachdb,security.certificate.expiration.ui,gauge,Expiration for the UI certificate. 0 means no certificate or error.
+cockroachdb,security.certificate.expiration.ui_ca,gauge,Expiration for the UI CA certificate. 0 means no certificate or error.
+cockroachdb,security.certificate_expiration.ca,gauge,Expiration for the CA certificate. 0 means no certificate or error
+cockroachdb,security.certificate_expiration.client_ca,gauge,Expiration for the client CA certificate. 0 means no certificate or error
+cockroachdb,security.certificate_expiration.node,gauge,Expiration for the node certificate. 0 means no certificate or error
+cockroachdb,security.certificate_expiration.node_client,gauge,Expiration for the node's client certificate. 0 means no certificate or error
+cockroachdb,security.certificate_expiration.ui,gauge,Expiration for the UI certificate. 0 means no certificate or error
+cockroachdb,security.certificate_expiration.ui_ca,gauge,Expiration for the UI CA certificate. 0 means no certificate or error
+cockroachdb,spanconfig.kvsubscriber.oldest_protected_record_nanos,gauge,"Difference between the current time and the oldest protected timestamp (sudden drops indicate a record being released; an ever increasing number indicates that the oldest record is around and preventing GC if > configured GC TTL)
+Shown as nanosecond"
+cockroachdb,spanconfig.kvsubscriber.protected_record_count,gauge,"Number of protected timestamp records, as seen by KV"
+cockroachdb,spanconfig.kvsubscriber.update_behind_nanos,gauge,"Difference between the current time and when the KVSubscriber received its last update (an ever increasing number indicates that we're no longer receiving updates)
+Shown as nanosecond"
+cockroachdb,sql.bytesin,count,"[OpenMetrics v1] Number of sql bytes received
+Shown as byte"
+cockroachdb,sql.bytesin.count,count,"[OpenMetrics v2] Number of sql bytes received
+Shown as byte"
+cockroachdb,sql.bytesout,count,"[OpenMetrics v1] Number of sql bytes sent
+Shown as byte"
+cockroachdb,sql.bytesout.count,count,"[OpenMetrics v2] Number of sql bytes sent
+Shown as byte"
+cockroachdb,sql.conn.failures.count,count,Number of SQL connection failures
+cockroachdb,sql.conn.latency,gauge,"[OpenMetrics v1] Latency to establish and authenticate a SQL connection
+Shown as nanosecond"
+cockroachdb,sql.conn.latency.bucket,count,"[OpenMetrics v2] Latency to establish and authenticate a SQL connection
+Shown as nanosecond"
+cockroachdb,sql.conn.latency.count,count,"[OpenMetrics v2] Latency to establish and authenticate a SQL connection
+Shown as nanosecond"
+cockroachdb,sql.conn.latency.sum,count,"[OpenMetrics v2] Latency to establish and authenticate a SQL connection
+Shown as nanosecond"
+cockroachdb,sql.conns,gauge,"[OpenMetrics v1 & v2] Number of active sql connections
+Shown as connection"
+cockroachdb,sql.conns_waiting_to_hash,gauge,Number of SQL connection attempts that are being throttled in order to limit password hashing concurrency
+cockroachdb,sql.contention.resolver.failed_resolutions.count,count,Number of failed transaction ID resolution attempts
+cockroachdb,sql.contention.resolver.queue_size,gauge,Length of queued unresolved contention events
+cockroachdb,sql.contention.resolver.retries.count,count,Number of times transaction id resolution has been retried
+cockroachdb,sql.contention.txn_id_cache.miss.count,count,Number of cache misses
+cockroachdb,sql.contention.txn_id_cache.read.count,count,Number of cache reads
+cockroachdb,sql.copy.count,count,Number of COPY SQL statements successfully executed
+cockroachdb,sql.copy.internal.count,count,Number of COPY SQL statements successfully executed (internal queries)
+cockroachdb,sql.copy.nonatomic.count,count,Number of non-atomic COPY SQL statements successfully executed
+cockroachdb,sql.copy.nonatomic.internal.count,count,Number of non-atomic COPY SQL statements successfully executed (internal queries)
+cockroachdb,sql.copy.nonatomic.started.count,count,Number of non-atomic COPY SQL statements started
+cockroachdb,sql.copy.nonatomic.started.internal.count,count,Number of non-atomic COPY SQL statements started (internal queries)
+cockroachdb,sql.copy.started.count,count,Number of COPY SQL statements started
+cockroachdb,sql.copy.started.internal.count,count,Number of COPY SQL statements started (internal queries)
+cockroachdb,sql.ddl.count,count,[OpenMetrics v1 & v2] Number of SQL DDL statements
+cockroachdb,sql.ddl.internal.count,count,Number of SQL DDL statements successfully executed (internal queries)
+cockroachdb,sql.ddl.started.count,count,Number of SQL DDL statements started
+cockroachdb,sql.ddl.started.internal.count,count,Number of SQL DDL statements started (internal queries)
+cockroachdb,sql.delete.count,count,[OpenMetrics v1 & v2] Number of SQL DELETE statements
+cockroachdb,sql.delete.internal.count,count,Number of SQL DELETE statements successfully executed (internal queries)
+cockroachdb,sql.delete.started.count,count,Number of SQL DELETE statements started
+cockroachdb,sql.delete.started.internal.count,count,Number of SQL DELETE statements started (internal queries)
+cockroachdb,sql.disk.distsql.current,gauge,"Current sql statement disk usage for distsql
+Shown as byte"
+cockroachdb,sql.disk.distsql.max.bucket,count,"Disk usage per sql statement for distsql
+Shown as byte"
+cockroachdb,sql.disk.distsql.max.count,count,"Disk usage per sql statement for distsql
+Shown as byte"
+cockroachdb,sql.disk.distsql.max.sum,count,"Disk usage per sql statement for distsql
+Shown as byte"
+cockroachdb,sql.disk.distsql.spilled.bytes.read.count,count,"Number of bytes read from temporary disk storage as a result of spilling
+Shown as byte"
+cockroachdb,sql.disk.distsql.spilled.bytes.written.count,count,"Number of bytes written to temporary disk storage as a result of spilling
+Shown as byte"
+cockroachdb,sql.distsql.contended.queries,count,[OpenMetrics v1] Number of SQL queries that experienced contention
+cockroachdb,sql.distsql.contended.queries.count,count,[OpenMetrics v2] Number of SQL queries that experienced contention
+cockroachdb,sql.distsql.contended_queries.count,count,Number of SQL queries that experienced contention
+cockroachdb,sql.distsql.dist_query_rerun_locally.count,count,Total number of cases when distributed query error resulted in a local rerun
+cockroachdb,sql.distsql.dist_query_rerun_locally.failure_count.count,count,Total number of cases when the local rerun of a distributed query resulted in an error
+cockroachdb,sql.distsql.exec.latency,gauge,"[OpenMetrics v1] Latency in nanoseconds of DistSQL statement execution
+Shown as nanosecond"
+cockroachdb,sql.distsql.exec.latency.bucket,count,"Latency of DistSQL statement execution
+Shown as nanosecond"
+cockroachdb,sql.distsql.exec.latency.count,count,"[OpenMetrics v2] Latency in nanoseconds of DistSQL statement execution
+Shown as nanosecond"
+cockroachdb,sql.distsql.exec.latency.internal.bucket,count,"Latency of DistSQL statement execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.distsql.exec.latency.internal.count,count,"Latency of DistSQL statement execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.distsql.exec.latency.internal.sum,count,"Latency of DistSQL statement execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.distsql.exec.latency.sum,count,"[OpenMetrics v2] Latency in nanoseconds of DistSQL statement execution +Shown as nanosecond" +cockroachdb,sql.distsql.flows.active,gauge,[OpenMetrics v1 & v2] Number of distributed SQL flows currently active +cockroachdb,sql.distsql.flows.count,count,[OpenMetrics v2] Number of distributed SQL flows executed +cockroachdb,sql.distsql.flows.total,count,[OpenMetrics v1] Number of distributed SQL flows executed +cockroachdb,sql.distsql.flows.total.count,count,Number of distributed SQL flows executed +cockroachdb,sql.distsql.queries.active,gauge,[OpenMetrics v1 & v2] Number of distributed SQL queries currently active +cockroachdb,sql.distsql.queries.count,count,[OpenMetrics v2] Number of distributed SQL queries executed +cockroachdb,sql.distsql.queries.spilled.count,count,Number of queries that have spilled to disk +cockroachdb,sql.distsql.queries.total,count,[OpenMetrics v1] Number of distributed SQL queries executed +cockroachdb,sql.distsql.select.count,count,[OpenMetrics v1 & v2] Number of DistSQL SELECT statements +cockroachdb,sql.distsql.select.internal.count,count,Number of DistSQL SELECT statements (internal queries) +cockroachdb,sql.distsql.service.latency,gauge,"[OpenMetrics v1] Latency in nanoseconds of DistSQL request execution +Shown as nanosecond" +cockroachdb,sql.distsql.service.latency.bucket,count,"[OpenMetrics v2] Latency in nanoseconds of DistSQL request execution +Shown as nanosecond" +cockroachdb,sql.distsql.service.latency.count,count,"[OpenMetrics v2] Latency in nanoseconds of DistSQL request execution +Shown as nanosecond" +cockroachdb,sql.distsql.service.latency.internal,count,"Latency of DistSQL request execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.distsql.service.latency.internal.bucket,count,"Latency of DistSQL request execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.distsql.service.latency.internal.count,count,"Latency of DistSQL request execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.distsql.service.latency.internal.sum,count,"Latency of DistSQL request execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.distsql.service.latency.sum,count,"[OpenMetrics v2] Latency in nanoseconds of DistSQL request execution +Shown as nanosecond" +cockroachdb,sql.distsql.vec.openfds,gauge,Current number of open file descriptors used by vectorized external storage +cockroachdb,sql.exec.latency,gauge,"[OpenMetrics v1] Latency in nanoseconds of SQL statement execution +Shown as nanosecond" +cockroachdb,sql.exec.latency.bucket,count,"[OpenMetrics v2] Latency in nanoseconds of SQL statement execution +Shown as nanosecond" +cockroachdb,sql.exec.latency.count,count,"[OpenMetrics v2] Latency in nanoseconds of SQL statement execution +Shown as nanosecond" +cockroachdb,sql.exec.latency.internal,count,"Latency of SQL statement execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.exec.latency.internal.bucket,count,"Latency of SQL statement execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.exec.latency.internal.count,count,"Latency 
of SQL statement execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.exec.latency.internal.sum,count,"Latency of SQL statement execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.exec.latency.sum,count,"[OpenMetrics v2] Latency in nanoseconds of SQL statement execution +Shown as nanosecond" +cockroachdb,sql.failure,count,[OpenMetrics v1] Number of statements resulting in a planning or runtime error +cockroachdb,sql.failure.count,count,[OpenMetrics v2] Number of statements resulting in a planning or runtime error +cockroachdb,sql.failure.internal.count,count,Number of statements resulting in a planning or runtime error (internal queries) +cockroachdb,sql.feature_flag_denial.count,count,Counter of the number of statements denied by a feature flag +cockroachdb,sql.full.scan,count,[OpenMetrics v1] Number of full table or index scans +cockroachdb,sql.full.scan.count,count,[OpenMetrics v2] Number of full table or index scans +cockroachdb,sql.full.scan.internal.count,count,Number of full table or index scans (internal queries) +cockroachdb,sql.guardrails.full_scan_rejected.count,count,Number of full table or index scans that have been rejected because of disallow_full_table_scans guardrail +cockroachdb,sql.guardrails.full_scan_rejected.internal.count,count,Number of full table or index scans that have been rejected because of disallow_full_table_scans guardrail (internal queries) +cockroachdb,sql.guardrails.max_row_size_err.count,count,Number of rows observed violating sql.guardrails.max_row_size_err +cockroachdb,sql.guardrails.max_row_size_err.internal.count,count,Number of rows observed violating sql.guardrails.max_row_size_err (internal queries) +cockroachdb,sql.guardrails.max_row_size_log.count,count,Number of rows observed violating sql.guardrails.max_row_size_log +cockroachdb,sql.guardrails.max_row_size_log.internal.count,count,Number of rows observed violating sql.guardrails.max_row_size_log (internal queries) +cockroachdb,sql.guardrails.transaction_rows_read_err.count,count,Number of transactions errored because of transaction_rows_read_err guardrail +cockroachdb,sql.guardrails.transaction_rows_read_err.internal.count,count,Number of transactions errored because of transaction_rows_read_err guardrail (internal queries) +cockroachdb,sql.guardrails.transaction_rows_read_log.count,count,Number of transactions logged because of transaction_rows_read_log guardrail +cockroachdb,sql.guardrails.transaction_rows_read_log.internal.count,count,Number of transactions logged because of transaction_rows_read_log guardrail (internal queries) +cockroachdb,sql.guardrails.transaction_rows_written_err.count,count,Number of transactions errored because of transaction_rows_written_err guardrail +cockroachdb,sql.guardrails.transaction_rows_written_err.internal.count,count,Number of transactions errored because of transaction_rows_written_err guardrail (internal queries) +cockroachdb,sql.guardrails.transaction_rows_written_log.count,count,Number of transactions logged because of transaction_rows_written_log guardrail +cockroachdb,sql.guardrails.transaction_rows_written_log.internal.count,count,Number of transactions logged because of transaction_rows_written_log guardrail (internal queries) +cockroachdb,sql.hydrated_schema_cache.hits.count,count,counter on the number of cache hits +cockroachdb,sql.hydrated_schema_cache.misses.count,count,counter on the number of cache misses +cockroachdb,sql.hydrated_table_cache.hits.count,count,counter on the number of cache hits 
+cockroachdb,sql.hydrated_table_cache.misses.count,count,counter on the number of cache misses +cockroachdb,sql.hydrated_type_cache.hits.count,count,counter on the number of cache hits +cockroachdb,sql.hydrated_type_cache.misses.count,count,counter on the number of cache misses +cockroachdb,sql.hydrated_udf_cache.hits.count,count,counter on the number of cache hits +cockroachdb,sql.hydrated_udf_cache.misses.count,count,counter on the number of cache misses +cockroachdb,sql.insert.count,count,[OpenMetrics v1 & v2] Number of SQL INSERT statements +cockroachdb,sql.insert.internal.count,count,Number of SQL INSERT statements successfully executed (internal queries) +cockroachdb,sql.insert.started.count,count,Number of SQL INSERT statements started +cockroachdb,sql.insert.started.internal.count,count,Number of SQL INSERT statements started (internal queries) +cockroachdb,sql.insights.anomaly_detection.evictions.count,count,Evictions of fingerprint latency summaries due to memory pressure +cockroachdb,sql.insights.anomaly_detection.fingerprints,gauge,Current number of statement fingerprints being monitored for anomaly detection +cockroachdb,sql.insights.anomaly_detection.memory,gauge,"Current memory used to support anomaly detection +Shown as byte" +cockroachdb,sql.leases.active,gauge,The number of outstanding SQL schema leases. +cockroachdb,sql.mem.admin.current,gauge,[OpenMetrics v1 & v2] Current sql statement memory usage for admin +cockroachdb,sql.mem.admin.max,gauge,[OpenMetrics v1] Memory usage per sql statement for admin +cockroachdb,sql.mem.admin.max.bucket,count,[OpenMetrics v2] Memory usage per sql statement for admin +cockroachdb,sql.mem.admin.max.count,count,[OpenMetrics v2] Memory usage per sql statement for admin +cockroachdb,sql.mem.admin.max.sum,count,[OpenMetrics v2] Memory usage per sql statement for admin +cockroachdb,sql.mem.admin.session.current,gauge,[OpenMetrics v1 & v2] Current sql session memory usage for admin +cockroachdb,sql.mem.admin.session.max.bucket,count,[OpenMetrics v2] Memory usage per sql session for admin +cockroachdb,sql.mem.admin.session.max.count,count,[OpenMetrics v2] Memory usage per sql session for admin +cockroachdb,sql.mem.admin.session.max.sum,count,[OpenMetrics v2] Memory usage per sql session for admin +cockroachdb,sql.mem.admin.txn.current,gauge,[OpenMetrics v1 & v2] Current sql transaction memory usage for admin +cockroachdb,sql.mem.admin.txn.max,gauge,[OpenMetrics v1] Memory usage per sql transaction for admin +cockroachdb,sql.mem.admin.txn.max.bucket,count,[OpenMetrics v2] Memory usage per sql transaction for admin +cockroachdb,sql.mem.admin.txn.max.count,count,[OpenMetrics v2] Memory usage per sql transaction for admin +cockroachdb,sql.mem.admin.txn.max.sum,count,[OpenMetrics v2] Memory usage per sql transaction for admin +cockroachdb,sql.mem.bulk.current,gauge,"Current sql statement memory usage for bulk operations +Shown as byte" +cockroachdb,sql.mem.bulk.max,count,"Memory usage per sql statement for bulk operations +Shown as byte" +cockroachdb,sql.mem.bulk.max.bucket,count,"Memory usage per sql statement for bulk operations +Shown as byte" +cockroachdb,sql.mem.bulk.max.count,count,"Memory usage per sql statement for bulk operations +Shown as byte" +cockroachdb,sql.mem.bulk.max.sum,count,"Memory usage per sql statement for bulk operations +Shown as byte" +cockroachdb,sql.mem.client.current,gauge,[OpenMetrics v1 & v2] Current sql statement memory usage for client +cockroachdb,sql.mem.client.max,gauge,[OpenMetrics v1] Memory usage per sql 
statement for client +cockroachdb,sql.mem.client.max.bucket,count,[OpenMetrics v2] Memory usage per sql statement for client +cockroachdb,sql.mem.client.max.count,count,[OpenMetrics v2] Memory usage per sql statement for client +cockroachdb,sql.mem.client.max.sum,count,[OpenMetrics v2] Memory usage per sql statement for client +cockroachdb,sql.mem.client.session.current,gauge,[OpenMetrics v1 & v2] Current sql session memory usage for client +cockroachdb,sql.mem.client.session.max,gauge,[OpenMetrics v1] Memory usage per sql session for client +cockroachdb,sql.mem.client.session.max.bucket,count,[OpenMetrics v2] Memory usage per sql session for client +cockroachdb,sql.mem.client.session.max.count,count,[OpenMetrics v2] Memory usage per sql session for client +cockroachdb,sql.mem.client.session.max.sum,count,[OpenMetrics v2] Memory usage per sql session for client +cockroachdb,sql.mem.client.txn.current,gauge,[OpenMetrics v1 & v2] Current sql transaction memory usage for client +cockroachdb,sql.mem.client.txn.max,gauge,[OpenMetrics v1] Memory usage per sql transaction for client +cockroachdb,sql.mem.client.txn.max.bucket,count,[OpenMetrics v2] Memory usage per sql transaction for client +cockroachdb,sql.mem.client.txn.max.count,count,[OpenMetrics v2] Memory usage per sql transaction for client +cockroachdb,sql.mem.client.txn.max.sum,count,[OpenMetrics v2] Memory usage per sql transaction for client +cockroachdb,sql.mem.conns.current,gauge,[OpenMetrics v1 & v2] Current sql statement memory usage for conns +cockroachdb,sql.mem.conns.max,gauge,[OpenMetrics v1] Memory usage per sql statement for conns +cockroachdb,sql.mem.conns.max.bucket,count,[OpenMetrics v2] Memory usage per sql statement for conns +cockroachdb,sql.mem.conns.max.count,count,[OpenMetrics v2] Memory usage per sql statement for conns +cockroachdb,sql.mem.conns.max.sum,count,[OpenMetrics v2] Memory usage per sql statement for conns +cockroachdb,sql.mem.conns.session.current,gauge,[OpenMetrics v1 & v2] Current sql session memory usage for conns +cockroachdb,sql.mem.conns.session.max,gauge,[OpenMetrics v1] Memory usage per sql session for conns +cockroachdb,sql.mem.conns.session.max.bucket,count,[OpenMetrics v2] Memory usage per sql session for conns +cockroachdb,sql.mem.conns.session.max.count,count,[OpenMetrics v2] Memory usage per sql session for conns +cockroachdb,sql.mem.conns.session.max.sum,count,[OpenMetrics v2] Memory usage per sql session for conns +cockroachdb,sql.mem.conns.txn.current,gauge,[OpenMetrics v1 & v2] Current sql transaction memory usage for conns +cockroachdb,sql.mem.conns.txn.max,gauge,[OpenMetrics v1] Memory usage per sql transaction for conns +cockroachdb,sql.mem.conns.txn.max.bucket,count,[OpenMetrics v2] Memory usage per sql transaction for conns +cockroachdb,sql.mem.conns.txn.max.count,count,[OpenMetrics v2] Memory usage per sql transaction for conns +cockroachdb,sql.mem.conns.txn.max.sum,count,[OpenMetrics v2] Memory usage per sql transaction for conns +cockroachdb,sql.mem.distsql.current,gauge,[OpenMetrics v1 & v2] Current sql statement memory usage for distsql +cockroachdb,sql.mem.distsql.max,gauge,[OpenMetrics v1] Memory usage per sql statement for distsql +cockroachdb,sql.mem.distsql.max.bucket,count,[OpenMetrics v2] Memory usage per sql statement for distsql +cockroachdb,sql.mem.distsql.max.count,count,[OpenMetrics v2] Memory usage per sql statement for distsql +cockroachdb,sql.mem.distsql.max.sum,count,[OpenMetrics v2] Memory usage per sql statement for distsql 
+cockroachdb,sql.mem.internal.current,gauge,[OpenMetrics v1 & v2] Current sql statement memory usage for internal +cockroachdb,sql.mem.internal.max,gauge,[OpenMetrics v1] Memory usage per sql statement for internal +cockroachdb,sql.mem.internal.max.bucket,count,[OpenMetrics v2] Memory usage per sql statement for internal +cockroachdb,sql.mem.internal.max.count,count,[OpenMetrics v2] Memory usage per sql statement for internal +cockroachdb,sql.mem.internal.max.sum,count,[OpenMetrics v2] Memory usage per sql statement for internal +cockroachdb,sql.mem.internal.session.current,gauge,[OpenMetrics v1 & v2] Current sql session memory usage for internal +cockroachdb,sql.mem.internal.session.max,gauge,[OpenMetrics v1] Memory usage per sql session for internal +cockroachdb,sql.mem.internal.session.max.bucket,count,[OpenMetrics v2] Memory usage per sql session for internal +cockroachdb,sql.mem.internal.session.max.count,count,[OpenMetrics v2] Memory usage per sql session for internal +cockroachdb,sql.mem.internal.session.max.sum,count,[OpenMetrics v2] Memory usage per sql session for internal +cockroachdb,sql.mem.internal.session.prepared.current,gauge,"Current sql session memory usage by prepared statements for internal +Shown as byte" +cockroachdb,sql.mem.internal.session.prepared.max.bucket,count,"Memory usage by prepared statements per sql session for internal +Shown as byte" +cockroachdb,sql.mem.internal.session.prepared.max.count,count,"Memory usage by prepared statements per sql session for internal +Shown as byte" +cockroachdb,sql.mem.internal.session.prepared.max.sum,count,"Memory usage by prepared statements per sql session for internal +Shown as byte" +cockroachdb,sql.mem.internal.txn.current,gauge,[OpenMetrics v1 & v2] Current sql transaction memory usage for internal +cockroachdb,sql.mem.internal.txn.max,gauge,[OpenMetrics v1] Memory usage per sql transaction for internal +cockroachdb,sql.mem.internal.txn.max.bucket,count,[OpenMetrics v2] Memory usage per sql transaction for internal +cockroachdb,sql.mem.internal.txn.max.count,count,[OpenMetrics v2] Memory usage per sql transaction for internal +cockroachdb,sql.mem.internal.txn.max.sum,count,[OpenMetrics v2] Memory usage per sql transaction for internal +cockroachdb,sql.mem.root.current,gauge,Current sql statement memory usage for root +cockroachdb,sql.mem.root.max.bucket,count,"Memory usage per sql statement for root +Shown as byte" +cockroachdb,sql.mem.root.max.count,count,"Memory usage per sql statement for root +Shown as byte" +cockroachdb,sql.mem.root.max.sum,count,"Memory usage per sql statement for root +Shown as byte" +cockroachdb,sql.mem.sql.current,gauge,"Current sql statement memory usage for sql +Shown as byte" +cockroachdb,sql.mem.sql.max,count,"Memory usage per sql statement for sql +Shown as byte" +cockroachdb,sql.mem.sql.max.bucket,count,"Memory usage per sql statement for sql +Shown as byte" +cockroachdb,sql.mem.sql.max.count,count,"Memory usage per sql statement for sql +Shown as byte" +cockroachdb,sql.mem.sql.max.sum,count,"Memory usage per sql statement for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.current,gauge,"Current sql session memory usage for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.max,count,"Memory usage per sql session for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.max.bucket,count,"Memory usage per sql session for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.max.count,count,"Memory usage per sql session for sql +Shown as byte" 
+cockroachdb,sql.mem.sql.session.max.sum,count,"Memory usage per sql session for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.prepared.current,gauge,"Current sql session memory usage by prepared statements for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.prepared.max,count,"Memory usage by prepared statements per sql session for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.prepared.max.bucket,count,"Memory usage by prepared statements per sql session for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.prepared.max.count,count,"Memory usage by prepared statements per sql session for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.prepared.max.sum,count,"Memory usage by prepared statements per sql session for sql +Shown as byte" +cockroachdb,sql.mem.sql.txn.current,gauge,"Current sql transaction memory usage for sql +Shown as byte" +cockroachdb,sql.mem.sql.txn.max,count,"Memory usage per sql transaction for sql +Shown as byte" +cockroachdb,sql.mem.sql.txn.max.bucket,count,"Memory usage per sql transaction for sql +Shown as byte" +cockroachdb,sql.mem.sql.txn.max.count,count,"Memory usage per sql transaction for sql +Shown as byte" +cockroachdb,sql.mem.sql.txn.max.sum,count,"Memory usage per sql transaction for sql +Shown as byte" +cockroachdb,sql.misc.count,count,[OpenMetrics v1 & v2] Number of other SQL statements +cockroachdb,sql.misc.internal.count,count,Number of other SQL statements successfully executed (internal queries) +cockroachdb,sql.misc.started.count,count,Number of other SQL statements started +cockroachdb,sql.misc.started.internal.count,count,Number of other SQL statements started (internal queries) +cockroachdb,sql.new_conns.count,count,Counter of the number of sql connections created +cockroachdb,sql.optimizer.fallback.count,count,Number of statements which the cost-based optimizer was unable to plan +cockroachdb,sql.optimizer.fallback.internal.count,count,Number of statements which the cost-based optimizer was unable to plan (internal queries) +cockroachdb,sql.optimizer.plan_cache.hits.count,count,Number of non-prepared statements for which a cached plan was used +cockroachdb,sql.optimizer.plan_cache.hits.internal.count,count,Number of non-prepared statements for which a cached plan was used (internal queries) +cockroachdb,sql.optimizer.plan_cache.misses.count,count,Number of non-prepared statements for which a cached plan was not used +cockroachdb,sql.optimizer.plan_cache.misses.internal.count,count,Number of non-prepared statements for which a cached plan was not used (internal queries) +cockroachdb,sql.pgwire_cancel.count,count,Number of pgwire query cancel requests +cockroachdb,sql.pgwire_cancel.ignored.count,count,Number of pgwire query cancel requests that were ignored due to rate limiting +cockroachdb,sql.pgwire_cancel.successful.count,count,Number of pgwire query cancel requests that were successful +cockroachdb,sql.pre_serve.bytesin.count,count,"Number of SQL bytes received prior to routing the connection to the target SQL server +Shown as byte" +cockroachdb,sql.pre_serve.bytesout.count,count,"Number of SQL bytes sent prior to routing the connection to the target SQL server +Shown as byte" +cockroachdb,sql.pre_serve.conn.failures.count,count,Number of SQL connection failures prior to routing the connection to the target SQL server +cockroachdb,sql.pre_serve.mem.cur,gauge,"Current memory usage for SQL connections prior to routing the connection to the target SQL server +Shown as byte" +cockroachdb,sql.pre_serve.mem.max,count,"Memory 
usage for SQL connections prior to routing the connection to the target SQL server +Shown as byte" +cockroachdb,sql.pre_serve.mem.max.bucket,count,"Memory usage for SQL connections prior to routing the connection to the target SQL server +Shown as byte" +cockroachdb,sql.pre_serve.mem.max.count,count,"Memory usage for SQL connections prior to routing the connection to the target SQL server +Shown as byte" +cockroachdb,sql.pre_serve.mem.max.sum,count,"Memory usage for SQL connections prior to routing the connection to the target SQL server +Shown as byte" +cockroachdb,sql.pre_serve.new_conns.count,count,Number of SQL connections created prior to routing the connection to the target SQL server +cockroachdb,sql.query.count,count,[OpenMetrics v1 & v2] Number of SQL queries +cockroachdb,sql.query.internal.count,count,Number of SQL queries executed (internal queries) +cockroachdb,sql.query.started.count,count,Number of SQL queries started +cockroachdb,sql.query.started.internal.count,count,Number of SQL queries started (internal queries) +cockroachdb,sql.restart_savepoint.count,count,Number of SAVEPOINT cockroach_restart statements successfully executed +cockroachdb,sql.restart_savepoint.internal.count,count,Number of SAVEPOINT cockroach_restart statements successfully executed (internal queries) +cockroachdb,sql.restart_savepoint.release.count,count,Number of RELEASE SAVEPOINT cockroach_restart statements successfully executed +cockroachdb,sql.restart_savepoint.release.internal.count,count,Number of RELEASE SAVEPOINT cockroach_restart statements successfully executed (internal queries) +cockroachdb,sql.restart_savepoint.release.started.count,count,Number of RELEASE SAVEPOINT cockroach_restart statements started +cockroachdb,sql.restart_savepoint.release.started.internal.count,count,Number of RELEASE SAVEPOINT cockroach_restart statements started (internal queries) +cockroachdb,sql.restart_savepoint.rollback.count,count,Number of ROLLBACK TO SAVEPOINT cockroach_restart statements successfully executed +cockroachdb,sql.restart_savepoint.rollback.internal.count,count,Number of ROLLBACK TO SAVEPOINT cockroach_restart statements successfully executed (internal queries) +cockroachdb,sql.restart_savepoint.rollback.started.count,count,Number of ROLLBACK TO SAVEPOINT cockroach_restart statements started +cockroachdb,sql.restart_savepoint.rollback.started.internal.count,count,Number of ROLLBACK TO SAVEPOINT cockroach_restart statements started (internal queries) +cockroachdb,sql.restart_savepoint.started.count,count,Number of SAVEPOINT cockroach_restart statements started +cockroachdb,sql.restart_savepoint.started.internal.count,count,Number of SAVEPOINT cockroach_restart statements started (internal queries) +cockroachdb,sql.savepoint.count,count,Number of SQL SAVEPOINT statements successfully executed +cockroachdb,sql.savepoint.internal.count,count,Number of SQL SAVEPOINT statements successfully executed (internal queries) +cockroachdb,sql.savepoint.release.count,count,Number of RELEASE SAVEPOINT statements successfully executed +cockroachdb,sql.savepoint.release.internal.count,count,Number of RELEASE SAVEPOINT statements successfully executed (internal queries) +cockroachdb,sql.savepoint.release.started.count,count,Number of RELEASE SAVEPOINT statements started +cockroachdb,sql.savepoint.release.started.internal.count,count,Number of RELEASE SAVEPOINT statements started (internal queries) +cockroachdb,sql.savepoint.rollback.count,count,Number of ROLLBACK TO SAVEPOINT statements successfully executed 
+cockroachdb,sql.savepoint.rollback.internal.count,count,Number of ROLLBACK TO SAVEPOINT statements successfully executed (internal queries)
+cockroachdb,sql.savepoint.rollback.started.count,count,Number of ROLLBACK TO SAVEPOINT statements started
+cockroachdb,sql.savepoint.rollback.started.internal.count,count,Number of ROLLBACK TO SAVEPOINT statements started (internal queries)
+cockroachdb,sql.savepoint.started.count,count,Number of SQL SAVEPOINT statements started
+cockroachdb,sql.savepoint.started.internal.count,count,Number of SQL SAVEPOINT statements started (internal queries)
+cockroachdb,sql.schema.invalid_objects,gauge,Gauge of detected invalid objects within the system.descriptor table (measured by querying crdb_internal.invalid_objects)
+cockroachdb,sql.schema_changer.permanent_errors.count,count,Counter of the number of permanent errors experienced by the schema changer
+cockroachdb,sql.schema_changer.retry_errors.count,count,Counter of the number of retriable errors experienced by the schema changer
+cockroachdb,sql.schema_changer.running,gauge,Gauge of currently running schema changes
+cockroachdb,sql.schema_changer.successes.count,count,Counter of the number of schema changer resumes which succeed
+cockroachdb,sql.select.count,count,[OpenMetrics v1 & v2] Number of SQL SELECT statements
+cockroachdb,sql.select.internal.count,count,Number of SQL SELECT statements successfully executed (internal queries)
+cockroachdb,sql.select.started.count,count,Number of SQL SELECT statements started
+cockroachdb,sql.select.started.internal.count,count,Number of SQL SELECT statements started (internal queries)
+cockroachdb,sql.service.latency,gauge,"[OpenMetrics v1] Latency in nanoseconds of SQL request execution
+Shown as nanosecond"
+cockroachdb,sql.service.latency.bucket,count,"[OpenMetrics v2] Latency in nanoseconds of SQL request execution
+Shown as nanosecond"
+cockroachdb,sql.service.latency.count,count,"[OpenMetrics v2] Latency in nanoseconds of SQL request execution
+Shown as nanosecond"
+cockroachdb,sql.service.latency.internal,count,"Latency of SQL request execution (internal queries)
+Shown as nanosecond"
+cockroachdb,sql.service.latency.internal.bucket,count,"Latency of SQL request execution (internal queries)
+Shown as nanosecond"
+cockroachdb,sql.service.latency.internal.count,count,"Latency of SQL request execution (internal queries)
+Shown as nanosecond"
+cockroachdb,sql.service.latency.internal.sum,count,"Latency of SQL request execution (internal queries)
+Shown as nanosecond"
+cockroachdb,sql.service.latency.sum,count,"[OpenMetrics v2] Latency in nanoseconds of SQL request execution
+Shown as nanosecond"
+cockroachdb,sql.statements.active,gauge,[OpenMetrics v1 & v2] Number of currently active user SQL statements
+cockroachdb,sql.statements.active.internal,gauge,Number of currently active user SQL statements (internal queries)
+cockroachdb,sql.stats.cleanup.rows_removed.count,count,Number of stale statistics rows that are removed
+cockroachdb,sql.stats.discarded.current.count,count,Number of fingerprint statistics being discarded
+cockroachdb,sql.stats.flush.count,count,Number of times SQL Stats are flushed to persistent storage
+cockroachdb,sql.stats.flush.duration,count,"Time taken in nanoseconds to complete SQL Stats flush
+Shown as nanosecond"
+cockroachdb,sql.stats.flush.duration.bucket,count,"Time taken in nanoseconds to complete SQL Stats flush
+Shown as nanosecond"
+cockroachdb,sql.stats.flush.duration.count,count,"Time taken in nanoseconds to complete SQL Stats flush
+Shown as nanosecond"
+cockroachdb,sql.stats.flush.duration.sum,count,"Time taken in nanoseconds to complete SQL Stats flush
+Shown as nanosecond"
+cockroachdb,sql.stats.flush.error.count,count,Number of errors encountered when flushing SQL Stats
+cockroachdb,sql.stats.mem.current,gauge,"Current memory usage for fingerprint storage
+Shown as byte"
+cockroachdb,sql.stats.mem.max,count,"Memory usage for fingerprint storage
+Shown as byte"
+cockroachdb,sql.stats.mem.max.bucket,count,"Memory usage for fingerprint storage
+Shown as byte"
+cockroachdb,sql.stats.mem.max.count,count,"Memory usage for fingerprint storage
+Shown as byte"
+cockroachdb,sql.stats.mem.max.sum,count,"Memory usage for fingerprint storage
+Shown as byte"
+cockroachdb,sql.stats.reported.mem.current,gauge,"Current memory usage for reported fingerprint storage
+Shown as byte"
+cockroachdb,sql.stats.reported.mem.max,count,"Memory usage for reported fingerprint storage
+Shown as byte"
+cockroachdb,sql.stats.reported.mem.max.bucket,count,"Memory usage for reported fingerprint storage
+Shown as byte"
+cockroachdb,sql.stats.reported.mem.max.count,count,"Memory usage for reported fingerprint storage
+Shown as byte"
+cockroachdb,sql.stats.reported.mem.max.sum,count,"Memory usage for reported fingerprint storage
+Shown as byte"
+cockroachdb,sql.stats.txn_stats_collection.duration,count,"Time taken in nanoseconds to collect transaction stats
+Shown as nanosecond"
+cockroachdb,sql.stats.txn_stats_collection.duration.bucket,count,"Time taken in nanoseconds to collect transaction stats
+Shown as nanosecond"
+cockroachdb,sql.stats.txn_stats_collection.duration.count,count,"Time taken in nanoseconds to collect transaction stats
+Shown as nanosecond"
+cockroachdb,sql.stats.txn_stats_collection.duration.sum,count,"Time taken in nanoseconds to collect transaction stats
+Shown as nanosecond"
+cockroachdb,sql.temp_object_cleaner.active_cleaners,gauge,number of cleaner tasks currently running on this node
+cockroachdb,sql.temp_object_cleaner.schemas_deletion_error.count,count,number of errored schema deletions by the temp object cleaner on this node
+cockroachdb,sql.temp_object_cleaner.schemas_deletion_success.count,count,number of successful schema deletions by the temp object cleaner on this node
+cockroachdb,sql.temp_object_cleaner.schemas_to_delete.count,count,number of schemas to be deleted by the temp object cleaner on this node
+cockroachdb,sql.txn.abort.count,count,[OpenMetrics v1 & v2] Number of SQL transaction ABORT statements
+cockroachdb,sql.txn.abort.internal.count,count,Number of SQL transaction abort errors (internal queries)
+cockroachdb,sql.txn.begin.count,count,[OpenMetrics v1 & v2] Number of SQL transaction BEGIN statements
+cockroachdb,sql.txn.begin.internal.count,count,Number of SQL transaction BEGIN statements successfully executed (internal queries)
+cockroachdb,sql.txn.begin.started.count,count,Number of SQL transaction BEGIN statements started
+cockroachdb,sql.txn.begin.started.internal.count,count,Number of SQL transaction BEGIN statements started (internal queries)
+cockroachdb,sql.txn.commit.count,count,[OpenMetrics v1 & v2] Number of SQL transaction COMMIT statements
+cockroachdb,sql.txn.commit.internal.count,count,Number of SQL transaction COMMIT statements successfully executed (internal queries)
+cockroachdb,sql.txn.commit.started.count,count,Number of SQL transaction COMMIT statements started
+cockroachdb,sql.txn.commit.started.internal.count,count,Number of SQL transaction COMMIT statements started (internal queries)
+cockroachdb,sql.txn.contended.count,count,Number of SQL transactions that experienced contention
+cockroachdb,sql.txn.contended.internal.count,count,Number of SQL transactions that experienced contention (internal queries)
+cockroachdb,sql.txn.latency,gauge,"[OpenMetrics v1] Latency of SQL transactions
+Shown as nanosecond"
+cockroachdb,sql.txn.latency.bucket,count,"[OpenMetrics v2] Latency of SQL transactions
+Shown as nanosecond"
+cockroachdb,sql.txn.latency.count,count,"[OpenMetrics v2] Latency of SQL transactions
+Shown as nanosecond"
+cockroachdb,sql.txn.latency.internal.bucket,count,"Latency of SQL transactions (internal queries)
+Shown as nanosecond"
+cockroachdb,sql.txn.latency.internal.count,count,"Latency of SQL transactions (internal queries)
+Shown as nanosecond"
+cockroachdb,sql.txn.latency.internal.sum,count,"Latency of SQL transactions (internal queries)
+Shown as nanosecond"
+cockroachdb,sql.txn.latency.sum,count,"[OpenMetrics v2] Latency of SQL transactions
+Shown as nanosecond"
+cockroachdb,sql.txn.rollback.count,count,[OpenMetrics v1 & v2] Number of SQL transaction ROLLBACK statements
+cockroachdb,sql.txn.rollback.internal.count,count,Number of SQL transaction ROLLBACK statements successfully executed (internal queries)
+cockroachdb,sql.txn.rollback.started.count,count,Number of SQL transaction ROLLBACK statements started
+cockroachdb,sql.txn.rollback.started.internal.count,count,Number of SQL transaction ROLLBACK statements started (internal queries)
+cockroachdb,sql.txns.open,gauge,"[OpenMetrics v1 & v2] Number of currently open user SQL transactions
+Shown as transaction"
+cockroachdb,sql.txns.open.internal,gauge,Number of currently open user SQL transactions (internal queries)
+cockroachdb,sql.update.count,count,[OpenMetrics v1 & v2] Number of SQL UPDATE statements
+cockroachdb,sql.update.internal.count,count,Number of SQL UPDATE statements successfully executed (internal queries)
+cockroachdb,sql.update.started.count,count,Number of SQL UPDATE statements started
+cockroachdb,sql.update.started.internal.count,count,Number of SQL UPDATE statements started (internal queries)
+cockroachdb,sqlliveness.is_alive.cache_hits.count,count,Number of calls to IsAlive that return from the cache
+cockroachdb,sqlliveness.is_alive.cache_misses.count,count,Number of calls to IsAlive that do not return from the cache
+cockroachdb,sqlliveness.sessions_deleted.count,count,Number of expired sessions which have been deleted
+cockroachdb,sqlliveness.sessions_deletion_runs.count,count,Number of calls to delete sessions which have been performed
+cockroachdb,sqlliveness.write_failures.count,count,Number of update or insert calls which have failed
+cockroachdb,sqlliveness.write_successes.count,count,Number of update or insert calls successfully performed
+cockroachdb,storage.batch_commit,gauge,Count of batch commits. See storage.AggregatedBatchCommitStats for details.
+cockroachdb,storage.batch_commit.commit_wait.duration,gauge,"Cumulative time spent waiting for WAL sync, for batch commit. See storage.AggregatedBatchCommitStats for details.
+Shown as nanosecond"
+cockroachdb,storage.batch_commit.duration,gauge,"Cumulative time spent in batch commit. See storage.AggregatedBatchCommitStats for details.
+Shown as nanosecond"
+cockroachdb,storage.batch_commit.l0_stall.duration,gauge,"Cumulative time spent in a write stall due to high read amplification in L0, for batch commit. See storage.AggregatedBatchCommitStats for details.
+Shown as nanosecond"
+cockroachdb,storage.batch_commit.mem_stall.duration,gauge,"Cumulative time spent in a write stall due to too many memtables, for batch commit. See storage.AggregatedBatchCommitStats for details.
+Shown as nanosecond"
+cockroachdb,storage.batch_commit.sem_wait.duration,gauge,"Cumulative time spent in semaphore wait, for batch commit. See storage.AggregatedBatchCommitStats for details.
+Shown as nanosecond"
+cockroachdb,storage.batch_commit.wal_queue_wait.duration,gauge,"Cumulative time spent waiting for memory blocks in the WAL queue, for batch commit. See storage.AggregatedBatchCommitStats for details.
+Shown as nanosecond"
+cockroachdb,storage.batch_commit.wal_rotation.duration,gauge,"Cumulative time spent waiting for WAL rotation, for batch commit. See storage.AggregatedBatchCommitStats for details.
+Shown as nanosecond"
+cockroachdb,storage.checkpoints,gauge,"The number of checkpoint directories found in storage. This is the number of directories found in the auxiliary/checkpoints directory. Each represents an immutable point-in-time storage engine checkpoint. They are cheap (consisting mostly of hard links), but over time they effectively become a full copy of the old state, which increases their relative cost."
+cockroachdb,storage.compactions.duration,gauge,"Cumulative sum of all compaction durations. The rate of this value provides the effective compaction concurrency of a store, which can be useful to determine whether the maximum compaction concurrency is fully utilized.
+Shown as nanosecond"
+cockroachdb,storage.compactions.keys.pinned,gauge,Cumulative count of storage engine KVs written to sstables during flushes and compactions due to open LSM snapshots. Various subsystems of CockroachDB take LSM snapshots to maintain a consistent view of the database over an extended duration.
+cockroachdb,storage.compactions.keys.pinned.bytes,gauge,"Cumulative size of storage engine KVs written to sstables during flushes and compactions due to open LSM snapshots. Various subsystems of CockroachDB take LSM snapshots to maintain a consistent view of the database over an extended duration.
+Shown as byte"
+cockroachdb,storage.disk_slow,gauge,Number of instances of disk operations taking longer than 10s
+cockroachdb,storage.disk_stalled,gauge,Number of instances of disk operations taking longer than 20s
+cockroachdb,storage.flush.ingest,gauge,Flushes performing an ingest (flushable ingestions)
+cockroachdb,storage.flush.ingest.table,gauge,Tables ingested via flushes (flushable ingestions)
+cockroachdb,storage.flush.ingest.table.bytes,gauge,"Bytes ingested via flushes (flushable ingestions)
+Shown as byte"
+cockroachdb,storage.flush.utilization,gauge,"The percentage of time the storage engine is actively flushing memtables to disk.
+Shown as percent"
+cockroachdb,storage.ingest,gauge,Number of successful ingestions performed
+cockroachdb,storage.iterator.block_load.bytes,gauge,"Bytes loaded by storage engine iterators (possibly cached). See storage.AggregatedIteratorStats for details.
+Shown as byte"
+cockroachdb,storage.iterator.block_load.cached_bytes,gauge,"Bytes loaded by storage engine iterators from the block cache. See storage.AggregatedIteratorStats for details.
+Shown as byte"
+cockroachdb,storage.iterator.block_load.read_duration,gauge,"Cumulative time storage engine iterators spent loading blocks from durable storage. See storage.AggregatedIteratorStats for details.
+Shown as nanosecond"
+cockroachdb,storage.iterator.external.seeks,gauge,Cumulative count of seeks performed on storage engine iterators. See storage.AggregatedIteratorStats for details.
+cockroachdb,storage.iterator.external.steps,gauge,Cumulative count of steps performed on storage engine iterators. See storage.AggregatedIteratorStats for details.
+cockroachdb,storage.iterator.internal.seeks,gauge,Cumulative count of seeks performed internally within storage engine iterators. A value high relative to 'storage.iterator.external.seeks' is a good indication that there's an accumulation of garbage internally within the storage engine. See storage.AggregatedIteratorStats for details.
+cockroachdb,storage.iterator.internal.steps,gauge,Cumulative count of steps performed internally within storage engine iterators. A value high relative to 'storage.iterator.external.steps' is a good indication that there's an accumulation of garbage internally within the storage engine. See storage.AggregatedIteratorStats for more details.
+cockroachdb,storage.keys.range_key_set,gauge,Approximate count of RangeKeySet internal keys across the storage engine.
+cockroachdb,storage.keys.tombstone,gauge,"Approximate count of DEL, SINGLEDEL and RANGEDEL internal keys across the storage engine."
+cockroachdb,storage.l0_bytes_flushed,gauge,"Number of bytes flushed (from memtables) into Level 0
+Shown as byte"
+cockroachdb,storage.l0_bytes_ingested,gauge,"Number of bytes ingested directly into Level 0
+Shown as byte"
+cockroachdb,storage.l0_level_score,gauge,Compaction score of level 0
+cockroachdb,storage.l0_level_size,gauge,"Size of the SSTables in level 0
+Shown as byte"
+cockroachdb,storage.l0_num_files,gauge,Number of SSTables in Level 0
+cockroachdb,storage.l0_sublevels,gauge,Number of Level 0 sublevels
+cockroachdb,storage.l1_bytes_ingested,gauge,"Number of bytes ingested directly into Level 1
+Shown as byte"
+cockroachdb,storage.l1_level_score,gauge,Compaction score of level 1
+cockroachdb,storage.l1_level_size,gauge,"Size of the SSTables in level 1
+Shown as byte"
+cockroachdb,storage.l2_bytes_ingested,gauge,"Number of bytes ingested directly into Level 2
+Shown as byte"
+cockroachdb,storage.l2_level_score,gauge,Compaction score of level 2
+cockroachdb,storage.l2_level_size,gauge,"Size of the SSTables in level 2
+Shown as byte"
+cockroachdb,storage.l3_bytes_ingested,gauge,"Number of bytes ingested directly into Level 3
+Shown as byte"
+cockroachdb,storage.l3_level_score,gauge,Compaction score of level 3
+cockroachdb,storage.l3_level_size,gauge,"Size of the SSTables in level 3
+Shown as byte"
+cockroachdb,storage.l4_bytes_ingested,gauge,"Number of bytes ingested directly into Level 4
+Shown as byte"
+cockroachdb,storage.l4_level_score,gauge,Compaction score of level 4
+cockroachdb,storage.l4_level_size,gauge,"Size of the SSTables in level 4
+Shown as byte"
+cockroachdb,storage.l5_bytes_ingested,gauge,"Number of bytes ingested directly into Level 5
+Shown as byte"
+cockroachdb,storage.l5_level_score,gauge,Compaction score of level 5
+cockroachdb,storage.l5_level_size,gauge,"Size of the SSTables in level 5
+Shown as byte"
+cockroachdb,storage.l6_bytes_ingested,gauge,"Number of bytes ingested directly into Level 6
+Shown as byte"
+cockroachdb,storage.l6_level_score,gauge,Compaction score of level 6
+cockroachdb,storage.l6_level_size,gauge,"Size of the SSTables in level 6
+Shown as byte"
+cockroachdb,storage.marked_for_compaction_files,gauge,Count of SSTables marked for compaction
+cockroachdb,storage.queue.store_failures.count,count,Number of replicas which failed processing in replica queues due to retryable store errors +cockroachdb,storage.secondary_cache,gauge,The count of cache blocks in the secondary cache (not sstable blocks) +cockroachdb,storage.secondary_cache.evictions,gauge,The number of times a cache block was evicted from the secondary cache +cockroachdb,storage.secondary_cache.reads_full_hit,gauge,The number of reads where all data returned was read from the secondary cache +cockroachdb,storage.secondary_cache.reads_multi_block,gauge,The number of secondary cache reads that require reading data from 2+ cache blocks +cockroachdb,storage.secondary_cache.reads_multi_shard,gauge,The number of secondary cache reads that require reading data from 2+ shards +cockroachdb,storage.secondary_cache.reads_no_hit,gauge,The number of reads where no data returned was read from the secondary cache +cockroachdb,storage.secondary_cache.reads_partial_hit,gauge,The number of reads where some data returned was read from the secondary cache +cockroachdb,storage.secondary_cache.reads_total,gauge,The number of reads from the secondary cache +cockroachdb,storage.secondary_cache.size,gauge,"The number of sstable bytes stored in the secondary cache +Shown as byte" +cockroachdb,storage.secondary_cache.write_back_failures,gauge,The number of times writing a cache block to the secondary cache failed +cockroachdb,storage.shared_storage.read,gauge,"Bytes read from shared storage +Shown as byte" +cockroachdb,storage.shared_storage.write,gauge,"Bytes written to external storage +Shown as byte" +cockroachdb,storage.single_delete.ineffectual,gauge,Number of SingleDeletes that were ineffectual +cockroachdb,storage.single_delete.invariant_violation,gauge,Number of SingleDelete invariant violations +cockroachdb,storage.wal.bytes_in,gauge,The number of logical bytes the storage engine has written to the WAL +cockroachdb,storage.wal.bytes_written,gauge,The number of bytes the storage engine has written to the WAL +cockroachdb,storage.wal.fsync.latency.bucket,count,"The write ahead log fsync latency +Shown as nanosecond" +cockroachdb,storage.wal.fsync.latency.count,count,"The write ahead log fsync latency +Shown as nanosecond" +cockroachdb,storage.wal.fsync.latency.sum,count,"The write ahead log fsync latency +Shown as nanosecond" +cockroachdb,storage.write.stalls,gauge,Number of instances of intentional write stalls to backpressure incoming writes +cockroachdb,storage.write_stall_nanos,gauge,"Total write stall duration in nanos +Shown as nanosecond" +cockroachdb,storage.write_stalls,gauge,Number of instances of intentional write stalls to backpressure incoming writes +cockroachdb,sys.cgo.allocbytes,gauge,"[OpenMetrics v1 & v2] Current bytes of memory allocated by cgo +Shown as byte" +cockroachdb,sys.cgo.totalbytes,gauge,"[OpenMetrics v1 & v2] Total bytes of memory allocated by cgo, but not released +Shown as byte" +cockroachdb,sys.cgocalls,gauge,[OpenMetrics v1 & v2] Total number of cgo calls +cockroachdb,sys.cpu.combined.percent.normalized,gauge,"[OpenMetrics v1 & v2] Current user+system cpu percentage, normalized 0-1 by number of cores +Shown as fraction" +cockroachdb,sys.cpu.host.combined.percent_normalized,gauge,"Current user+system cpu percentage across the whole machine, normalized 0-1 by number of cores +Shown as percent" +cockroachdb,sys.cpu.now.ns,gauge,"The time when CPU measurements were taken, as nanoseconds since epoch +Shown as nanosecond" 
+cockroachdb,sys.cpu.sys.ns,gauge,"[OpenMetrics v1 & v2] Total system cpu time in nanoseconds
+Shown as nanosecond"
+cockroachdb,sys.cpu.sys.percent,gauge,"[OpenMetrics v1 & v2] Current system cpu percentage
+Shown as core"
+cockroachdb,sys.cpu.user.ns,gauge,"[OpenMetrics v1 & v2] Total user cpu time in nanoseconds
+Shown as nanosecond"
+cockroachdb,sys.cpu.user.percent,gauge,"[OpenMetrics v1 & v2] Current user cpu percentage
+Shown as core"
+cockroachdb,sys.fd.open,gauge,[OpenMetrics v1 & v2] Process open file descriptors
+cockroachdb,sys.fd.softlimit,gauge,[OpenMetrics v1 & v2] Process open FD soft limit
+cockroachdb,sys.gc,gauge,[OpenMetrics v2] Total number of GC runs
+cockroachdb,sys.gc.count,gauge,[OpenMetrics v1] Total number of GC runs
+cockroachdb,sys.gc.pause.ns,gauge,"[OpenMetrics v1 & v2] Total GC pause in nanoseconds
+Shown as nanosecond"
+cockroachdb,sys.gc.pause.percent,gauge,"[OpenMetrics v1 & v2] Current GC pause percentage
+Shown as fraction"
+cockroachdb,sys.go.allocbytes,gauge,"[OpenMetrics v1 & v2] Current bytes of memory allocated by go
+Shown as byte"
+cockroachdb,sys.go.totalbytes,gauge,"[OpenMetrics v1 & v2] Total bytes of memory allocated by go, but not released
+Shown as byte"
+cockroachdb,sys.goroutines,gauge,[OpenMetrics v1 & v2] Current number of goroutines
+cockroachdb,sys.host.disk.io.time,gauge,"Time spent reading from or writing to all disks since this process started
+Shown as nanosecond"
+cockroachdb,sys.host.disk.iopsinprogress,gauge,IO operations currently in progress on this host
+cockroachdb,sys.host.disk.read,gauge,Disk read operations across all disks since this process started
+cockroachdb,sys.host.disk.read.bytes,gauge,"[OpenMetrics v1 & v2] Bytes read from all disks since this process started
+Shown as byte"
+cockroachdb,sys.host.disk.read.count,gauge,Disk read operations across all disks since this process started
+cockroachdb,sys.host.disk.read.time,gauge,"Time spent reading from all disks since this process started
+Shown as nanosecond"
+cockroachdb,sys.host.disk.weightedio.time,gauge,"Weighted time spent reading from or writing to all disks since this process started
+Shown as nanosecond"
+cockroachdb,sys.host.disk.write,gauge,Disk write operations across all disks since this process started
+cockroachdb,sys.host.disk.write.bytes,gauge,"[OpenMetrics v1 & v2] Bytes written to all disks since this process started
+Shown as byte"
+cockroachdb,sys.host.disk.write.count,gauge,Disk write operations across all disks since this process started
+cockroachdb,sys.host.disk.write.time,gauge,"Time spent writing to all disks since this process started
+Shown as nanosecond"
+cockroachdb,sys.host.net.recv.bytes,gauge,"[OpenMetrics v1 & v2] Bytes received on all network interfaces since this process started
+Shown as byte"
+cockroachdb,sys.host.net.recv.packets,gauge,Packets received on all network interfaces since this process started
+cockroachdb,sys.host.net.send.bytes,gauge,"[OpenMetrics v1 & v2] Bytes sent on all network interfaces since this process started
+Shown as byte"
+cockroachdb,sys.host.net.send.packets,gauge,Packets sent on all network interfaces since this process started
+cockroachdb,sys.rss,gauge,[OpenMetrics v1 & v2] Current process RSS
+cockroachdb,sys.runnable.goroutines.per.cpu,gauge,"Average number of goroutines that are waiting to run, normalized by number of cores"
+cockroachdb,sys.runnable.goroutines.per_cpu,gauge,"Average number of goroutines that are waiting to run, normalized by number of cores"
+cockroachdb,sys.totalmem,gauge,"Total memory (both free and used) +Shown as byte" +cockroachdb,sys.uptime,gauge,"[OpenMetrics v1 & v2] Process uptime in seconds +Shown as second" +cockroachdb,sysbytes,gauge,"[OpenMetrics v1 & v2] Number of bytes in system KV pairs +Shown as byte" +cockroachdb,syscount,gauge,[OpenMetrics v1 & v2] Count of system KV pairs +cockroachdb,tenant.consumption.cross_region_network_ru.count,count,Total number of RUs charged for cross-region network traffic +cockroachdb,tenant.consumption.external_io_egress_bytes,gauge,Total number of bytes written to external services such as cloud storage providers +cockroachdb,tenant.consumption.external_io_ingress_bytes,gauge,Total number of bytes read from external services such as cloud storage providers +cockroachdb,tenant.consumption.kv_request_units,gauge,RU consumption attributable to KV +cockroachdb,tenant.consumption.kv_request_units.count,count,RU consumption attributable to KV +cockroachdb,tenant.consumption.pgwire_egress_bytes,gauge,Total number of bytes transferred from a SQL pod to the client +cockroachdb,tenant.consumption.read_batches,gauge,Total number of KV read batches +cockroachdb,tenant.consumption.read_bytes,gauge,Total number of bytes read from KV +cockroachdb,tenant.consumption.read_requests,gauge,Total number of KV read requests +cockroachdb,tenant.consumption.request_units,gauge,Total RU consumption +cockroachdb,tenant.consumption.request_units.count,count,Total RU consumption +cockroachdb,tenant.consumption.sql_pods_cpu_seconds,gauge,"Total amount of CPU used by SQL pods +Shown as second" +cockroachdb,tenant.consumption.write_batches,gauge,Total number of KV write batches +cockroachdb,tenant.consumption.write_bytes,gauge,Total number of bytes written to KV +cockroachdb,tenant.consumption.write_requests,gauge,Total number of KV write requests +cockroachdb,timeseries.write.bytes,count,"[OpenMetrics v1] Total size in bytes of metric samples written to disk +Shown as byte" +cockroachdb,timeseries.write.bytes.count,count,"[OpenMetrics v2] Total size in bytes of metric samples written to disk +Shown as byte" +cockroachdb,timeseries.write.errors,count,"[OpenMetrics v1] Total errors encountered while attempting to write metrics to disk +Shown as error" +cockroachdb,timeseries.write.errors.count,count,"[OpenMetrics v2] Total errors encountered while attempting to write metrics to disk +Shown as error" +cockroachdb,timeseries.write.samples,count,[OpenMetrics v1] Total number of metric samples written to disk +cockroachdb,timeseries.write.samples.count,count,[OpenMetrics v2] Total number of metric samples written to disk +cockroachdb,totalbytes,gauge,"[OpenMetrics v1 & v2] Total number of bytes taken up by keys and values including non-live data +Shown as byte" +cockroachdb,tscache.skl.pages,gauge,Number of pages in the timestamp cache +cockroachdb,tscache.skl.read.pages,gauge,[OpenMetrics v1 & v2] Number of pages in the read timestamp cache +cockroachdb,tscache.skl.read.rotations,count,[OpenMetrics v1] Number of page rotations in the read timestamp cache +cockroachdb,tscache.skl.read.rotations.count,count,[OpenMetrics v2] Number of page rotations in the read timestamp cache +cockroachdb,tscache.skl.rotations.count,count,Number of page rotations in the timestamp cache +cockroachdb,tscache.skl.write.pages,gauge,[OpenMetrics v1 & v2] Number of pages in the write timestamp cache +cockroachdb,tscache.skl.write.rotations,count,[OpenMetrics v1] Number of page rotations in the write timestamp cache 
+cockroachdb,tscache.skl.write.rotations.count,count,[OpenMetrics v2] Number of page rotations in the write timestamp cache +cockroachdb,txn.abandons,count,[OpenMetrics v1] Number of abandoned KV transactions +cockroachdb,txn.abandons.count,count,[OpenMetrics v2] Number of abandoned KV transactions +cockroachdb,txn.aborts,count,[OpenMetrics v1] Number of aborted KV transactions +cockroachdb,txn.aborts.count,count,[OpenMetrics v2] Number of aborted KV transactions +cockroachdb,txn.autoretries,count,[OpenMetrics v1] Number of automatic retries to avoid serializable restarts +cockroachdb,txn.autoretries.count,count,[OpenMetrics v2] Number of automatic retries to avoid serializable restarts +cockroachdb,txn.commit_waits.before_commit_trigger.count,count,Number of KV transactions that had to commit-wait on the server before committing because they had a commit trigger +cockroachdb,txn.commit_waits.count,count,Number of KV transactions that had to commit-wait on commit in order to ensure linearizability. This generally happens to transactions writing to global ranges. +cockroachdb,txn.commits,count,[OpenMetrics v1] Number of committed KV transactions (including 1PC) +cockroachdb,txn.commits.count,count,[OpenMetrics v2] Number of committed KV transactions (including 1PC) +cockroachdb,txn.commits1PC,count,[OpenMetrics v1] Number of committed one-phase KV transactions +cockroachdb,txn.commits1PC.count,count,[OpenMetrics v2] Number of committed one-phase KV transactions +cockroachdb,txn.condensed_intent_spans.count,count,KV transactions that have exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). See also txn.condensed_intent_spans_gauge for a gauge of such transactions currently running. +cockroachdb,txn.condensed_intent_spans_gauge,gauge,KV transactions currently running that have exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). See also txn.condensed_intent_spans for a perpetual counter/rate. +cockroachdb,txn.condensed_intent_spans_rejected.count,count,KV transactions that have been aborted because they exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). Rejection is caused by kv.transaction.reject_over_max_intents_budget. +cockroachdb,txn.durations,gauge,[OpenMetrics v1] KV transaction durations in nanoseconds +cockroachdb,txn.durations.bucket,count,[OpenMetrics v2] KV transaction durations in nanoseconds +cockroachdb,txn.durations.count,count,[OpenMetrics v2] KV transaction durations in nanoseconds +cockroachdb,txn.durations.sum,count,[OpenMetrics v2] KV transaction durations in nanoseconds +cockroachdb,txn.parallelcommits.auto_retries.count,count,Number of commit tries after failed parallel commit attempts +cockroachdb,txn.parallelcommits.count,count,Number of KV transaction parallel commit attempts +cockroachdb,txn.refresh.auto_retries.count,count,Number of request retries after successful client-side refreshes +cockroachdb,txn.refresh.fail.count,count,Number of failed client-side transaction refreshes +cockroachdb,txn.refresh.fail_with_condensed_spans.count,count,"Number of failed client-side refreshes for transactions whose read tracking lost fidelity because of condensing. Such a failure could be a false conflict. Failures counted here are also counted in txn.refresh.fail, and the respective transactions are also counted in txn.refresh.memory_limit_exceeded."
+cockroachdb,txn.refresh.memory_limit_exceeded.count,count,"Number of transactions which exceed the refresh span bytes limit, causing their read spans to be condensed" +cockroachdb,txn.refresh.success.count,count,"Number of successful client-side transaction refreshes. A refresh may be preemptive or reactive. A reactive refresh is performed after a request throws an error because a refresh is needed for it to succeed. In these cases, the request will be re-issued as an auto-retry (see txn.refresh.auto_retries) after the refresh succeeds." +cockroachdb,txn.refresh.success_server_side.count,count,Number of successful server-side transaction refreshes +cockroachdb,txn.restarts,gauge,[OpenMetrics v1] Number of restarted KV transactions +cockroachdb,txn.restarts.asyncwritefailure.count,count,Number of restarts due to async consensus writes that failed to leave intents +cockroachdb,txn.restarts.bucket,count,[OpenMetrics v2] Number of restarted KV transactions +cockroachdb,txn.restarts.commitdeadlineexceeded.count,count,Number of restarts due to a transaction exceeding its deadline +cockroachdb,txn.restarts.count,count,[OpenMetrics v2] Number of restarted KV transactions +cockroachdb,txn.restarts.deleterange,count,[OpenMetrics v1] Number of restarts due to a forwarded commit timestamp and a DeleteRange command +cockroachdb,txn.restarts.deleterange.count,count,[OpenMetrics v2] Number of restarts due to a forwarded commit timestamp and a DeleteRange command +cockroachdb,txn.restarts.possiblereplay,count,[OpenMetrics v1] Number of restarts due to possible replays of command batches at the storage layer +cockroachdb,txn.restarts.possiblereplay.count,count,[OpenMetrics v2] Number of restarts due to possible replays of command batches at the storage layer +cockroachdb,txn.restarts.readwithinuncertainty.count,count,Number of restarts due to reading a new value within the uncertainty interval +cockroachdb,txn.restarts.serializable,count,[OpenMetrics v1] Number of restarts due to a forwarded commit timestamp and isolation=SERIALIZABLE +cockroachdb,txn.restarts.serializable.count,count,[OpenMetrics v2] Number of restarts due to a forwarded commit timestamp and isolation=SERIALIZABLE +cockroachdb,txn.restarts.sum,count,[OpenMetrics v2] Number of restarted KV transactions +cockroachdb,txn.restarts.txnaborted.count,count,Number of restarts due to an abort by a concurrent transaction (usually due to deadlock) +cockroachdb,txn.restarts.txnpush.count,count,Number of restarts due to a transaction push failure +cockroachdb,txn.restarts.unknown.count,count,Number of restarts due to unknown reasons +cockroachdb,txn.restarts.writetooold,count,[OpenMetrics v1] Number of restarts due to a concurrent writer committing first +cockroachdb,txn.restarts.writetooold.count,count,[OpenMetrics v2] Number of restarts due to a concurrent writer committing first +cockroachdb,txn.restarts.writetoooldmulti.count,count,Number of restarts due to multiple concurrent writers committing first +cockroachdb,txn.rollbacks.async.failed.count,count,Number of KV transactions that failed to send abort asynchronously which is not always retried +cockroachdb,txn.rollbacks.failed.count,count,Number of KV transactions that failed to send final abort +cockroachdb,txn.server_side.1PC.failure.count,count,Number of batches that attempted to commit using 1PC and failed +cockroachdb,txn.server_side.1PC.success.count,count,Number of batches that attempted to commit using 1PC and succeeded
+cockroachdb,txn.server_side_retry.read_evaluation.failure.count,count,Number of read batches that were not successfully refreshed server side +cockroachdb,txn.server_side_retry.read_evaluation.success.count,count,Number of read batches that were successfully refreshed server side +cockroachdb,txn.server_side_retry.uncertainty_interval_error.failure.count,count,Number of batches that ran into uncertainty interval errors that were not successfully refreshed server side +cockroachdb,txn.server_side_retry.uncertainty_interval_error.success.count,count,Number of batches that ran into uncertainty interval errors that were successfully refreshed server side +cockroachdb,txn.server_side_retry.write_evaluation.failure.count,count,Number of write batches that were not successfully refreshed server side +cockroachdb,txn.server_side_retry.write_evaluation.success.count,count,Number of write batches that were successfully refreshed server side +cockroachdb,txnrecovery.attempts.count,count,Number of transaction recovery attempts executed +cockroachdb,txnrecovery.attempts.pending,gauge,Number of transaction recovery attempts currently in-flight +cockroachdb,txnrecovery.failures.count,count,Number of transaction recovery attempts that failed +cockroachdb,txnrecovery.successes.aborted.count,count,Number of transaction recovery attempts that aborted a transaction +cockroachdb,txnrecovery.successes.committed.count,count,Number of transaction recovery attempts that committed a transaction +cockroachdb,txnrecovery.successes.pending.count,count,Number of transaction recovery attempts that left a transaction pending +cockroachdb,txnwaitqueue.deadlocks.count,count,Number of deadlocks detected by the txn wait queue +cockroachdb,txnwaitqueue.deadlocks_total.count,count,Number of deadlocks detected by the txn wait queue +cockroachdb,txnwaitqueue.pushee.waiting,gauge,Number of pushees on the txn wait queue +cockroachdb,txnwaitqueue.pusher.slow,gauge,The total number of cases where a pusher waited more than the excessive wait threshold +cockroachdb,txnwaitqueue.pusher.wait_time.bucket,count,"Histogram of durations spent in queue by pushers +Shown as nanosecond" +cockroachdb,txnwaitqueue.pusher.wait_time.count,count,"Histogram of durations spent in queue by pushers +Shown as nanosecond" +cockroachdb,txnwaitqueue.pusher.wait_time.sum,count,"Histogram of durations spent in queue by pushers +Shown as nanosecond" +cockroachdb,txnwaitqueue.pusher.waiting,gauge,Number of pushers on the txn wait queue +cockroachdb,txnwaitqueue.query.wait_time.bucket,count,"Histogram of durations spent in queue by queries +Shown as nanosecond" +cockroachdb,txnwaitqueue.query.wait_time.count,count,"Histogram of durations spent in queue by queries +Shown as nanosecond" +cockroachdb,txnwaitqueue.query.wait_time.sum,count,"Histogram of durations spent in queue by queries +Shown as nanosecond" +cockroachdb,txnwaitqueue.query.waiting,gauge,Number of transaction status queries waiting for an updated transaction record +cockroachdb,valbytes,gauge,"[OpenMetrics v1 & v2] Number of bytes taken up by values +Shown as byte" +cockroachdb,valcount,gauge,[OpenMetrics v1 & v2] Count of all values \ No newline at end of file diff --git a/src/current/_data/v25.3/metrics/datadog-crdb-dedicated.csv b/src/current/_data/v25.3/metrics/datadog-crdb-dedicated.csv new file mode 100644 index 00000000000..b78a8595406 --- /dev/null +++ b/src/current/_data/v25.3/metrics/datadog-crdb-dedicated.csv @@ -0,0 +1,475 @@ +prefix,datadog_id,type,description
+crdb_dedicated,addsstable.applications,count,"Number of SSTable ingestions applied (i.e., applied by Replicas). Shown as operation +Shown as operation" +crdb_dedicated,addsstable.copies,count,"Number of SSTable ingestions that required copying files during application. Shown as operation +Shown as operation" +crdb_dedicated,addsstable.proposals,count,"Number of SSTable ingestions proposed (i.e., sent to Raft by lease holders). Shown as operation +Shown as operation" +crdb_dedicated,admission.wait.sum.kv,count,"Total wait time in micros for requests within the KV layer +Shown as microsecond" +crdb_dedicated,admission.wait.sum.kv.stores,count,"Total wait time in micros for write requests within the KV layer +Shown as microsecond" +crdb_dedicated,admission.wait.sum.sql.kv.response,count,"Total wait time in micros for responses between the KV and SQL layer +Shown as microsecond" +crdb_dedicated,admission.wait.sum.sql.sql.response,count,"Total wait time in micros for responses within the SQL layer when receiving DistSQL responses +Shown as microsecond" +crdb_dedicated,capacity,gauge,"Total storage capacity. Shown as byte +Shown as byte" +crdb_dedicated,capacity.available,gauge,"Available storage capacity. Shown as byte +Shown as byte" +crdb_dedicated,capacity.reserved,gauge,"Capacity reserved for snapshots. Shown as byte +Shown as byte" +crdb_dedicated,capacity.used,gauge,"Used storage capacity. Shown as byte +Shown as byte" +crdb_dedicated,changefeed.backfill.count,gauge,"Number of changefeeds currently executing backfill. Shown as count. +Shown as unit" +crdb_dedicated,changefeed.backfill.pending.ranges,gauge,"Number of ranges in an ongoing backfill that are yet to be fully emitted. Shown as count +Shown as unit" +crdb_dedicated,changefeed.commit.latency,gauge,"Event commit latency: the difference between the event MVCC timestamp and the time it was acknowledged by the downstream sink. If the sink batches events, then the difference between the oldest event in the batch and acknowledgement is recorded. Excludes latency during backfill. Shown as nanoseconds. +Shown as unit" +crdb_dedicated,changefeed.emitted.messages,count,"Messages emitted by all feeds. Shown as count. +Shown as unit" +crdb_dedicated,changefeed.error.retries,count,"Total retryable errors encountered by all changefeeds. Shown as count. +Shown as unit" +crdb_dedicated,changefeed.failures,count,"Total number of changefeed jobs which have failed. Shown as count. +Shown as unit" +crdb_dedicated,changefeed.max.behind.nanos,gauge,"Largest commit-to-emit duration of any running feed. Shown as nanoseconds. +Shown as nanosecond" +crdb_dedicated,changefeed.message.size.hist,gauge,"Histogram of message sizes for changefeeds. Shown as bytes. +Shown as byte" +crdb_dedicated,changefeed.running,gauge,"Number of currently running changefeeds, including sinkless. Shown as count. +Shown as unit" +crdb_dedicated,clock.offset.meannanos,gauge,"Mean clock offset with other nodes in nanoseconds. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,clock.offset.stddevnanos,gauge,"Stddev clock offset with other nodes in nanoseconds. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,distsender.batches,count,Number of batches processed +crdb_dedicated,distsender.batches.partial,count,Number of partial batches processed +crdb_dedicated,distsender.errors.notleaseholder,count,"Number of NotLeaseHolderErrors encountered.
Shown as error +Shown as error" +crdb_dedicated,distsender.rpc.sent,count,"Number of RPCs sent +Shown as request" +crdb_dedicated,distsender.rpc.sent.local,count,"Number of local RPCs sent +Shown as request" +crdb_dedicated,distsender.rpc.sent.nextreplicaerror,count,"Number of RPCs sent due to per-replica errors. Shown as error +Shown as request" +crdb_dedicated,exec.error,count,"Number of batch KV requests that failed to execute on this node. Shown as request +Shown as request" +crdb_dedicated,exec.latency,count,"Latency in nanoseconds of batch KV requests executed on this node. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,exec.success,count,"Number of batch KV requests executed successfully on this node. Shown as request +Shown as request" +crdb_dedicated,gcbytesage,gauge,"Cumulative age of non-live data in seconds. Shown as second +Shown as second" +crdb_dedicated,gossip.bytes.received,count,"Number of received gossip bytes. Shown as byte +Shown as byte" +crdb_dedicated,gossip.bytes.sent,count,"Number of sent gossip bytes. Shown as byte +Shown as byte" +crdb_dedicated,gossip.connections.incoming,gauge,"Number of active incoming gossip connections. Shown as connection +Shown as connection" +crdb_dedicated,gossip.connections.outgoing,gauge,"Number of active outgoing gossip connections. Shown as connection +Shown as connection" +crdb_dedicated,gossip.connections.refused,count,"Number of refused incoming gossip connections. Shown as connection +Shown as connection" +crdb_dedicated,gossip.infos.received,count,"Number of received gossip Info objects +Shown as message" +crdb_dedicated,gossip.infos.sent,count,"Number of sent gossip Info objects +Shown as message" +crdb_dedicated,intentage,gauge,"Cumulative age of intents in seconds. Shown as second +Shown as second" +crdb_dedicated,intentbytes,gauge,"Number of bytes in intent KV pairs. Shown as byte +Shown as byte" +crdb_dedicated,intentcount,gauge,"Count of intent keys. Shown as key +Shown as key" +crdb_dedicated,jobs.changefeed.resume.retry.error,count,"Number of changefeed jobs which failed with a retriable error. Shown as count. +Shown as unit" +crdb_dedicated,keybytes,gauge,"Number of bytes taken up by keys. Shown as byte +Shown as byte" +crdb_dedicated,keycount,gauge,"Count of all keys. Shown as key +Shown as key" +crdb_dedicated,leases.epoch,gauge,"Number of replica leaseholders using epoch-based leases +Shown as unit" +crdb_dedicated,leases.error,count,"Number of failed lease requests. Shown as request +Shown as request" +crdb_dedicated,leases.expiration,gauge,"Number of replica leaseholders using expiration-based leases +Shown as unit" +crdb_dedicated,leases.success,count,"Number of successful lease requests. Shown as request +Shown as request" +crdb_dedicated,leases.transfers.error,count,"Number of failed lease transfers +Shown as error" +crdb_dedicated,leases.transfers.success,count,"Number of successful lease transfers +Shown as success" +crdb_dedicated,livebytes,gauge,"Number of bytes of live data keys plus values. Shown as byte +Shown as byte" +crdb_dedicated,livecount,gauge,"Count of live keys. Shown as key +Shown as key" +crdb_dedicated,liveness.epochincrements,count,"Number of times this node has incremented its liveness epoch +Shown as unit" +crdb_dedicated,liveness.heartbeatfailures,count,"Number of failed node liveness heartbeats from this node +Shown as unit" +crdb_dedicated,liveness.heartbeatlatency,count,"Node liveness heartbeat latency in nanoseconds. 
Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,liveness.heartbeatsuccesses,count,"Number of successful node liveness heartbeats from this node +Shown as unit" +crdb_dedicated,liveness.livenodes,gauge,"Number of live nodes in the cluster (will be 0 if this node is not itself live) +Shown as unit" +crdb_dedicated,queue.consistency.pending,gauge,"Number of pending replicas in the consistency checker queue +Shown as unit" +crdb_dedicated,queue.consistency.process.failure,count,"Number of replicas which failed processing in the consistency checker queue +Shown as unit" +crdb_dedicated,queue.consistency.process.success,count,"Number of replicas successfully processed by the consistency checker queue +Shown as success" +crdb_dedicated,queue.consistency.processingnanos,count,"Nanoseconds spent processing replicas in the consistency checker queue. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,queue.gc.info.abortspanconsidered,count,"Number of AbortSpan entries old enough to be considered for removal +Shown as transaction" +crdb_dedicated,queue.gc.info.abortspangcnum,count,"Number of AbortSpan entries fit for removal +Shown as transaction" +crdb_dedicated,queue.gc.info.abortspanscanned,count,"Number of transactions present in the AbortSpan scanned from the engine. Shown as transaction +Shown as transaction" +crdb_dedicated,queue.gc.info.intentsconsidered,count,"Number of ‘old’ intents +Shown as transaction" +crdb_dedicated,queue.gc.info.intenttxns,count,"Number of associated distinct transactions. Shown as transaction +Shown as key" +crdb_dedicated,queue.gc.info.numkeysaffected,count,"Number of keys with GC’able data. Shown as key +Shown as key" +crdb_dedicated,queue.gc.info.pushtxn,count,"Number of attempted pushes +Shown as attempt" +crdb_dedicated,queue.gc.info.resolvesuccess,count,"Number of successful intent resolutions +Shown as success" +crdb_dedicated,queue.gc.info.resolvetotal,count,"Number of attempted intent resolutions +Shown as attempt" +crdb_dedicated,queue.gc.info.transactionspangcaborted,count,"Number of GC’able entries corresponding to aborted txns +Shown as unit" +crdb_dedicated,queue.gc.info.transactionspangccommitted,count,"Number of GC’able entries corresponding to committed txns +Shown as commit" +crdb_dedicated,queue.gc.info.transactionspangcpending,count,"Number of GC’able entries corresponding to pending txns +Shown as unit" +crdb_dedicated,queue.gc.info.transactionspanscanned,count,"Number of entries in transaction spans scanned from the engine +Shown as unit" +crdb_dedicated,queue.gc.pending,gauge,"Number of pending replicas in the GC queue +Shown as unit" +crdb_dedicated,queue.gc.process.failure,count,"Number of replicas which failed processing in the GC queue +Shown as unit" +crdb_dedicated,queue.gc.process.success,count,"Number of replicas successfully processed by the GC queue +Shown as success" +crdb_dedicated,queue.gc.processingnanos,count,"Nanoseconds spent processing replicas in the GC queue. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,queue.raftlog.pending,gauge,"Number of pending replicas in the Raft log queue +Shown as unit" +crdb_dedicated,queue.raftlog.process.failure,count,"Number of replicas which failed processing in the Raft log queue +Shown as unit" +crdb_dedicated,queue.raftlog.process.success,count,"Number of replicas successfully processed by the Raft log queue +Shown as unit" +crdb_dedicated,queue.raftlog.processingnanos,count,"Nanoseconds spent processing replicas in the Raft log queue.
Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,queue.raftsnapshot.pending,gauge,"Number of pending replicas in the Raft repair queue +Shown as unit" +crdb_dedicated,queue.raftsnapshot.process.failure,count,"Number of replicas which failed processing in the Raft repair queue +Shown as unit" +crdb_dedicated,queue.raftsnapshot.process.success,count,"Number of replicas successfully processed by the Raft repair queue +Shown as unit" +crdb_dedicated,queue.raftsnapshot.processingnanos,count,"Nanoseconds spent processing replicas in the Raft repair queue. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,queue.replicagc.pending,gauge,"Number of pending replicas in the replica GC queue +Shown as unit" +crdb_dedicated,queue.replicagc.process.failure,count,"Number of replicas which failed processing in the replica GC queue +Shown as unit" +crdb_dedicated,queue.replicagc.process.success,count,"Number of replicas successfully processed by the replica GC queue +Shown as unit" +crdb_dedicated,queue.replicagc.processingnanos,count,"Nanoseconds spent processing replicas in the replica GC queue. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,queue.replicagc.removereplica,count,"Number of replica removals attempted by the replica gc queue +Shown as unit" +crdb_dedicated,queue.replicate.addreplica,count,"Number of replica additions attempted by the replicate queue +Shown as unit" +crdb_dedicated,queue.replicate.pending,gauge,"Number of pending replicas in the replicate queue +Shown as unit" +crdb_dedicated,queue.replicate.process.failure,count,"Number of replicas which failed processing in the replicate queue +Shown as unit" +crdb_dedicated,queue.replicate.process.success,count,"Number of replicas successfully processed by the replicate queue +Shown as unit" +crdb_dedicated,queue.replicate.processingnanos,count,"Nanoseconds spent processing replicas in the replicate queue. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,queue.replicate.purgatory,gauge,"Number of replicas in the replicate queue’s purgatory, awaiting allocation options +Shown as unit" +crdb_dedicated,queue.replicate.rebalancereplica,count,"Number of replica rebalancer-initiated additions attempted by the replicate queue +Shown as unit" +crdb_dedicated,queue.replicate.removedeadreplica,count,"Number of dead replica removals attempted by the replicate queue typically in response to a node outage +Shown as unit" +crdb_dedicated,queue.replicate.removereplica,count,"Number of replica removals attempted by the replicate queue typically in response to a rebalancer-initiated addition +Shown as unit" +crdb_dedicated,queue.replicate.transferlease,count,"Number of range lease transfers attempted by the replicate queue +Shown as unit" +crdb_dedicated,queue.split.pending,gauge,"Number of pending replicas in the split queue +Shown as unit" +crdb_dedicated,queue.split.process.failure,count,"Number of replicas which failed processing in the split queue +Shown as unit" +crdb_dedicated,queue.split.process.success,count,"Number of replicas successfully processed by the split queue +Shown as unit" +crdb_dedicated,queue.split.processingnanos,count,"Nanoseconds spent processing replicas in the split queue. 
Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,queue.tsmaintenance.pending,gauge,"Number of pending replicas in the timeseries maintenance queue +Shown as unit" +crdb_dedicated,queue.tsmaintenance.process.failure,count,"Number of replicas which failed processing in the timeseries maintenance queue +Shown as unit" +crdb_dedicated,queue.tsmaintenance.process.success,count,"Number of replicas successfully processed by the timeseries maintenance queue +Shown as unit" +crdb_dedicated,queue.tsmaintenance.processingnanos,count,"Nanoseconds spent processing replicas in the timeseries maintenance queue. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,raft.commandsapplied,count,"Count of Raft commands applied. Shown as command +Shown as command" +crdb_dedicated,raft.enqueued.pending,gauge,"Number of pending outgoing messages in the Raft Transport queue +Shown as unit" +crdb_dedicated,raft.heartbeats.pending,gauge,"Number of pending heartbeats and responses waiting to be coalesced +Shown as unit" +crdb_dedicated,raft.process.commandcommit.latency,count,"Latency histogram in nanoseconds for committing Raft commands. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,raft.process.logcommit.latency,count,"Latency histogram in nanoseconds for committing Raft log entries. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,raft.process.tickingnanos,count,"Nanoseconds spent in store.processRaft processing replica.Tick. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,raft.process.workingnanos,count,"Nanoseconds spent in store.processRaft working. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,raft.rcvd.app,count,"Number of MsgApp messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.appresp,count,"Number of MsgAppResp messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.dropped,count,"Number of dropped incoming Raft messages +Shown as message" +crdb_dedicated,raft.rcvd.heartbeat,count,"Number of (coalesced, if enabled) MsgHeartbeat messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.heartbeatresp,count,"Number of (coalesced, if enabled) MsgHeartbeatResp messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.prevote,count,"Number of MsgPreVote messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.prevoteresp,count,"Number of MsgPreVoteResp messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.prop,count,"Number of MsgProp messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.snap,count,"Number of MsgSnap messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.timeoutnow,count,"Number of MsgTimeoutNow messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.transferleader,count,"Number of MsgTransferLeader messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.vote,count,"Number of MsgVote messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.voteresp,count,"Number of MsgVoteResp messages received by this store +Shown as message" +crdb_dedicated,raft.ticks,count,"Number of Raft ticks queued +Shown as unit" +crdb_dedicated,raftlog.behind,gauge,"Number of Raft log entries followers on other stores are behind. Shown as entry +Shown as unit" +crdb_dedicated,raftlog.truncated,count,"Number of Raft log entries truncated.
Shown as entry +Shown as unit" +crdb_dedicated,range.adds,count,"Number of range additions +Shown as unit" +crdb_dedicated,range.raftleadertransfers,count,"Number of raft leader transfers +Shown as unit" +crdb_dedicated,range.removes,count,"Number of range removals +Shown as unit" +crdb_dedicated,range.snapshots.generated,count,"Number of generated snapshots +Shown as unit" +crdb_dedicated,range.splits,count,"Number of range splits +Shown as unit" +crdb_dedicated,ranges,gauge,"Number of ranges +Shown as unit" +crdb_dedicated,ranges.overreplicated,gauge,"Number of ranges with more live replicas than the replication target +Shown as unit" +crdb_dedicated,ranges.unavailable,gauge,"Number of ranges with fewer live replicas than needed for quorum +Shown as unit" +crdb_dedicated,ranges.underreplicated,gauge,"Number of ranges with fewer live replicas than the replication target +Shown as unit" +crdb_dedicated,rebalancing.writespersecond,gauge,"Number of keys written i.e. applied by raft per second to the store, averaged over a large time period as used in rebalancing decisions. Shown as key +Shown as unit" +crdb_dedicated,replicas,gauge,"Number of replicas +Shown as unit" +crdb_dedicated,replicas.leaders,gauge,"Number of raft leaders +Shown as unit" +crdb_dedicated,replicas.leaders.not_leaseholders,gauge,"Number of replicas that are Raft leaders whose range lease is held by another store +Shown as unit" +crdb_dedicated,replicas.leaseholders,gauge,"Number of lease holders +Shown as unit" +crdb_dedicated,replicas.quiescent,gauge,"Number of quiesced replicas +Shown as unit" +crdb_dedicated,replicas.reserved,gauge,"Number of replicas reserved for snapshots +Shown as unit" +crdb_dedicated,requests.backpressure.split,gauge,"Number of backpressured writes waiting on a Range split +Shown as unit" +crdb_dedicated,requests.slow.distsender,gauge,"Number of requests that have been stuck for a long time in the dist sender. Shown as request +Shown as request" +crdb_dedicated,requests.slow.lease,gauge,"Number of requests that have been stuck for a long time acquiring a lease. Shown as request +Shown as request" +crdb_dedicated,requests.slow.raft,gauge,"Number of requests that have been stuck for a long time in raft. Shown as request +Shown as request" +crdb_dedicated,rocksdb.block.cache.hits,gauge,"Count of block cache hits +Shown as hit" +crdb_dedicated,rocksdb.block.cache.misses,gauge,"Count of block cache misses +Shown as miss" +crdb_dedicated,rocksdb.block.cache.pinned.usage,gauge,"Bytes pinned by the block cache. Shown as byte +Shown as byte" +crdb_dedicated,rocksdb.block.cache.usage,gauge,"Bytes used by the block cache. Shown as byte +Shown as byte" +crdb_dedicated,rocksdb.bloom_filter.prefix.checked,gauge,"Number of times the bloom filter was checked +Shown as unit" +crdb_dedicated,rocksdb.bloom_filter.prefix.useful,gauge,"Number of times the bloom filter helped avoid iterator creation +Shown as unit" +crdb_dedicated,rocksdb.compactions,gauge,"Number of table compactions +Shown as unit" +crdb_dedicated,rocksdb.flushes,gauge,"Number of table flushes +Shown as flush" +crdb_dedicated,rocksdb.memtable.total.size,gauge,"Current size of memtable in bytes. Shown as byte +Shown as byte" +crdb_dedicated,rocksdb.num_sstables,gauge,"Number of rocksdb SSTables. Shown as table +Shown as table" +crdb_dedicated,rocksdb.read.amplification,gauge,"Number of disk reads per query. 
Shown as read +Shown as read" +crdb_dedicated,rocksdb.table.readers.mem.estimate,gauge,"Memory used by index and filter blocks +Shown as unit" +crdb_dedicated,round_trip.latency,count,"Distribution of round-trip latencies with other nodes in nanoseconds. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sql.bytesin,count,"Number of sql bytes received. Shown as byte +Shown as byte" +crdb_dedicated,sql.bytesout,count,"Number of sql bytes sent. Shown as byte +Shown as byte" +crdb_dedicated,sql.conn.latency,count,"Latency to establish and authenticate a SQL connection. Shown as nanoseconds. +Shown as nanosecond" +crdb_dedicated,sql.conns,gauge,"Number of active sql connections. Shown as connection +Shown as connection" +crdb_dedicated,sql.ddl.count,count,"Number of SQL DDL statements +Shown as query" +crdb_dedicated,sql.delete.count,count,"Number of SQL DELETE statements +Shown as query" +crdb_dedicated,sql.distsql.contended.queries.count,count,"Number of SQL queries that experienced contention. Shown as count. +Shown as query" +crdb_dedicated,sql.distsql.exec.latency,count,"Latency in nanoseconds of DistSQL statement execution. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sql.distsql.flows.active,gauge,"Number of distributed SQL flows currently active +Shown as query" +crdb_dedicated,sql.distsql.flows.total,count,"Number of distributed SQL flows executed +Shown as query" +crdb_dedicated,sql.distsql.queries.active,gauge,"Number of distributed SQL queries currently active +Shown as query" +crdb_dedicated,sql.distsql.queries.total,count,"Number of distributed SQL queries executed +Shown as query" +crdb_dedicated,sql.distsql.select.count,count,"Number of DistSQL SELECT statements +Shown as unit" +crdb_dedicated,sql.distsql.service.latency,count,"Latency in nanoseconds of DistSQL request execution. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sql.exec.latency,count,"Latency in nanoseconds of SQL statement execution. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sql.failure.count,count,"Number of statements resulting in a planning or runtime error. Shown as count. +Shown as unit" +crdb_dedicated,sql.full.scan.count,count,"Number of full table or index scans. Shown as count. +Shown as unit" +crdb_dedicated,sql.insert.count,count,"Number of SQL INSERT statements +Shown as unit" +crdb_dedicated,sql.mem.distsql.current,gauge,"Current sql statement memory usage for distsql +Shown as unit" +crdb_dedicated,sql.mem.distsql.max,count,"Memory usage per sql statement for distsql +Shown as unit" +crdb_dedicated,sql.mem.internal.session.current,gauge,"Current sql session memory usage for internal +Shown as unit" +crdb_dedicated,sql.mem.internal.session.max,count,"Memory usage per sql session for internal +Shown as unit" +crdb_dedicated,sql.mem.internal.txn.current,gauge,"Current sql transaction memory usage for internal +Shown as unit" +crdb_dedicated,sql.mem.internal.txn.max,count,"Memory usage per sql transaction for internal +Shown as unit" +crdb_dedicated,sql.misc.count,count,"Number of other SQL statements +Shown as query" +crdb_dedicated,sql.new_conns.count,count,"Number of SQL connections created +Shown as connection" +crdb_dedicated,sql.query.count,count,"Number of SQL queries +Shown as query" +crdb_dedicated,sql.select.count,count,"Number of SQL SELECT statements +Shown as query" +crdb_dedicated,sql.service.latency,count,"Latency in nanoseconds of SQL request execution. 
Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sql.statements.active,gauge,"Number of currently active user SQL statements. Shown as count. +Shown as unit" +crdb_dedicated,sql.txn.abort.count,count,"Number of SQL transaction ABORT statements +Shown as unit" +crdb_dedicated,sql.txn.begin.count,count,"Number of SQL transaction BEGIN statements +Shown as unit" +crdb_dedicated,sql.txn.commit.count,count,"Number of SQL transaction COMMIT statements +Shown as unit" +crdb_dedicated,sql.txn.latency,count,"Latency of SQL transactions. Shown as nanoseconds. +Shown as unit" +crdb_dedicated,sql.txn.rollback.count,count,"Number of SQL transaction ROLLBACK statements +Shown as unit" +crdb_dedicated,sql.txns.open,gauge,"Number of currently open SQL transactions. Shown as count. +Shown as unit" +crdb_dedicated,sql.update.count,count,"Number of SQL UPDATE statements +Shown as unit" +crdb_dedicated,sys.cgo.allocbytes,gauge,"Current bytes of memory allocated by cgo. Shown as byte +Shown as byte" +crdb_dedicated,sys.cgo.totalbytes,gauge,"Total bytes of memory allocated by cgo, but not released. Shown as byte +Shown as byte" +crdb_dedicated,sys.cgocalls,gauge,"Total number of cgo calls +Shown as unit" +crdb_dedicated,sys.cpu.combined.percent.normalized,gauge,"Current user+system cpu percentage, normalized 0-1 by number of cores. +Shown as fraction" +crdb_dedicated,sys.cpu.sys.ns,gauge,"Total system cpu time in nanoseconds. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sys.cpu.sys.percent,gauge,"Current system cpu percentage +Shown as core" +crdb_dedicated,sys.cpu.user.ns,gauge,"Total user cpu time in nanoseconds. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sys.cpu.user.percent,gauge,"Current user cpu percentage. Shown as percent +Shown as core" +crdb_dedicated,sys.fd.open,gauge,"Process open file descriptors +Shown as unit" +crdb_dedicated,sys.fd.softlimit,gauge,"Process open FD soft limit +Shown as unit" +crdb_dedicated,sys.gc.count,gauge,"Total number of GC runs +Shown as garbage collection" +crdb_dedicated,sys.gc.pause.ns,gauge,"Total GC pause in nanoseconds. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sys.gc.pause.percent,gauge,"Current GC pause percentage. Shown as fraction +Shown as fraction" +crdb_dedicated,sys.go.allocbytes,gauge,"Current bytes of memory allocated by go. Shown as byte +Shown as byte" +crdb_dedicated,sys.go.totalbytes,gauge,"Total bytes of memory allocated by go, but not released. Shown as byte +Shown as byte" +crdb_dedicated,sys.goroutines,gauge,"Current number of goroutines +Shown as unit" +crdb_dedicated,sys.host.net.recv.bytes,gauge,"Bytes received on all network interfaces since this process started. +Shown as byte" +crdb_dedicated,sys.host.net.send.bytes,gauge,"Bytes sent on all network interfaces since this process started. +Shown as byte" +crdb_dedicated,sys.rss,gauge,"Current process RSS +Shown as unit" +crdb_dedicated,sys.uptime,gauge,"Process uptime in seconds. Shown as second +Shown as second" +crdb_dedicated,sysbytes,gauge,"Number of bytes in system KV pairs. Shown as byte +Shown as byte" +crdb_dedicated,syscount,gauge,"Count of system KV pairs +Shown as unit" +crdb_dedicated,timeseries.write.bytes,count,"Total size in bytes of metric samples written to disk. Shown as byte +Shown as byte" +crdb_dedicated,timeseries.write.errors,count,"Total errors encountered while attempting to write metrics to disk. 
Shown as error +Shown as error" +crdb_dedicated,timeseries.write.samples,count,"Total number of metric samples written to disk +Shown as unit" +crdb_dedicated,totalbytes,gauge,"Total number of bytes taken up by keys and values including non-live data. Shown as byte +Shown as byte" +crdb_dedicated,txn.aborts,count,"Number of aborted KV transactions +Shown as unit" +crdb_dedicated,txn.commits,count,"Number of committed KV transactions including 1PC +Shown as commit" +crdb_dedicated,txn.commits1PC,count,"Number of committed one-phase KV transactions +Shown as commit" +crdb_dedicated,txn.durations,count,"KV transaction durations in nanoseconds +Shown as nanosecond" +crdb_dedicated,txn.restarts,count,"Number of restarted KV transactions +Shown as unit" +crdb_dedicated,txn.restarts.serializable,count,"Number of restarts due to a forwarded commit timestamp and isolation=SERIALIZABLE +Shown as unit" +crdb_dedicated,txn.restarts.writetooold,count,"Number of restarts due to a concurrent writer committing first +Shown as unit" +crdb_dedicated,valbytes,gauge,"Number of bytes taken up by values. Shown as byte +Shown as byte" +crdb_dedicated,valcount,gauge,"Count of all values +Shown as unit" \ No newline at end of file diff --git a/src/current/_includes/v25.3/essential-alerts.md b/src/current/_includes/v25.3/essential-alerts.md index dbc9dddc8fe..6abfd7b475f 100644 --- a/src/current/_includes/v25.3/essential-alerts.md +++ b/src/current/_includes/v25.3/essential-alerts.md @@ -483,7 +483,7 @@ Changefeed has fallen behind. This is determined by the end-to-end lag between a Changefeed jobs should not be paused for a long time because [the protected timestamp prevents garbage collection]({% link {{ page.version.version }}/protect-changefeed-data.md %}). To protect against an operational error, this alert guards against an inadvertently forgotten pause. **Metric** -
[`jobs.changefeed.currently_paused`]({% link {{ page.version.version }}/essential-metrics-{{ include.deployment }}.md %}#changefeed-currently-paused) +
[`jobs.changefeed.currently_paused`]({% link {{ page.version.version }}/essential-metrics-{{ include.deployment }}.md %}#jobs-changefeed-currently-paused) **Rule**
WARNING: `jobs.changefeed.currently_paused` is greater than `0` for more than `15 minutes` diff --git a/src/current/_includes/v25.3/essential-metrics.md b/src/current/_includes/v25.3/essential-metrics.md index ed00c070506..6f02ba25f05 100644 --- a/src/current/_includes/v25.3/essential-metrics.md +++ b/src/current/_includes/v25.3/essential-metrics.md @@ -1,201 +1,289 @@ -These essential CockroachDB metrics enable you to build custom dashboards with the following tools: +{% assign version = page.version.version | replace: ".", "" %} +{% comment %}DEBUG: {{ version }}{% endcomment %} + +These essential CockroachDB metrics let you monitor your CockroachDB {{ site.data.products.core }} cluster. Use them to build custom dashboards with the following tools: + +{% comment %} STEP 1. Assign variables specific to deployment {% endcomment %} {% if include.deployment == 'self-hosted' %} -* [Grafana]({% link {{ page.version.version }}/monitor-cockroachdb-with-prometheus.md %}#step-5-visualize-metrics-in-grafana) -* [Datadog Integration]({% link {{ page.version.version }}/datadog.md %}) - The [**Datadog Integration Metric Name**](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics) column lists the corresponding Datadog metric which requires the `cockroachdb.` prefix. + {% assign metrics_datadog = site.data[version].metrics.datadog-cockroachdb %} + {% assign datadog_link = "https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics" %} + {% assign datadog_prefix = "cockroachdb" %} + {% assign category_order = "HARDWARE,STORAGE,OVERLOAD,NETWORKING,DISTRIBUTED,REPLICATION,SQL,CHANGEFEEDS,TTL,UNSET," %} + +- [Grafana]({% link {{ page.version.version }}/monitor-cockroachdb-with-prometheus.md %}#step-5-visualize-metrics-in-grafana) +- [Datadog Integration]({% link {{ page.version.version }}/datadog.md %}): The [**Datadog Integration Metric Name**]({{ datadog_link }}) column lists the corresponding Datadog metric which requires the `{{ datadog_prefix }}.` prefix. + {% elsif include.deployment == 'advanced' %} -* [Datadog integration]({% link cockroachcloud/tools-page.md %}#monitor-cockroachdb-cloud-with-datadog) - The [**Datadog Integration Metric Name**](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics) column lists the corresponding Datadog metric which requires the `crdb_dedicated.` prefix. -* [Metrics export]({% link cockroachcloud/export-metrics-advanced.md %}) -{% endif %} + {% assign metrics_datadog = site.data[version].metrics.datadog-crdb-dedicated %} + {% assign datadog_link = "https://docs.datadoghq.com/integrations/cockroach-cloud/#metrics" %} + {% assign datadog_prefix = "crdb_dedicated" %} +{% comment %} Removed NETWORKING category for advanced deployment {% endcomment %} + {% assign category_order = "HARDWARE,STORAGE,OVERLOAD,DISTRIBUTED,REPLICATION,SQL,CHANGEFEEDS,TTL,UNSET," %} -The **Usage** column explains why each metric is important to visualize in a custom dashboard and how to make both practical and actionable use of the metric in a production deployment. - -## Platform - -|
CockroachDB Metric Name
| {% if include.deployment == 'self-hosted' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|{% elsif include.deployment == 'advanced' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics)
(add `crdb_dedicated.` prefix)
|{% endif %}
Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| sys.cpu.combined.percent-normalized | sys.cpu.combined.percent.normalized | Current user+system CPU percentage consumed by the CRDB process, normalized by number of cores | This metric gives the CPU utilization percentage by the CockroachDB process. If it is equal to 1 (or 100%), then the CPU is overloaded. The CockroachDB process should not be running with over 80% utilization for extended periods of time (hours). This metric is used in the DB Console [**CPU Percent** graph]({% link {{ page.version.version }}/ui-hardware-dashboard.md %}#cpu-percent). | -| sys.cpu.host.combined.percent-normalized | NOT AVAILABLE | Current user+system CPU percentage consumed by all processes on the host OS, normalized by number of cores. If the CRDB process is run in a containerized environment, the host OS is the container since the CRDB process cannot inspect CPU usage beyond the container. | This metric gives the CPU utilization percentage of the underlying server, virtual machine, or container hosting the CockroachDB process. It includes CPU usage from both CockroachDB and non-CockroachDB processes. It also accounts for time spent processing hardware (`irq`) and software (`softirq`) interrupts, as well as `nice` time, which represents low-priority user-mode activity.

A value of 1 (or 100%) indicates that the CPU is overloaded. Avoid running the CockroachDB process in an environment where the CPU remains overloaded for extended periods (e.g. multiple hours). This metric appears in the DB Console on the **Host CPU Percent** graph. | -| sys.cpu.user.percent | sys.cpu.user.percent | Current user CPU percentage consumed by the CRDB process | This metric gives the CPU usage percentage at the user level by the CockroachDB process only. This is similar to the Linux `top` command output. The metric value can be more than 1 (or 100%) on multi-core systems. It is best to combine user and system metrics. | -| sys.cpu.sys.percent | sys.cpu.sys.percent | Current system CPU percentage consumed by the CRDB process | This metric gives the CPU usage percentage at the system (Linux kernel) level by the CockroachDB process only. This is similar to the Linux `top` command output. The metric value can be more than 1 (or 100%) on multi-core systems. It is best to combine user and system metrics. | -| sys.rss | sys.rss | Current process memory (RSS) | This metric gives the amount of RAM used by the CockroachDB process. Persistently low values over an extended period of time suggest there is underutilized memory that can be put to work with adjusted [settings for `--cache` or `--max_sql_memory`]({% link {{ page.version.version }}/recommended-production-settings.md %}#cache-and-sql-memory-size) or both. Conversely, a high utilization, even if a temporary spike, indicates an increased risk of [Out-of-memory (OOM) crash]({% link {{ page.version.version }}/cluster-setup-troubleshooting.md %}#out-of-memory-oom-crash) (particularly since the [swap is generally disabled]({% link {{ page.version.version }}/recommended-production-settings.md %}#memory)). | -| sql.mem.root.current | {% if include.deployment == 'self-hosted' %}sql.mem.root.current |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Current sql statement memory usage for root | This metric shows how memory set aside for temporary materializations, such as hash tables and intermediary result sets, is utilized. Use this metric to optimize memory allocations based on long term observations. The maximum amount is set with [`--max_sql_memory`]({% link {{ page.version.version }}/recommended-production-settings.md %}#cache-and-sql-memory-size). If the utilization of sql memory is persistently low, perhaps some portion of this memory allocation can be shifted to [`--cache`]({% link {{ page.version.version }}/recommended-production-settings.md %}#cache-and-sql-memory-size). | -| sys.host.disk.write.bytes | {% if include.deployment == 'self-hosted' %}sys.host.disk.write.bytes |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Bytes written to all disks since this process started | This metric reports the effective storage device write throughput (MB/s) rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. | -| sys.host.disk.write.count | {% if include.deployment == 'self-hosted' %}sys.host.disk.write |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Disk write operations across all disks since this process started | This metric reports the effective storage device write IOPS rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. 
| -| sys.host.disk.read.bytes | {% if include.deployment == 'self-hosted' %}sys.host.disk.read.bytes |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Bytes read from all disks since this process started | This metric reports the effective storage device read throughput (MB/s) rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. | -| sys.host.disk.read.count | {% if include.deployment == 'self-hosted' %}sys.host.disk.read |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Disk read operations across all disks since this process started | This metric reports the effective storage device read IOPS rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. | -| sys.host.disk.iopsinprogress | {% if include.deployment == 'self-hosted' %}sys.host.disk.iopsinprogress |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} IO operations currently in progress on this host | This metric gives the average queue length of the storage device. It characterizes the storage device's performance capability. All I/O performance metrics are Linux counters and correspond to the `avgqu-sz` column in the Linux `iostat` command output. You need to view the device queue graph in the context of the actual read/write IOPS and MBPS metrics that show the actual device utilization. If the device is not keeping up, the queue will grow. Values over 10 are bad. Values around 5 mean the device is working hard trying to keep up. For internal (on chassis) [NVMe](https://www.wikipedia.org/wiki/NVM_Express) devices, the queue values are typically 0. For network-connected devices, such as [AWS EBS volumes](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html), the normal operating range of values is 1 to 2. Spikes in values are OK. They indicate an I/O spike where the device fell behind and then caught up. End users may experience inconsistent response times, but there should be no cluster stability issues. If the queue is greater than 5 for an extended period of time and IOPS or MBPS are low, then the storage is most likely not provisioned per Cockroach Labs guidance. In AWS, this is commonly an EBS volume type, such as gp2, that is not suitable as primary database storage. If I/O is low and the queue is low, the most likely scenario is that the CPU is lacking and not driving I/O. One such case is a cluster with nodes that have only 2 vCPUs, which does not meet the supported [sizing]({% link {{ page.version.version }}/recommended-production-settings.md %}#sizing) for production deployments. There are quite a few background processes in the database that take CPU away from the workload, so the workload is just not getting the CPU. Review [storage and disk I/O]({% link {{ page.version.version }}/common-issues-to-monitor.md %}#storage-and-disk-i-o). | -| sys.host.net.recv.bytes | sys.host.net.recv.bytes | Bytes received on all network interfaces since this process started | This metric gives the node's ingress/egress network transfer rates. Flat sections may indicate insufficiently provisioned networking or high error rates. CockroachDB uses a reliable TCP/IP protocol, so errors result in delivery retries that create a "slow network" effect.
| -| sys.host.net.send.bytes | sys.host.net.send.bytes | Bytes sent on all network interfaces since this process started | This metric gives the node's ingress/egress network transfer rates. Flat sections may indicate insufficiently provisioned networking or high error rates. CockroachDB uses a reliable TCP/IP protocol, so errors result in delivery retries that create a "slow network" effect. | -| clock-offset.meannanos | clock.offset.meannanos | Mean clock offset with other nodes | This metric gives the node's clock skew. In a well-configured environment, the actual clock skew would be in the sub-millisecond range. A skew exceeding 5 ms is likely due to an NTP service misconfiguration. Reducing the actual clock skew reduces the probability of uncertainty-related conflicts and corresponding retries, which has a positive impact on workload performance. Conversely, a larger actual clock skew increases the probability of retries due to uncertainty conflicts, with potentially measurable adverse effects on workload performance. | - -## Storage - -|
CockroachDB Metric Name
| {% if include.deployment == 'self-hosted' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|{% elsif include.deployment == 'advanced' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics)
(add `crdb_dedicated.` prefix)
|{% endif %}
Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| capacity | {% if include.deployment == 'self-hosted' %}capacity.total |{% elsif include.deployment == 'advanced' %}capacity |{% endif %} Total storage capacity | This metric gives total storage capacity. Measurements should comply with the following rule: CockroachDB storage volumes should not be utilized more than 60% (40% free space). | -| capacity.available | capacity.available | Available storage capacity | This metric gives available storage capacity. Measurements should comply with the following rule: CockroachDB storage volumes should not be utilized more than 60% (40% free space). | -| capacity.used | capacity.used | Used storage capacity | This metric gives used storage capacity. Measurements should comply with the following rule: CockroachDB storage volumes should not be utilized more than 60% (40% free space). | -| storage.wal.fsync.latency | {% if include.deployment == 'self-hosted' %}storage.wal.fsync.latency |{% elsif include.deployment == 'advanced' %}storage.wal.fsync.latency |{% endif %} This metric reports the latency of writes to the [WAL]({% link {{ page.version.version }}/architecture/storage-layer.md %}#memtable-and-write-ahead-log). | If this value is greater than `100ms`, it is an indication of a [disk stall]({% link {{ page.version.version }}/cluster-setup-troubleshooting.md %}#disk-stalls). To mitigate the effects of disk stalls, consider deploying your cluster with [WAL failover]({% link {{ page.version.version }}/wal-failover.md %}) configured. | -| storage.write-stalls | {% if include.deployment == 'self-hosted' %}storage.write.stalls |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of instances of intentional write stalls to backpressure incoming writes | This metric reports actual disk stall events. Ideally, investigate all reports of disk stalls. As a practical guideline, one stall per minute is not likely to have a material impact on the workload beyond an occasional increase in response time. However, one stall per second should be viewed as problematic and investigated actively. It is particularly problematic if the rate persists over an extended period of time, and worse, if it is increasing. | -| rocksdb.compactions | rocksdb.compactions.total | Number of SST compactions | This metric reports the number of a node's [LSM compactions]({% link {{ page.version.version }}/common-issues-to-monitor.md %}#lsm-health). If the number of compactions remains elevated while the LSM health does not improve, compactions are not keeping up with the workload. If the condition persists for an extended period, the cluster will initially exhibit performance issues that will eventually escalate into stability issues. | -| rocksdb.block.cache.hits | rocksdb.block.cache.hits | Count of block cache hits | This metric gives hits to the block cache, which is reserved memory. It is allocated upon the start of a node process by the [`--cache` flag]({% link {{ page.version.version }}/cockroach-start.md %}#general) and never shrinks. By observing block cache hits and misses, you can fine-tune memory allocations in the node process for the demands of the workload.
| -| rocksdb.block.cache.misses | rocksdb.block.cache.misses | Count of block cache misses | This metric gives misses to the block cache, which is reserved memory. It is allocated upon the start of a node process by the [`--cache` flag]({% link {{ page.version.version }}/cockroach-start.md %}#general) and never shrinks. By observing block cache hits and misses, you can fine-tune memory allocations in the node process for the demands of the workload. | -| storage.value_separation.blob_files.count | storage.value_separation.blob_files.count | The number of blob files that are used to store [separated values]({% link {{ page.version.version }}/architecture/storage-layer.md %}#value-separation) within the storage engine. | Use this metric to track how many values (of key-value pairs) are being stored outside of the [LSM]({% link {{ page.version.version }}/architecture/storage-layer.md %}#log-structured-merge-trees) by the storage engine due to their large size. | -| storage.value_separation.blob_files.size | storage.value_separation.blob_files.size | The size of the physical blob files that are used to store [separated values]({% link {{ page.version.version }}/architecture/storage-layer.md %}#value-separation) within the storage engine. This value is the physical post-compression sum of the `storage.value_separation.value_bytes.referenced` and `storage.value_separation.value_bytes.unreferenced` metrics. | Use this metric to see how much of your physical storage capacity is being used by separated values in blob files. | -| storage.value_separation.value_bytes.referenced | storage.value_separation.value_bytes.referenced | The size of storage engine value bytes (pre-compression) that are [stored separately in blob files]({% link {{ page.version.version }}/architecture/storage-layer.md %}#value-separation) and referenced by a live [SSTable]({% link {{ page.version.version }}/architecture/storage-layer.md %}#ssts). | Use this metric to see how much live (i.e., not yet eligible for compaction) blob storage is in use by separated values. | -| storage.value_separation.value_bytes.unreferenced | storage.value_separation.value_bytes.unreferenced | The size of storage engine value bytes (pre-compression) that are [stored separately in blob files]({% link {{ page.version.version }}/architecture/storage-layer.md %}#value-separation) and not referenced by any live [SSTable]({% link {{ page.version.version }}/architecture/storage-layer.md %}#ssts). These bytes are garbage that could be reclaimed by a [compaction]({% link {{ page.version.version }}/architecture/storage-layer.md %}#compaction). | Use this metric to see how much blob storage is no longer in use and waiting to be compacted. | - -## Health - -|
CockroachDB Metric Name
| {% if include.deployment == 'self-hosted' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|{% elsif include.deployment == 'advanced' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics)
(add `crdb_dedicated.` prefix)
|{% endif %}
Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| sys.uptime | sys.uptime | Process uptime | This metric measures the length of time, in seconds, that the CockroachDB process has been running. Monitor this metric to detect events such as node restarts, which may require investigation or intervention. | -| admission.io.overload | {% if include.deployment == 'self-hosted' %}admission.io.overload |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} 1-normalized float indicating whether IO admission control considers the store as overloaded with respect to compaction out of L0 (considers sub-level and file counts). | If the value of this metric exceeds 1, it indicates overload. You can also look at the metrics `storage.l0-num-files`, `storage.l0-sublevels`, or `rocksdb.read-amplification` directly. A healthy LSM shape is defined as “read-amp < 20” and “L0-files < 1000”, corresponding to the [cluster settings]({% link {{ page.version.version }}/cluster-settings.md %}) `admission.l0_sub_level_count_overload_threshold` and `admission.l0_file_count_overload_threshold`, respectively. | -| admission.wait_durations.kv-p75 | {% if include.deployment == 'self-hosted' %}admission.wait.durations.kv |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Wait time durations for requests that waited | This metric shows whether the CPU utilization-based admission control feature is working effectively or is potentially overaggressive. It is a latency histogram of how much delay was added to the workload due to throttling by CPU control. If you observe waits of over 100ms for more than 5 seconds while excess CPU capacity is available, admission control is likely overly aggressive. | -| admission.wait_durations.kv-stores-p75 | {% if include.deployment == 'self-hosted' %}admission.wait.durations.kv_stores |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Wait time durations for requests that waited | This metric shows whether the I/O utilization-based admission control feature is working effectively or is potentially overaggressive. It is a latency histogram of how much delay was added to the workload due to throttling by I/O control. If you observe waits of over 100ms for more than 5 seconds while excess I/O capacity is available, admission control is likely overly aggressive. | -| sys.runnable.goroutines.per.cpu | {% if include.deployment == 'self-hosted' %}sys.runnable.goroutines.per_cpu |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Average number of goroutines that are waiting to run, normalized by number of cores | If this metric has a value over 30, it indicates CPU overload. If the condition lasts a short period of time (a few seconds), database users are likely to experience inconsistent response times. If the condition persists for an extended period of time (tens of seconds, or minutes), the cluster may start developing stability issues. Review [CPU planning]({% link {{ page.version.version }}/common-issues-to-monitor.md %}#cpu). |
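Several of the thresholds above translate directly into alerting expressions. As a minimal sketch, assuming Prometheus scrapes the nodes' metrics endpoints and the usual dot-to-underscore name mapping applies (e.g., `sys.runnable.goroutines.per.cpu` is exposed as `sys_runnable_goroutines_per_cpu`):

```
# CPU overload: runnable goroutines per CPU sustained above 30.
avg_over_time(sys_runnable_goroutines_per_cpu[1m]) > 30

# I/O overload: admission control considers the store overloaded above 1.
admission_io_overload > 1
```

Tune the lookback window to your scrape interval; the thresholds come from the Usage guidance in the table above.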
+- [Datadog integration]({% link cockroachcloud/tools-page.md %}#monitor-cockroachdb-cloud-with-datadog) - The [**Datadog Integration Metric Name**]({{ datadog_link }}) column lists the corresponding Datadog metric which requires the `{{ datadog_prefix }}` prefix. +- [Metrics export]({% link cockroachcloud/export-metrics-advanced.md %}) -{% if include.deployment == 'self-hosted' %} -## Network - -|
CockroachDB Metric Name
|
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|
Description
| Usage | -| ------------------------------------------------------ | --------------------------------------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -| rpc.connection.avg_round_trip_latency | rpc.connection.avg_round_trip_latency | Sum of exponentially weighted moving average of round-trip latencies, as measured through a gRPC RPC. Dividing this gauge by `rpc.connection.healthy` gives an approximation of average latency, but the top-level round-trip-latency histogram is more useful. Instead, users should consult the label families of this metric if they are available (which requires Prometheus and the cluster setting `server.child_metrics.enabled`); these provide per-peer moving averages. This metric does not track failed connections. A failed connection's contribution is reset to zero. | This metric is helpful in understanding general network issues outside of CockroachDB that could be impacting the user’s workload. | -| rpc.connection.failures | rpc.connection.failures.count | Counter of failed connections. This includes both events in which a healthy connection terminates and unsuccessful reconnection attempts. Connections that are terminated as part of local node shutdown are excluded. Decommissioned peers are excluded. | See Description. | -| rpc.connection.healthy | rpc.connection.healthy | Gauge of current connections in a healthy state (i.e., bidirectionally connected and heartbeating). | See Description. | -| rpc.connection.healthy_nanos | rpc.connection.healthy_nanos | Gauge of nanoseconds of healthy connection time. On the Prometheus endpoint scraped when the cluster setting `server.child_metrics.enabled` is set, this gauge allows you to see the duration for which a given peer has been connected in a healthy state. | This can be useful for monitoring the stability and health of connections within your CockroachDB cluster. | -| rpc.connection.heartbeats | rpc.connection.heartbeats.count | Counter of successful heartbeats. | See Description. | -| rpc.connection.unhealthy | rpc.connection.unhealthy | Gauge of current connections in an unhealthy state (not bidirectionally connected or heartbeating). | If the value of this metric is greater than 0, this could indicate a network partition. | -| rpc.connection.unhealthy_nanos | rpc.connection.unhealthy_nanos | Gauge of nanoseconds of unhealthy connection time. On the Prometheus endpoint scraped when the cluster setting `server.child_metrics.enabled` is set, this gauge allows you to see the duration for which a given peer has been unreachable. | If this duration is greater than 0, this could indicate how long a network partition has been occurring. | {% endif %} +The **Usage** column explains why each metric is important to visualize and how to make both practical and actionable use of the metric in a production deployment. + +{% assign layers = site.data[version].metrics.metrics.layers %} + +{% comment %} STEP 2. Create array of layer names {% endcomment %} +{% assign layer_names_string = "" %} +{% for layer in layers %} + {% assign layer_names_string = layer_names_string | append: layer.name | append: "," %} +{% endfor %} + +{% comment %}DEBUG: layer_names_string = {{ layer_names_string }}{% endcomment %} +{% assign layer_names_array = layer_names_string | split: "," %} + +{% comment %} STEP 3.
Create array of unique category names {% endcomment %} +{% assign category_names_string = "" %} +{% for layer_name in layer_names_array %} + {% assign layer = layers | where_exp: "l", "l.name == layer_name" %} + {% comment %}DEBUG: layer_name = {{ layer_name }}{% endcomment %} + + {% for category in layer[0].categories %} + {% comment %}DEBUG: category.name = {{ category.name }}{% endcomment %} + {% unless category_names_string contains category.name %} + {% assign category_names_string = category_names_string | append: category.name | append: "," %} + {% endunless %} + + {% endfor %} + +{% endfor %} + +{% comment %} Order categories, NOTE: new categories may break this order, however all relevant categories will be displayed though not in the desired order{% endcomment %} +{% comment %}DEBUG: category_names_string = {{ category_names_string }}{% endcomment %} +{% assign category_names_string_ordered = category_names_string | replace: "CHANGEFEEDS,DISTRIBUTED,NETWORKING,SQL,TTL,UNSET,HARDWARE,OVERLOAD,REPLICATION,STORAGE,", category_order %} +{% comment %}DEBUG: category_names_string_ordered = {{ category_names_string_ordered }}{% endcomment %} +{% assign category_names_array = category_names_string_ordered | split: "," %} + +{% comment %} STEP 4. Create sections for each unique category. For example, both APPLICATION and STORAGE layers have a SQL category, however only one SQL category will be created. {% endcomment %} +{% for category_name in category_names_array %} + {% if category_name != "" %} + + {% comment %} STEP 4a. Loop 1 to count essential metrics {% endcomment %} + {% assign essential_metrics_total = 0 %} + {% for layer_name in layer_names_array %} + + {% assign layer = layers | where_exp: "l", "l.name == layer_name" %} + {% assign category = layer[0].categories | where_exp: "c", "c.name == category_name" %} + {% assign essential_metrics = category[0].metrics | where: "essential", true %} + {% if essential_metrics.size > 0 %} + {% comment %}DEBUG: 1 {{ layer_name }} 2 {{ layer[0].name }} 3 {{ category[0].name }} {{ essential_metrics.size }}{% endcomment %} + {% assign essential_metrics_total = essential_metrics_total | plus: essential_metrics.size %} + {% endif %}{% comment %}if essential_metrics.size > 0{% endcomment %} + + {% endfor %}{% comment %}for layer in layer_names_array{% endcomment %} + + {% comment %} STEP 4b. Only create a section for a category if essential metrics exist. For example, the UNSET category does not have any essential metrics.{% endcomment %} + {% if essential_metrics_total > 0 %} + + {% comment %} Transform category_name to user-facing name. 
{% endcomment %} + {% if category_name == "HARDWARE" %}{% assign category_display_name = "Platform" %} + {% elsif category_name == "STORAGE" %}{% assign category_display_name = "Storage" %} + {% elsif category_name == "OVERLOAD" %}{% assign category_display_name = "Health" %} + {% elsif category_name == "NETWORKING" %}{% assign category_display_name = "Network" %} + {% elsif category_name == "DISTRIBUTED" %}{% assign category_display_name = "KV Distributed" %} + {% elsif category_name == "REPLICATION" %}{% assign category_display_name = "KV Replication" %} + {% elsif category_name == "CHANGEFEEDS" %}{% assign category_display_name = "Changefeeds" %} + {% elsif category_name == "TTL" %}{% assign category_display_name = "Row-level TTL" %} + {% else %}{% assign category_display_name = category_name %}{% comment %} For example, SQL {% endcomment %} + {% endif %} + +## {{ category_display_name }} +{% comment %}DEBUG: {{ essential_metrics_total }} essential metrics{% endcomment %} + + + + + + + + + + + + + {% comment %} STEP 4c. Loop 2 to create essential metric rows for category{% endcomment %} + {% for layer_name in layer_names_array %} + + {% assign layer = layers | where_exp: "l", "l.name == layer_name" %} + {% assign category = layer[0].categories | where_exp: "c", "c.name == category_name" %} + {% assign essential_metrics = category[0].metrics | where: "essential", true %} + {% comment %}DEBUG: 1 {{ layer_name }} 2 {{ layer[0].name }} 3 {{ category[0].name }}{% endcomment %} + + {% for metric in essential_metrics %} + {% comment %} STEP 4d. Exclude SQL metrics that will be placed in special categories {% endcomment %} + {% unless category_name == SQL %} + {% unless metric.name contains "backup" or metric.name contains "BACKUP" or metric.name contains "create_stats" %} + + {% comment %} Transforms to match datadog_id {% endcomment %} + {% assign input_metric = metric.name %} + {% assign match1 = metrics_datadog | where: "datadog_id", input_metric | first %} + {% assign input_metric = metric.name | replace: "-", "." %} + {% assign match2 = metrics_datadog | where: "datadog_id", input_metric | first %} + {% assign input_metric = metric.name | append: ".count" %} + {% assign match3 = metrics_datadog | where: "datadog_id", input_metric | first %} + {% assign input_metric = metric.name | replace: "_", "." %} + {% assign match4 = metrics_datadog | where: "datadog_id", input_metric | first %} + {% assign input_metric = metric.name | replace: "-", "_" | append: ".count" %} + {% assign match5 = metrics_datadog | where: "datadog_id", input_metric | first %} + {% assign input_metric = metric.name | replace: "-", "_" %} + {% assign match6 = metrics_datadog | where: "datadog_id", input_metric | first %} + {% assign input_metric = metric.name | append: ".total" %} + {% assign match7 = metrics_datadog | where: "datadog_id", input_metric | first %} + + {% assign metric_link = metric.name | replace: "_", "-" | replace: ".", "-" %} + + + + + + + + + {% endunless %}{% comment %}unless metric.name contains "backup" or metric.name contains "BACKUP" or metric.name contains "create_stats"{% endcomment %} + {% endunless %}{% comment %}unless category_name == SQL{% endcomment %} + {% endfor %}{% comment %}for metric in essential_metrics{% endcomment %} + {% endfor %}{% comment %}for layer in layer_names_array{% endcomment %} + + +
CockroachDB Metric Name[Datadog Integration Metric Name]({{ datadog_link }})
(add `{{ datadog_prefix }}.` prefix)
DescriptionUsage
+ {% comment %} For self-hosted, add labeled_name if exists. advanced does not yet support metrics endpoint. {% endcomment %} +
{% if include.deployment == 'self-hosted' %}{% if metric.labeled_name %}metrics endpoint:
{{ metric.labeled_name }}{% endif %}{% endif %} +
+ {% if match1 %}{% comment %}Match1:{% endcomment %}{{ match1.datadog_id }} + {% elsif match2 %}{% comment %}Match2:{% endcomment %}{{ match2.datadog_id }} + {% elsif match3 %}{% comment %}Match3:{% endcomment %}{{ match3.datadog_id }} + {% elsif match4 %}{% comment %}Match4:{% endcomment %}{{ match4.datadog_id }} + {% elsif match5 %}{% comment %}Match5:{% endcomment %}{{ match5.datadog_id }} + {% elsif match6 %}{% comment %}Match6:{% endcomment %}{{ match6.datadog_id }} + {% elsif match7 %}{% comment %}Match7:{% endcomment %}{{ match7.datadog_id }} + {% else %}NOT AVAILABLE + {% endif %} + {{ metric.description }}{{ metric.how_to_use }}
+ + {% endif %}{% comment %}essential_metrics_total > 0{% endcomment %} + + {% comment %} STEP 4e. Create SQL special categories {% endcomment %} + {% if category_name == "SQL" %} + {% assign layer = layers | where_exp: "l", "l.name == 'APPLICATION'" %} + {% assign category = layer[0].categories | where_exp: "c", "c.name == category_name" %} + {% assign essential_metrics = category[0].metrics | where: "essential", true %} + +## Table Statistics + + + + + + + + + + + + + {% for metric in essential_metrics %} + {% if metric.name contains "create_stats" %} + {% assign metric_link = metric.name | replace: "_", "-" | replace: ".", "-" %} + + + + + + + + + {% endif %} + {% endfor %} + + +
CockroachDB Metric Name[Datadog Integration Metric Name]({{ datadog_link }})
(add `{{ datadog_prefix }}.` prefix)
DescriptionUsage
+ {% comment %} For self-hosted, add labeled_name if exists. advanced does not yet support metrics endpoint. {% endcomment %} +
{% if include.deployment == 'self-hosted' %}{% if metric.labeled_name %}metrics endpoint:
{{ metric.labeled_name }}{% endif %}{% endif %} +
{{ metric.name }} + {{ metric.description }}{{ metric.how_to_use }}
+ +## Disaster Recovery + + + + + + + + + + + + + {% for metric in essential_metrics %} + {% if metric.name contains "backup" or metric.name contains "BACKUP" %} + + {% assign metric_link = metric.name | replace: "_", "-" | replace: ".", "-" %} + + + + + + + + + {% endif %} + {% endfor %} + + +
CockroachDB Metric Name[Datadog Integration Metric Name]({{ datadog_link }})
(add `{{ datadog_prefix }}.` prefix)
DescriptionUsage
+ {% comment %} For self-hosted, add labeled_name if exists. advanced does not yet support metrics endpoint. {% endcomment %} +
{% if include.deployment == 'self-hosted' %}{% if metric.labeled_name %}metrics endpoint:
{{ metric.labeled_name }}{% endif %}{% endif %} +
{{ metric.name }} + {{ metric.description }}{{ metric.how_to_use }}
+ + {% endif %}{% comment %}if category_name == "SQL"{% endcomment %} + + {% endif %}{% comment %}if category_name != ""{% endcomment %} +{% endfor %}{% comment %}for category_name in category_names_array{% endcomment %} + +{% comment %} STEP 5. Add category for metrics that are not in metrics.yaml{% endcomment %} {% if include.deployment == 'self-hosted' %} + {% assign essential_metrics = site.data[version].metrics.available-metrics-not-in-metrics-list | where: "essential", true %} ## Expiration of license and certificates -|
CockroachDB Metric Name
|
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|
Description
| Usage | -| ----------------------------------------------------- | ---------------------------------------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -| seconds.until.enterprise.license.expiry | seconds.until.enterprise.license.expiry | Seconds until enterprise license expiry (0 if no license present or running without enterprise features) | See Description. | -| security.certificate.expiration.ca | security.certificate_expiration.ca | Expiration for the CA certificate. 0 means no certificate or error | See Description. | -| security.certificate.expiration.client-ca | security.certificate_expiration.client_ca | Expiration for the client CA certificate. 0 means no certificate or error| See Description. | -| security.certificate.expiration.ui | security.certificate_expiration.ui | Expiration for the UI certificate. 0 means no certificate or error| See Description. | -| security.certificate.expiration.ui-ca | security.certificate_expiration.ui_ca | Expiration for the UI CA certificate. 0 means no certificate or error| See Description. | -| security.certificate.expiration.node | security.certificate_expiration.node | Expiration for the node certificate. 0 means no certificate or error| See Description. | -| security.certificate.expiration.node-client | security.certificate_expiration.node_client | Expiration for the node's client certificate. 0 means no certificate or error| See Description. | -{% endif %} + + + + + + + + + + + + {% for metric in essential_metrics %} + + {% assign metric_link = metric.metric_id | replace: "_", "-" | replace: ".", "-" %} -## KV distributed - -|
CockroachDB Metric Name
| {% if include.deployment == 'self-hosted' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|{% elsif include.deployment == 'advanced' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics)
(add `crdb_dedicated.` prefix)
|{% endif %}
Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| liveness.heartbeatlatency | {% if include.deployment == 'self-hosted' %}liveness.heartbeatlatency-p90 |{% elsif include.deployment == 'advanced' %}liveness.heartbeatlatency |{% endif %} Node liveness heartbeat latency | If this metric exceeds 1 second, it is a sign of cluster instability. | -| liveness.livenodes | liveness.livenodes | Number of live nodes in the cluster (will be 0 if this node is not itself live) | This is a critical metric that tracks the live nodes in the cluster. | -| distsender.rpc.sent.nextreplicaerror | distsender.rpc.sent.nextreplicaerror | Number of replica-addressed RPCs sent due to per-replica errors | [RPC](architecture/overview.html#overview) errors do not necessarily indicate a problem. This metric tracks remote procedure calls that return a status value other than "success". A non-success status of an RPC should not be misconstrued as a network transport issue. It is database code logic executed on another cluster node. The non-success status is a result of an orderly execution of an RPC that reports a specific logical condition. | -| distsender.errors.notleaseholder | distsender.errors.notleaseholder | Number of NotLeaseHolderErrors encountered from replica-addressed RPCs | Errors of this type are normal during elastic cluster topology changes when leaseholders are actively rebalancing. They are automatically retried. However they may create occasional response time spikes. In that case, this metric may provide the explanation of the cause. | - -## KV replication - -|
CockroachDB Metric Name
| {% if include.deployment == 'self-hosted' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|{% elsif include.deployment == 'advanced' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics)
(add `crdb_dedicated.` prefix)
|{% endif %}
Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| leases.transfers.success | leases.transfers.success | Number of successful lease transfers | A high number of [lease](architecture/replication-layer.html#leases) transfers is not a negative or positive signal, rather it is a reflection of the elastic cluster activities. For example, this metric is high during cluster topology changes. A high value is often the reason for NotLeaseHolderErrors which are normal and expected during rebalancing. Observing this metric may provide a confirmation of the cause of such errors. | -| rebalancing_lease_transfers | rebalancing.lease.transfers | Counter of the number of [lease transfers]({% link {{ page.version.version }}/architecture/replication-layer.md %}#leases) that occur during replica rebalancing. These lease transfers are tracked by a component that looks for a [store-level]({% link {{ page.version.version }}/cockroach-start.md %}#store) load imbalance of either QPS (`rebalancing.queriespersecond`) or CPU usage (`rebalancing.cpunanospersecond`), depending on the value of the `kv.allocator.load_based_rebalancing.objective` [cluster setting]({% link {{ page.version.version }}/cluster-settings.md %}#setting-kv-allocator-load-based-rebalancing-objective). | Used to identify when there has been more rebalancing activity triggered by imbalance between stores (of QPS or CPU). If this is high (when the count is rated), it indicates that more rebalancing activity is taking place due to load imbalance between stores. | -| rebalancing_range_rebalances | {% if include.deployment == 'self-hosted' %}rebalancing.range.rebalances | {% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Counter of the number of [load-based range rebalances]({% link {{ page.version.version }}/architecture/replication-layer.md %}#load-based-replica-rebalancing). This range movement is tracked by a component that looks for [store-level]({% link {{ page.version.version }}/cockroach-start.md %}#store) load imbalance of either QPS (`rebalancing.queriespersecond`) or CPU usage (`rebalancing.cpunanospersecond`), depending on the value of the `kv.allocator.load_based_rebalancing.objective` [cluster setting]({% link {{ page.version.version }}/cluster-settings.md %}#setting-kv-allocator-load-based-rebalancing-objective). | Used to identify when there has been more rebalancing activity triggered by imbalance between stores (of QPS or CPU). If this is high (when the count is rated), it indicates that more rebalancing activity is taking place due to load imbalance between stores. | -| rebalancing_replicas_queriespersecond | {% if include.deployment == 'self-hosted' %}rebalancing.replicas.queriespersecond | {% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Counter of the KV-level requests received per second by a given [store]({% link {{ page.version.version }}/cockroach-start.md %}#store). The store aggregates all of the CPU and QPS stats across all its replicas and then creates a histogram that maintains buckets that can be queried for, e.g., the P95 replica's QPS or CPU. | A high value of this metric could indicate that one of the store's replicas is part of a [hot range]({% link {{ page.version.version }}/understand-hotspots.md %}#hot-range). 
See also: `rebalancing_replicas_cpunanospersecond`. | -| rebalancing_replicas_cpunanospersecond | {% if include.deployment == 'self-hosted' %}rebalancing.replicas.cpunanospersecond | {% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Counter of the CPU nanoseconds of execution time per second by a given [store]({% link {{ page.version.version }}/cockroach-start.md %}#store). The store aggregates all of the CPU and QPS stats across all its replicas and then creates a histogram that maintains buckets that can be queried for, e.g., the P95 replica's QPS or CPU. | A high value of this metric could indicate that one of the store's replicas is part of a [hot range]({% link {{ page.version.version }}/understand-hotspots.md %}#hot-range). See also the non-histogram variant: `rebalancing.cpunanospersecond`. | -| rebalancing.queriespersecond | {% if include.deployment == 'self-hosted' %}rebalancing.queriespersecond |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of kv-level requests received per second by the store, considering the last 30 minutes, as used in rebalancing decisions. | This metric shows hotspots along the queries per second (QPS) dimension. It provides insights into the ongoing rebalancing activities. | -| rebalancing.cpunanospersecond | {% if include.deployment == 'self-hosted' %}rebalancing.cpunanospersecond |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Non-histogram variant of `rebalancing_replicas_cpunanospersecond`. | See usage of `rebalancing_replicas_cpunanospersecond`. | -| ranges | ranges | Number of ranges | This metric provides a measure of the scale of the data size. | -| replicas | {% if include.deployment == 'self-hosted' %}replicas.total |{% elsif include.deployment == 'advanced' %}replicas |{% endif %} Number of replicas | This metric provides an essential characterization of the data distribution across cluster nodes. | -| replicas.leaseholders | replicas.leaseholders | Number of lease holders | This metric provides an essential characterization of the data processing points across cluster nodes. | -| ranges.underreplicated | ranges.underreplicated | Number of ranges with fewer live replicas than the replication target | This metric is an indicator of [replication issues]({% link {{ page.version.version }}/cluster-setup-troubleshooting.md %}#replication-issues). It shows whether the cluster has data that is not conforming to resilience goals. The next step is to determine the corresponding database object, such as the table or index, of these under-replicated ranges and whether the under-replication is temporarily expected. Use the statement `SELECT table_name, index_name FROM [SHOW RANGES WITH INDEXES] WHERE range_id = {id of under-replicated range};`| -| ranges.unavailable | ranges.unavailable | Number of ranges with fewer live replicas than needed for quorum | This metric is an indicator of [replication issues]({% link {{ page.version.version }}/cluster-setup-troubleshooting.md %}#replication-issues). It shows whether the cluster is unhealthy and can impact workload. If an entire range is unavailable, then it will be unable to process queries. 
| -| queue.replicate.replacedecommissioningreplica.error | {% if include.deployment == 'self-hosted' %}queue.replicate.replacedecommissioningreplica.error.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of failed decommissioning replica replacements processed by the replicate queue | Refer to [Decommission the node]({% link {{ page.version.version }}/node-shutdown.md %}?filters=decommission#decommission-the-node). | -| range.splits | {% if include.deployment == 'self-hosted' %}range.splits.total |{% elsif include.deployment == 'advanced' %}range.splits |{% endif %} Number of range splits | This metric indicates how fast a workload is scaling up. Spikes can indicate resource [hotspots]({% link {{ page.version.version }}/understand-hotspots.md %}) since the [split heuristic is based on QPS]({% link {{ page.version.version }}/load-based-splitting.md %}#control-load-based-splitting-threshold). To understand whether hotspots are an issue and with which tables and indexes they are occurring, correlate this metric with other metrics such as CPU usage, such as `sys.cpu.combined.percent-normalized`, or use the [**Hot Ranges** page]({% link {{ page.version.version }}/ui-hot-ranges-page.md %}). | -| range.merges | {% if include.deployment == 'self-hosted' %}range.merges.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of range merges | This metric indicates how fast a workload is scaling down. Merges are Cockroach's [optimization for performance](architecture/distribution-layer.html#range-merges). This metric indicates that there have been deletes in the workload. | - -## SQL - -|
CockroachDB Metric Name
| {% if include.deployment == 'self-hosted' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|{% elsif include.deployment == 'advanced' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics)
(add `crdb_dedicated.` prefix)
|{% endif %}
Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| sql.conns | sql.conns | Number of active SQL connections | This metric shows the number of connections as well as the distribution, or balancing, of connections across cluster nodes. An imbalance can lead to nodes becoming overloaded. Review [Connection Pooling]({% link {{ page.version.version }}/connection-pooling.md %}). | -| sql.new_conns | {% if include.deployment == 'self-hosted' %}sql.new_conns.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of new connection attempts. | The rate of this metric shows how frequently new connections are being established. This can be useful in determining if a high rate of incoming new connections is causing additional load on the server due to a misconfigured application. | -| sql.txns.open | sql.txns.open | Number of currently open user SQL transactions | This metric should roughly correspond to the number of cores * 4. If this metric is consistently larger, scale out the cluster. | -| sql.statements.active | sql.statements.active | Number of currently active user SQL statements | This high-level metric reflects workload volume. | -| sql.failure.count | {% if include.deployment == 'self-hosted' %}sql.failure |{% elsif include.deployment == 'advanced' %}sql.failure.count |{% endif %} Number of statements resulting in a planning or runtime error | This metric is a high-level indicator of workload and application degradation with query failures. Use the [Insights page]({% link {{ page.version.version }}/ui-insights-page.md %}) to find failed executions with their error code to troubleshoot, or use application-level logs, if instrumented, to determine the cause of the error. | -| sql.full.scan.count | {% if include.deployment == 'self-hosted' %}sql.full.scan |{% elsif include.deployment == 'advanced' %}sql.full.scan.count |{% endif %} Number of full table or index scans | This metric is a high-level indicator of potentially suboptimal query plans in the workload that may require index tuning and maintenance. To identify the [statements with a full table scan]({% link {{ page.version.version }}/performance-recipes.md %}#statements-with-full-table-scans), use `SHOW FULL TABLE SCANS` or the [**SQL Activity Statements** page]({% link {{ page.version.version }}/ui-statements-page.md %}) with the corresponding metric time frame. The **Statements** page also includes [explain plans]({% link {{ page.version.version }}/ui-statements-page.md %}#explain-plans) and [index recommendations]({% link {{ page.version.version }}/ui-statements-page.md %}#insights). Not all full scans are necessarily bad, especially over smaller tables. | -| sql.insert.count | sql.insert.count | Number of SQL INSERT statements successfully executed | This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the [**SQL Activity** pages]({% link {{ page.version.version }}/monitoring-and-alerting.md %}#sql-activity-pages) to investigate interesting outliers or patterns.
For example, on the [**Transactions** page]({% link {{ page.version.version }}/ui-transactions-page.md %}) and the [**Statements** page]({% link {{ page.version.version }}/ui-statements-page.md %}), sort on the Execution Count column. To find problematic sessions, on the [**Sessions** page]({% link {{ page.version.version }}/ui-sessions-page.md %}), sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. | -| sql.update.count | sql.update.count | Number of SQL UPDATE statements successfully executed | This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the [**SQL Activity** pages]({% link {{ page.version.version }}/monitoring-and-alerting.md %}#sql-activity-pages) to investigate interesting outliers or patterns. For example, on the [**Transactions** page]({% link {{ page.version.version }}/ui-transactions-page.md %}) and the [**Statements** page]({% link {{ page.version.version }}/ui-statements-page.md %}), sort on the Execution Count column. To find problematic sessions, on the [**Sessions** page]({% link {{ page.version.version }}/ui-sessions-page.md %}), sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. | -| sql.delete.count | sql.delete.count | Number of SQL DELETE statements successfully executed | This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the [**SQL Activity** pages]({% link {{ page.version.version }}/monitoring-and-alerting.md %}#sql-activity-pages) to investigate interesting outliers or patterns. For example, on the [**Transactions** page]({% link {{ page.version.version }}/ui-transactions-page.md %}) and the [**Statements** page]({% link {{ page.version.version }}/ui-statements-page.md %}), sort on the Execution Count column. To find problematic sessions, on the [**Sessions** page]({% link {{ page.version.version }}/ui-sessions-page.md %}), sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. | -| sql.select.count | sql.select.count | Number of SQL SELECT statements successfully executed | This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the [**SQL Activity** pages]({% link {{ page.version.version }}/monitoring-and-alerting.md %}#sql-activity-pages) to investigate interesting outliers or patterns. For example, on the [**Transactions** page]({% link {{ page.version.version }}/ui-transactions-page.md %}) and the [**Statements** page]({% link {{ page.version.version }}/ui-statements-page.md %}), sort on the Execution Count column. To find problematic sessions, on the [**Sessions** page]({% link {{ page.version.version }}/ui-sessions-page.md %}), sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. | -| sql.ddl.count | sql.ddl.count | Number of SQL DDL statements successfully executed | This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. 
If abnormal patterns emerge, apply the metric's time range to the [**SQL Activity** pages]({% link {{ page.version.version }}/monitoring-and-alerting.md %}#sql-activity-pages) to investigate interesting outliers or patterns. For example, on the [**Transactions** page]({% link {{ page.version.version }}/ui-transactions-page.md %}) and the [**Statements** page]({% link {{ page.version.version }}/ui-statements-page.md %}), sort on the Execution Count column. To find problematic sessions, on the [**Sessions** page]({% link {{ page.version.version }}/ui-sessions-page.md %}), sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. | -| sql.txn.begin.count | sql.txn.begin.count | Number of SQL transaction BEGIN statements successfully executed | This metric reflects workload volume by counting explicit [transactions]({% link {{ page.version.version }}/transactions.md %}). Use this metric to determine whether explicit transactions can be refactored as implicit transactions (individual statements). | -| sql.txn.commit.count | sql.txn.commit.count | Number of SQL transaction COMMIT statements successfully executed | This metric shows the number of [transactions]({% link {{ page.version.version }}/transactions.md %}) that completed successfully. This metric can be used as a proxy to measure the number of successful explicit transactions. | -| sql.txn.rollback.count | sql.txn.rollback.count | Number of SQL transaction ROLLBACK statements successfully executed | This metric shows the number of orderly transaction [rollbacks]({% link {{ page.version.version }}/rollback-transaction.md %}). A persistently high number of rollbacks may negatively impact the workload performance and needs to be investigated. | -| sql.txn.abort.count | sql.txn.abort.count | Number of SQL transaction abort errors | This high-level metric reflects workload performance. A persistently high number of SQL transaction abort errors may negatively impact the workload performance and needs to be investigated. | -| sql.service.latency-p90, sql.service.latency-p99 | sql.service.latency | Latency of SQL request execution | These high-level metrics reflect workload performance. Monitor these metrics to understand latency over time. If abnormal patterns emerge, apply the metric's time range to the [**SQL Activity** pages]({% link {{ page.version.version }}/monitoring-and-alerting.md %}#sql-activity-pages) to investigate interesting outliers or patterns. The [**Statements page**]({% link {{ page.version.version }}/ui-statements-page.md %}) has P90 Latency and P99 latency columns to enable correlation with this metric. | -| sql.txn.latency-p90, sql.txn.latency-p99 | sql.txn.latency | Latency of SQL transactions | These high-level metrics provide a latency histogram of all executed SQL transactions. These metrics provide an overview of the current SQL workload. | -| txnwaitqueue.deadlocks_total | {% if include.deployment == 'self-hosted' %}txnwaitqueue.deadlocks.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of deadlocks detected by the transaction wait queue | Alert on this metric if its value is greater than zero, especially if transaction throughput is lower than expected. Applications should be able to detect and recover from deadlock errors. 
However, transaction performance and throughput can be maximized if the application logic avoids deadlock conditions in the first place, for example, by keeping transactions as short as possible. | -| sql.distsql.contended_queries.count | {% if include.deployment == 'self-hosted' %}sql.distsql.contended.queries |{% elsif include.deployment == 'advanced' %} sql.distsql.contended.queries |{% endif %} Number of SQL queries that experienced contention | This metric is incremented whenever a statement experiences a non-trivial amount of contention, whether from read-write or write-write conflicts. Monitor this metric to correlate possible workload performance issues to contention conflicts. | -| sql.conn.failures | sql.conn.failures.count | Number of SQL connection failures | This metric is incremented whenever a connection attempt fails for any reason, including timeouts. | -| sql.conn.latency-p90, sql.conn.latency-p99 | sql.conn.latency | Latency to establish and authenticate a SQL connection | These metrics characterize the database connection latency, which can affect the application performance, for example, by having slow startup times. Connection failures are not recorded in these metrics. | -| txn.restarts.serializable | txn.restarts.serializable | Number of restarts due to a forwarded commit timestamp and isolation=SERIALIZABLE | This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review [transaction contention best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}#transaction-contention) and [performance tuning recipes]({% link {{ page.version.version }}/performance-recipes.md %}#transaction-contention). Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. | -| txn.restarts.writetooold | txn.restarts.writetooold | Number of restarts due to a concurrent writer committing first | This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review [transaction contention best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}#transaction-contention) and [performance tuning recipes]({% link {{ page.version.version }}/performance-recipes.md %}#transaction-contention). Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. | -| txn.restarts.writetoooldmulti | {% if include.deployment == 'self-hosted' %}txn.restarts.writetoooldmulti.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of restarts due to multiple concurrent writers committing first | This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review [transaction contention best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}#transaction-contention) and [performance tuning recipes]({% link {{ page.version.version }}/performance-recipes.md %}#transaction-contention). Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated.
| -| txn.restarts.unknown | {% if include.deployment == 'self-hosted' %}txn.restarts.unknown.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of restarts due to unknown reasons | This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review [transaction contention best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}#transaction-contention) and [performance tuning recipes]({% link {{ page.version.version }}/performance-recipes.md %}#transaction-contention). Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. | -| txn.restarts.txnpush | {% if include.deployment == 'self-hosted' %}txn.restarts.txnpush.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of restarts due to a transaction push failure | This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review [transaction contention best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}#transaction-contention) and [performance tuning recipes]({% link {{ page.version.version }}/performance-recipes.md %}#transaction-contention). Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. | -| txn.restarts.txnaborted | {% if include.deployment == 'self-hosted' %}txn.restarts.txnaborted.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of restarts due to an abort by a concurrent transaction | The errors tracked by this metric are generally due to deadlocks. Deadlocks can often be prevented with a considered transaction design. Identify the conflicting transactions involved in the deadlocks, then, if possible, redesign the business logic implementation prone to deadlocks. | + + + + + + -## Table Statistics + {% endfor %}{% comment %}for metric in essential_metrics{% endcomment %} -|
CockroachDB Metric Name
| {% if include.deployment == 'self-hosted' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|{% elsif include.deployment == 'advanced' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics)
(add `crdb_dedicated.` prefix)
|{% endif %}
Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| jobs.auto_create_stats.resume_failed | {% if include.deployment == 'self-hosted' %}jobs.auto.create.stats.resume_failed.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of auto_create_stats jobs which failed with a non-retryable error | This metric is a high-level indicator that automatically generated [table statistics]({% link {{ page.version.version }}/cost-based-optimizer.md %}#table-statistics) is failing. Failed statistic creation can lead to the query optimizer running with stale statistics. Stale statistics can cause suboptimal query plans to be selected leading to poor query performance. | -| jobs.auto_create_stats.currently_running | {% if include.deployment == 'self-hosted' %}jobs.auto.create.stats.currently_running |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of auto_create_stats jobs currently running | This metric tracks the number of active automatically generated statistics jobs that could also be consuming resources. Ensure that foreground SQL traffic is not impacted by correlating this metric with SQL latency and query volume metrics. | -| jobs.auto_create_stats.currently_paused | {% if include.deployment == 'self-hosted' %}jobs.auto.create.stats.currently_paused |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of auto_create_stats jobs currently considered Paused | This metric is a high-level indicator that automatically generated statistics jobs are paused which can lead to the query optimizer running with stale statistics. Stale statistics can cause suboptimal query plans to be selected leading to poor query performance. | -| jobs.create_stats.currently_running | {% if include.deployment == 'self-hosted' %}jobs.create.stats.currently_running |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of create_stats jobs currently running | This metric tracks the number of active create statistics jobs that may be consuming resources. Ensure that foreground SQL traffic is not impacted by correlating this metric with SQL latency and query volume metrics. | - -## Backup and Restore - -|
CockroachDB Metric Name
| {% if include.deployment == 'self-hosted' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|{% elsif include.deployment == 'advanced' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics)
(add `crdb_dedicated.` prefix)
|{% endif %}
Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| jobs.backup.currently_running | {% if include.deployment == 'self-hosted' %}jobs.backup.currently_running |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of backup jobs currently running | See Description. | -| jobs.backup.currently_paused | {% if include.deployment == 'self-hosted' %}jobs.backup.currently_paused |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of backup jobs currently considered Paused | Monitor and alert on this metric to safeguard against an inadvertent operational error of leaving a backup job in a paused state for an extended period of time. A paused job can hold resources, impact concurrency, or have some other negative consequence. A paused backup may break the [recovery point objective (RPO)]({% link {{ page.version.version }}/backup.md %}#performance). | -| schedules.BACKUP.failed | {% if include.deployment == 'self-hosted' %}schedules.backup.failed |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of BACKUP jobs failed | Monitor this metric and investigate backup job failures. | -| schedules.BACKUP.last-completed-time | {% if include.deployment == 'self-hosted' %}schedules.backup.last_completed_time |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} The Unix timestamp of the most recently completed backup by a schedule specified as maintaining this metric | Monitor this metric to ensure that backups are meeting the [recovery point objective (RPO)]({% link {{ page.version.version }}/disaster-recovery-overview.md %}). Each node exports the time that it last completed a backup on behalf of the schedule. If a node is restarted, it will report `0` until it completes a backup. If all nodes are restarted, `max()` is `0` until a node completes a backup.

To make use of this metric, first, from each node, take the maximum over a rolling window equal to or greater than the backup frequency, and then take the maximum of those values across nodes. For example, with a backup frequency of 60 minutes, monitor `time() - max_across_nodes(max_over_time(schedules_BACKUP_last_completed_time, 60min))`; see the example alerting rule after this table. |
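The following is a minimal sketch of a Prometheus alerting rule implementing this pattern in concrete PromQL. It assumes a 60-minute backup schedule and metrics scraped from each node's `/_status/vars` endpoint; the two-hour threshold and rule names are illustrative and should be tuned to your schedule and RPO.

~~~ yaml
groups:
  - name: backup_rpo
    rules:
      - alert: ScheduledBackupRPOAtRisk
        # max_over_time smooths over node restarts, which reset the metric to 0;
        # the outer max takes the most recent completion across all nodes.
        expr: time() - max(max_over_time(schedules_BACKUP_last_completed_time[60m])) > 2 * 60 * 60
        for: 5m
        annotations:
          summary: "No scheduled backup has completed within the expected window."
~~~

- -## Changefeeds - -If [changefeeds]({% link {{ page.version.version }}/change-data-capture-overview.md %}) are created in a CockroachDB cluster, monitor these additional metrics in your custom dashboards: - -|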
CockroachDB Metric Name
| {% if include.deployment == 'self-hosted' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|{% elsif include.deployment == 'advanced' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics)
(add `crdb_dedicated.` prefix)
|{% endif %}
Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| changefeed.running | changefeed.running | Number of currently running changefeeds, including sinkless | This metric tracks the total number of all running changefeeds. | -| jobs.changefeed.currently_paused | {% if include.deployment == 'self-hosted' %}jobs.changefeed.currently_paused |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of changefeed jobs currently considered Paused | Monitor and alert on this metric to safeguard against an inadvertent operational error of leaving a changefeed job in a paused state for an extended period of time. Changefeed jobs should not be paused for a long time because the [protected timestamp prevents garbage collection]({% link {{ page.version.version }}/monitor-and-debug-changefeeds.md %}#protected-timestamp-and-garbage-collection-monitoring). | -| changefeed.failures | changefeed.failures | Total number of changefeed jobs which have failed | This metric tracks permanent changefeed job failures that the jobs system will not try to restart. Any increase in this counter should be investigated. An alert on this metric is recommended. | -| changefeed.error_retries | changefeed.error.retries | Total retryable errors encountered by all changefeeds | This metric tracks transient changefeed errors. Alert on "too many" errors, such as 50 retries in 15 minutes; see the example alerting rules after this table. During a rolling upgrade, this counter will increase because changefeed jobs restart following node restarts, with an exponential backoff of up to 10 minutes. If no rolling upgrade or other cluster maintenance is in progress and the error rate is high, investigate the changefeed job. | -| changefeed.emitted_messages | changefeed.emitted.messages | Messages emitted by all feeds | This metric provides useful context when assessing the state of changefeeds. It characterizes the rate of changes being streamed from the CockroachDB cluster. | -| changefeed.emitted_bytes | {% if include.deployment == 'self-hosted' %}changefeed.emitted_bytes.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Bytes emitted by all feeds | This metric provides useful context when assessing the state of changefeeds. It characterizes the throughput, in bytes, of changes being streamed from the CockroachDB cluster. | -| changefeed.commit_latency | changefeed.commit.latency | The difference between the event MVCC timestamp and the time it was acknowledged by the downstream sink. If the sink batches events, then the difference between the oldest event in the batch and acknowledgement is recorded. Latency during backfill is excluded. | This metric provides useful context when assessing the state of changefeeds. It characterizes the end-to-end lag between a committed change and the application of that change at the destination. 
| -| jobs.changefeed.protected_age_sec | {% if include.deployment == 'self-hosted' %}jobs.changefeed.protected_age_sec |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} The age of the oldest PTS record protected by changefeed jobs | [Changefeeds use protected timestamps to protect the data from being garbage collected]({% link {{ page.version.version }}/monitor-and-debug-changefeeds.md %}#protected-timestamp-and-garbage-collection-monitoring). Ensure the protected timestamp age does not significantly exceed the [GC TTL zone configuration]({% link {{ page.version.version }}/configure-replication-zones.md %}#replication-zone-variables). Alert on this metric if the protected timestamp age is greater than 3 times the GC TTL; see the example alerting rules after this table. |
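The following is a minimal sketch of Prometheus alerting rules for the two thresholds above. Metric names assume CockroachDB's Prometheus endpoint, where dots become underscores; the 4-hour (14400-second) GC TTL is an assumption, so substitute your configured `gc.ttlseconds` and tune the windows and thresholds to your environment.

~~~ yaml
groups:
  - name: changefeed_health
    rules:
      - alert: ChangefeedRetryingTooOften
        # "Too many" transient errors: more than 50 retries in 15 minutes.
        expr: sum(increase(changefeed_error_retries[15m])) > 50
        annotations:
          summary: "Changefeeds are retrying at an unusually high rate."
      - alert: ChangefeedProtectedTimestampTooOld
        # Alert when the oldest protected timestamp exceeds 3x the GC TTL.
        expr: max(jobs_changefeed_protected_age_sec) > 3 * 14400
        annotations:
          summary: "A changefeed protected timestamp is older than 3 times the GC TTL."
~~~

- -## Row-Level TTL - -If [Row-Level TTL]({% link {{ page.version.version }}/row-level-ttl.md %}) is configured for any table in a CockroachDB cluster, monitor these additional metrics in your custom dashboards: - -|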
CockroachDB Metric Name
| {% if include.deployment == 'self-hosted' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|{% elsif include.deployment == 'advanced' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics)
(add `crdb_dedicated.` prefix)
|{% endif %}
Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| jobs.row_level_ttl.resume_completed | {% if include.deployment == 'self-hosted' %}jobs.row.level.ttl.resume_completed.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of row_level_ttl jobs which successfully resumed to completion | If Row Level TTL is enabled, this metric should be nonzero and correspond to the `ttl_cron` setting that was chosen. If this metric is zero, the job is not running. | -| jobs.row_level_ttl.resume_failed | {% if include.deployment == 'self-hosted' %}jobs.row.level.ttl.resume_failed.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of row_level_ttl jobs which failed with a non-retryable error | This metric should remain at zero. Repeated errors mean the Row Level TTL job is not deleting data. | -| jobs.row_level_ttl.rows_selected | {% if include.deployment == 'self-hosted' %}jobs.row.level.ttl.rows_selected.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of rows selected for deletion by the row level TTL job. | Correlate this metric with `jobs.row_level_ttl.rows_deleted` to ensure all the rows that should be deleted are actually getting deleted; see the example alerting rule after this table. | -| jobs.row_level_ttl.rows_deleted | {% if include.deployment == 'self-hosted' %}jobs.row.level.ttl.rows_deleted.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of rows deleted by the row level TTL job. | Correlate this metric with `jobs.row_level_ttl.rows_selected` to ensure all the rows that should be deleted are actually getting deleted. | -| jobs.row_level_ttl.currently_paused | {% if include.deployment == 'self-hosted' %}jobs.row.level.ttl.currently_paused |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of row_level_ttl jobs currently considered Paused | Monitor this metric to ensure the Row Level TTL job does not remain paused inadvertently for an extended period. | -| jobs.row_level_ttl.currently_running | {% if include.deployment == 'self-hosted' %}jobs.row.level.ttl.currently_running |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of row_level_ttl jobs currently running | Monitor this metric to ensure there are not too many Row Level TTL jobs running at the same time. Generally, this metric should be in the low single digits. | -| schedules.scheduled-row-level-ttl-executor.failed | {% if include.deployment == 'self-hosted' %}schedules.scheduled.row.level.ttl.executor_failed.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of scheduled-row-level-ttl-executor jobs failed | Monitor this metric to ensure the Row Level TTL job is running. A non-zero value means the scheduled job could not be created. | -| jobs.row_level_ttl.span_total_duration | NOT AVAILABLE | Duration for processing a span during row level TTL. | See Description. | -| jobs.row_level_ttl.select_duration | NOT AVAILABLE | Duration for select requests during row level TTL. | See Description. | -| jobs.row_level_ttl.delete_duration | NOT AVAILABLE | Duration for delete requests during row level TTL. | See Description. 
| -| jobs.row_level_ttl.num_active_spans | NOT AVAILABLE | Number of active spans the TTL job is deleting from. | See Description. | -| jobs.row_level_ttl.total_rows | NOT AVAILABLE | Approximate number of rows on the TTL table. | See Description. | -| jobs.row_level_ttl.total_expired_rows | NOT AVAILABLE | Approximate number of rows that have expired the TTL on the TTL table. | See Description. | + +
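The following is a minimal sketch of a Prometheus alerting rule for the `rows_selected` / `rows_deleted` correlation described above. Metric names assume CockroachDB's Prometheus endpoint, where dots become underscores; the one-hour window and 5% tolerance are illustrative and should be tuned to your TTL schedule.

~~~ yaml
groups:
  - name: row_level_ttl
    rules:
      - alert: RowLevelTTLDeletionsLagging
        # Rows selected for deletion should be deleted within the same window.
        expr: |
          sum(increase(jobs_row_level_ttl_rows_selected[1h]))
            - sum(increase(jobs_row_level_ttl_rows_deleted[1h]))
          > 0.05 * sum(increase(jobs_row_level_ttl_rows_selected[1h]))
        annotations:
          summary: "Row-Level TTL is selecting more rows than it deletes."
~~~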
+<table>
+<tr>
+<th>CockroachDB Metric Name</th>
+<th>[Datadog Integration Metric Name]({{ datadog_link }})<br>(add `{{ datadog_prefix }}.` prefix)</th>
+<th>Description</th>
+<th>Usage</th>
+</tr>
+{% comment %} For self-hosted, add labeled_name if exists. advanced does not yet support metrics endpoint {% endcomment %}
+<tr>
+<td>{{ metric.metric_id }}{% if include.deployment == 'self-hosted' %}{% if metric.labeled_name %}<br>metrics endpoint:<br>{{ metric.labeled_name }}{% endif %}{% endif %}</td>
+<td>{{ metric.description }}</td>
+<td>{{ metric.how_to_use }}</td>
+</tr>
+</table>
+{% endif %}{% comment %}if include.deployment == 'self-hosted'{% endcomment %} ## See also @@ -204,5 +292,4 @@ If [Row-Level TTL]({% link {{ page.version.version }}/row-level-ttl.md %}) is co - [Visualize metrics in Grafana]({% link {{ page.version.version }}/monitor-cockroachdb-with-prometheus.md %}#step-5-visualize-metrics-in-grafana) - [Custom Chart Debug Page]({% link {{ page.version.version }}/ui-custom-chart-debug-page.md %}) - [Cluster API]({% link {{ page.version.version }}/cluster-api.md %}) -- [Essential Alerts]({% link {{ page.version.version }}/essential-alerts-{{ include.deployment}}.md %}) -- [CockroachDB Source Code - DB Console metrics to graphs mappings (in *.tsx files)](https://github.com/cockroachdb/cockroach/tree/master/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards) +- [Essential Alerts]({% link {{ page.version.version }}/essential-alerts-self-hosted.md %}) \ No newline at end of file diff --git a/src/current/v25.3/essential-metrics-self-hosted.md b/src/current/v25.3/essential-metrics-self-hosted.md index d18970c0ce4..61251c330cd 100644 --- a/src/current/v25.3/essential-metrics-self-hosted.md +++ b/src/current/v25.3/essential-metrics-self-hosted.md @@ -1,8 +1,7 @@ --- title: Essential Metrics for CockroachDB Self-Hosted Deployments -summary: Learn about the recommended essential metrics for monitoring your CockroachDB {{ site.data.products.core }} cluster. +summary: Learn about the recommended essential metrics for monitoring your CockroachDB self-hosted cluster. toc: true -docs_area: manage --- -{% include {{ page.version.version }}/essential-metrics.md deployment='self-hosted' %} +{% include {{ page.version.version }}/essential-metrics.md deployment='self-hosted' %} \ No newline at end of file