infra/charts/victoria-metrics-k8s-stack/files/dashboards/generated/kubelet.yaml
Konstantin Averkiev c45fd1a6ac added vm stack
2025-07-08 17:29:32 +03:00

880 lines
22 KiB
YAML

{{- /*
  Template preamble.
  $Values            - .helm.Values when present, otherwise .Values.
  $clusterLabel      - global.clusterLabel, falling back to "cluster".
  $multicluster      - grafana.sidecar.dashboards.multicluster, falling back to false.
  $defaultDatasource - starts as "prometheus"; replaced by the type of every
                       grafana.sidecar.datasources.victoriametrics entry that has
                       both isDefault and type set (the last such entry wins).
*/}}
{{- $Values := (.helm).Values | default .Values }}
{{- $clusterLabel := default "cluster" ($Values.global).clusterLabel }}
{{- $multicluster := default false ((($Values.grafana).sidecar).dashboards).multicluster }}
{{- $defaultDatasource := "prometheus" -}}
{{- range $ds := default list ((($Values.grafana).sidecar).datasources).victoriametrics }}
  {{- if and $ds.isDefault $ds.type }}
    {{- $defaultDatasource = $ds.type }}
  {{- end }}
{{- end }}
# Dashboard header. "condition" is rendered from the kubelet.enabled value.
condition: {{ ($Values.kubelet).enabled }}
editable: false
# Single dropdown link entry selecting dashboards tagged kubernetes-mixin.
links:
- asDropdown: true
  includeVars: true
  keepTime: true
  tags:
  - kubernetes-mixin
  targetBlank: false
  title: Kubernetes
  type: dashboards
# Panel list; entries follow as a flush (column 0) block sequence.
panels:
# Panel 1, stat "Running Kubelets": instant sum of kubelet_node_name for the
# selected cluster. This query carries no instance matcher.
# The panel-level datasource uid is the literal "-- Mixed --"; the target
# itself points at the ${datasource} dashboard variable.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      unit: none
  gridPos:
    h: 7
    w: 4
    x: 0
    # Quoted so YAML 1.1 parsers do not read the key as a boolean.
    'y': 0
  id: 1
  interval: 1m
  options:
    colorMode: none
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(kubelet_node_name{ {{ $clusterLabel }}=~"$cluster", job="kubelet", metrics_path="/metrics"})
    instant: true
  title: Running Kubelets
  type: stat
# Panel 2, stat "Running Pods": instant sum of kubelet_running_pods filtered
# by the $cluster and $instance dashboard variables.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      unit: none
  gridPos:
    h: 7
    w: 4
    x: 4
    'y': 0
  id: 2
  interval: 1m
  options:
    colorMode: none
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(kubelet_running_pods{ {{ $clusterLabel }}=~"$cluster", job="kubelet", metrics_path="/metrics", instance=~"$instance"})
    instant: true
  title: Running Pods
  type: stat
# Panel 3, stat "Running Containers": instant sum of kubelet_running_containers
# filtered by the $cluster and $instance dashboard variables.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      unit: none
  gridPos:
    h: 7
    w: 4
    x: 8
    'y': 0
  id: 3
  interval: 1m
  options:
    colorMode: none
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(kubelet_running_containers{ {{ $clusterLabel }}=~"$cluster", job="kubelet", metrics_path="/metrics", instance=~"$instance"})
    instant: true
  title: Running Containers
  type: stat
# Panel 4, stat "Actual Volume Count": instant sum of
# volume_manager_total_volumes with state="actual_state_of_world".
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      unit: none
  gridPos:
    h: 7
    w: 4
    x: 12
    'y': 0
  id: 4
  interval: 1m
  options:
    colorMode: none
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(volume_manager_total_volumes{ {{ $clusterLabel }}=~"$cluster", job="kubelet", metrics_path="/metrics", instance=~"$instance", state="actual_state_of_world"})
    instant: true
  title: Actual Volume Count
  type: stat
# Panel 5, stat "Desired Volume Count": instant sum of
# volume_manager_total_volumes with state="desired_state_of_world".
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      unit: none
  gridPos:
    h: 7
    w: 4
    x: 16
    'y': 0
  id: 5
  interval: 1m
  options:
    colorMode: none
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(volume_manager_total_volumes{ {{ $clusterLabel }}=~"$cluster", job="kubelet", metrics_path="/metrics", instance=~"$instance",state="desired_state_of_world"})
    instant: true
  title: Desired Volume Count
  type: stat
# Panel 6, stat "Config Error Count": instant sum of the rate of
# kubelet_node_config_error over $__rate_interval.
# NOTE(review): rate() is meant for counters; if kubelet_node_config_error is
# a gauge, this expression may not show what the title suggests - confirm
# against the kubelet metrics reference before changing it.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      unit: none
  gridPos:
    h: 7
    w: 4
    x: 20
    'y': 0
  id: 6
  interval: 1m
  options:
    colorMode: none
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(rate(kubelet_node_config_error{ {{ $clusterLabel }}=~"$cluster", job="kubelet", metrics_path="/metrics", instance=~"$instance"}[$__rate_interval]))
    instant: true
  title: Config Error Count
  type: stat
# Panel 7, timeseries "Operation Rate": rate of
# kubelet_runtime_operations_total summed by operation_type and instance.
# The legendFormat wraps Grafana's double-brace placeholders in backtick
# literals so the Helm renderer emits them verbatim.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: ops
  gridPos:
    h: 7
    w: 12
    x: 0
    'y': 7
  id: 7
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(rate(kubelet_runtime_operations_total{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics",instance=~"$instance"}[$__rate_interval])) by (operation_type, instance)
    legendFormat: '{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}'
  title: Operation Rate
  type: timeseries
# Panel 8, timeseries "Operation Error Rate": rate of
# kubelet_runtime_operations_errors_total summed by instance and
# operation_type.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: ops
  gridPos:
    h: 7
    w: 12
    x: 12
    'y': 7
  id: 8
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(rate(kubelet_runtime_operations_errors_total{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics",instance=~"$instance"}[$__rate_interval])) by (instance, operation_type)
    legendFormat: '{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}'
  title: Operation Error Rate
  type: timeseries
# Panel 9, timeseries "Operation Duration 99th quantile":
# histogram_quantile(0.99) over the rate of
# kubelet_runtime_operations_duration_seconds_bucket, grouped by instance,
# operation_type and le. Full-width panel (w: 24), unit s.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: s
  gridPos:
    h: 7
    w: 24
    x: 0
    'y': 14
  id: 9
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics",instance=~"$instance"}[$__rate_interval])) by (instance, operation_type, le))
    legendFormat: '{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}'
  title: Operation Duration 99th quantile
  type: timeseries
# Panel 10, timeseries "Pod Start Rate": two per-instance series, the rate of
# kubelet_pod_start_duration_seconds_count (legend suffix "pod") and the rate
# of kubelet_pod_worker_duration_seconds_count (legend suffix "worker").
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: ops
  gridPos:
    h: 7
    w: 12
    x: 0
    'y': 21
  id: 10
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(rate(kubelet_pod_start_duration_seconds_count{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics",instance=~"$instance"}[$__rate_interval])) by (instance)
    legendFormat: '{{`{{`}}instance{{`}}`}} pod'
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(rate(kubelet_pod_worker_duration_seconds_count{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics",instance=~"$instance"}[$__rate_interval])) by (instance)
    legendFormat: '{{`{{`}}instance{{`}}`}} worker'
  title: Pod Start Rate
  type: timeseries
# Panel 11, timeseries "Pod Start Duration": histogram_quantile(0.99) per
# instance over kubelet_pod_start_duration_seconds_bucket (legend suffix
# "pod") and kubelet_pod_worker_duration_seconds_bucket (legend suffix
# "worker"). Unit s.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: s
  gridPos:
    h: 7
    w: 12
    x: 12
    'y': 21
  id: 11
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics",instance=~"$instance"}[$__rate_interval])) by (instance, le))
    legendFormat: '{{`{{`}}instance{{`}}`}} pod'
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics",instance=~"$instance"}[$__rate_interval])) by (instance, le))
    legendFormat: '{{`{{`}}instance{{`}}`}} worker'
  title: Pod Start Duration
  type: timeseries
# Panel 12, timeseries "Storage Operation Rate": rate of
# storage_operation_duration_seconds_count summed by instance, operation_name
# and volume_plugin.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: ops
  gridPos:
    h: 7
    w: 12
    x: 0
    'y': 28
  id: 12
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(rate(storage_operation_duration_seconds_count{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics",instance=~"$instance"}[$__rate_interval])) by (instance, operation_name, volume_plugin)
    legendFormat: '{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}'
  title: Storage Operation Rate
  type: timeseries
# Panel 13, timeseries "Storage Operation Error Rate": rate of
# storage_operation_errors_total summed by instance, operation_name and
# volume_plugin.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: ops
  gridPos:
    h: 7
    w: 12
    x: 12
    'y': 28
  id: 13
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(rate(storage_operation_errors_total{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics",instance=~"$instance"}[$__rate_interval])) by (instance, operation_name, volume_plugin)
    legendFormat: '{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}'
  title: Storage Operation Error Rate
  type: timeseries
# Panel 14, timeseries "Storage Operation Duration 99th quantile":
# histogram_quantile(0.99) over the rate of
# storage_operation_duration_seconds_bucket, grouped by instance,
# operation_name, volume_plugin and le. Full-width panel (w: 24), unit s.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: s
  gridPos:
    h: 7
    w: 24
    x: 0
    'y': 35
  id: 14
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{ {{ $clusterLabel }}=~"$cluster", job="kubelet", metrics_path="/metrics", instance=~"$instance"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))
    legendFormat: '{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}'
  title: Storage Operation Duration 99th quantile
  type: timeseries
# Panel 15, timeseries "Cgroup manager operation rate": rate of
# kubelet_cgroup_manager_duration_seconds_count summed by instance and
# operation_type.
# The legend names both grouping labels; with operation_type alone, series
# from different instances would share the same legend name.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: ops
  gridPos:
    h: 7
    w: 12
    x: 0
    'y': 42
  id: 15
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(rate(kubelet_cgroup_manager_duration_seconds_count{ {{ $clusterLabel }}=~"$cluster", job="kubelet", metrics_path="/metrics", instance=~"$instance"}[$__rate_interval])) by (instance, operation_type)
    legendFormat: '{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}'
  title: Cgroup manager operation rate
  type: timeseries
# Panel 16, timeseries "Cgroup manager 99th quantile":
# histogram_quantile(0.99) over the rate of
# kubelet_cgroup_manager_duration_seconds_bucket, grouped by instance,
# operation_type and le. Unit s.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: s
  gridPos:
    h: 7
    w: 12
    x: 12
    'y': 42
  id: 16
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{ {{ $clusterLabel }}=~"$cluster", job="kubelet", metrics_path="/metrics", instance=~"$instance"}[$__rate_interval])) by (instance, operation_type, le))
    legendFormat: '{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}'
  title: Cgroup manager 99th quantile
  type: timeseries
# Panel 17, timeseries "PLEG relist rate": rate of
# kubelet_pleg_relist_duration_seconds_count summed by instance.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: ops
  gridPos:
    h: 7
    w: 12
    x: 0
    'y': 49
  id: 17
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(rate(kubelet_pleg_relist_duration_seconds_count{ {{ $clusterLabel }}=~"$cluster", job="kubelet", metrics_path="/metrics", instance=~"$instance"}[$__rate_interval])) by (instance)
    legendFormat: '{{`{{`}}instance{{`}}`}}'
  title: PLEG relist rate
  type: timeseries
# Panel 18, timeseries "PLEG relist interval": histogram_quantile(0.99) over
# the rate of kubelet_pleg_relist_interval_seconds_bucket, grouped by instance
# and le. Unit s.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: s
  gridPos:
    h: 7
    w: 12
    x: 12
    'y': 49
  id: 18
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics",instance=~"$instance"}[$__rate_interval])) by (instance, le))
    legendFormat: '{{`{{`}}instance{{`}}`}}'
  title: PLEG relist interval
  type: timeseries
# Panel 19, timeseries "PLEG relist duration": histogram_quantile(0.99) over
# the rate of kubelet_pleg_relist_duration_seconds_bucket, grouped by instance
# and le. Full-width panel (w: 24), unit s.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: s
  gridPos:
    h: 7
    w: 24
    x: 0
    'y': 56
  id: 19
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics",instance=~"$instance"}[$__rate_interval])) by (instance, le))
    legendFormat: '{{`{{`}}instance{{`}}`}}'
  title: PLEG relist duration
  type: timeseries
# Panel 20, timeseries "RPC rate": four targets over
# rest_client_requests_total, one per status-code class. Each target matches
# code against 2.., 3.., 4.. or 5.. and uses a fixed legend (2xx..5xx).
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: ops
  gridPos:
    h: 7
    w: 24
    x: 0
    'y': 63
  id: 20
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(rate(rest_client_requests_total{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics", instance=~"$instance",code=~"2.."}[$__rate_interval]))
    legendFormat: 2xx
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(rate(rest_client_requests_total{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics", instance=~"$instance",code=~"3.."}[$__rate_interval]))
    legendFormat: 3xx
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(rate(rest_client_requests_total{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics", instance=~"$instance",code=~"4.."}[$__rate_interval]))
    legendFormat: 4xx
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: sum(rate(rest_client_requests_total{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics", instance=~"$instance",code=~"5.."}[$__rate_interval]))
    legendFormat: 5xx
  title: RPC rate
  type: timeseries
# Panel 21, timeseries "Request duration 99th quantile":
# histogram_quantile(0.99) over the rate of
# rest_client_request_duration_seconds_bucket, grouped by instance, verb and
# le. Full-width panel (w: 24), unit s.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: s
  gridPos:
    h: 7
    w: 24
    x: 0
    'y': 70
  id: 21
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics", instance=~"$instance"}[$__rate_interval])) by (instance, verb, le))
    legendFormat: '{{`{{`}}instance{{`}}`}} {{`{{`}}verb{{`}}`}}'
  title: Request duration 99th quantile
  type: timeseries
# Panel 22, timeseries "Memory": raw process_resident_memory_bytes series for
# the kubelet job, one per instance. Unit bytes.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: bytes
  gridPos:
    h: 7
    w: 8
    x: 0
    'y': 77
  id: 22
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: process_resident_memory_bytes{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics",instance=~"$instance"}
    legendFormat: '{{`{{`}}instance{{`}}`}}'
  title: Memory
  type: timeseries
# Panel 23, timeseries "CPU usage": rate of process_cpu_seconds_total for the
# kubelet job, one series per instance. Unit short.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: short
  gridPos:
    h: 7
    w: 8
    x: 8
    'y': 77
  id: 23
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: rate(process_cpu_seconds_total{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics",instance=~"$instance"}[$__rate_interval])
    legendFormat: '{{`{{`}}instance{{`}}`}}'
  title: CPU usage
  type: timeseries
# Panel 24, timeseries "Goroutines": raw go_goroutines series for the kubelet
# job, one per instance. Unit short.
- datasource:
    type: {{ $defaultDatasource }}
    uid: -- Mixed --
  fieldConfig:
    defaults:
      custom:
        fillOpacity: 10
        showPoints: never
        spanNulls: true
      unit: short
  gridPos:
    h: 7
    w: 8
    x: 16
    'y': 77
  id: 24
  interval: 1m
  options:
    legend:
      asTable: true
      calcs:
      - lastNotNull
      displayMode: table
      placement: right
      showLegend: true
    tooltip:
      mode: single
  pluginVersion: v11.4.0
  targets:
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    expr: go_goroutines{ {{ $clusterLabel }}=~"$cluster",job="kubelet", metrics_path="/metrics",instance=~"$instance"}
    legendFormat: '{{`{{`}}instance{{`}}`}}'
  title: Goroutines
  type: timeseries
# Dashboard-level settings that follow the panel list: refresh value,
# dashboard schema version, and the two tags attached to this dashboard.
refresh: 10s
schemaVersion: 39
tags:
- kubernetes-mixin
- vm-k8s-stack
# Dashboard variables: datasource picker, cluster selector, instance selector.
templating:
  list:
  # "datasource": picker whose query is the chart's default datasource type.
  - current:
      selected: true
      text: default
      value: default
    hide: 0
    label: Data source
    name: datasource
    query: {{ $defaultDatasource }}
    regex: ''
    type: datasource
  # "cluster": when multicluster is enabled this is a query variable (hide 0)
  # whose query comes from the base64 literal, a double-quoted label_values
  # lookup over up for job kubelet, with the word "cluster" replaced by the
  # configured cluster label. Otherwise it is a constant ".*" with hide 2.
  # The datasource type follows the default datasource, like every panel.
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    hide: {{ ternary 0 2 $multicluster }}
    label: cluster
    name: cluster
    query: {{ ternary (b64dec "ImxhYmVsX3ZhbHVlcyh1cHtqb2I9XCJrdWJlbGV0XCIsIG1ldHJpY3NfcGF0aD1cIi9tZXRyaWNzXCJ9LCBjbHVzdGVyKSI=" | replace "cluster" $clusterLabel) ".*" $multicluster }}
    refresh: 2
    sort: 1
    type: {{ ternary "query" "constant" $multicluster }}
  # "instance": instance label values of up for job kubelet within the
  # selected cluster; includeAll is enabled.
  - datasource:
      type: {{ $defaultDatasource }}
      uid: ${datasource}
    hide: 0
    includeAll: true
    label: instance
    name: instance
    query: label_values(up{job="kubelet", metrics_path="/metrics", {{ $clusterLabel }}=~"$cluster"}, instance)
    refresh: 2
    type: query
# Default time window (last hour), timezone, title and fixed dashboard uid.
time:
  from: now-1h
  to: now
# Falls back to "UTC" when defaultDashboards.defaultTimezone is not set.
timezone: {{ default "UTC" ($Values.defaultDashboards).defaultTimezone }}
title: Kubernetes / Kubelet
uid: 3138fa155d5915769fbded898ac09fd9