3205 lines
78 KiB
YAML
3205 lines
78 KiB
YAML
# Helm template preamble for the vmalert dashboard.
# $Values: chart values, taken from .helm.Values when present, otherwise from .Values.
# $clusterLabel: global.clusterLabel, falling back to "cluster".
# $multicluster: grafana.sidecar.dashboards.multicluster, falling back to false.
# $defaultDatasource: starts as "prometheus"; the loop below replaces it with the
# type of any grafana.sidecar.datasources.victoriametrics entry that has both
# isDefault and type set (the last such entry wins).
{{- $Values := (.helm).Values | default .Values }}
|
|
{{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }}
|
|
{{- $multicluster := ((($Values.grafana).sidecar).dashboards).multicluster | default false }}
|
|
{{- $defaultDatasource := "prometheus" -}}
|
|
{{- range (((($Values.grafana).sidecar).datasources).victoriametrics | default list) }}
|
|
{{- if and .isDefault .type }}{{ $defaultDatasource = .type }}{{- end }}
|
|
{{- end }}
|
|
annotations:
|
|
list:
|
|
- builtIn: 1
|
|
datasource:
|
|
type: datasource
|
|
uid: grafana
|
|
enable: true
|
|
hide: true
|
|
iconColor: rgba(0, 211, 255, 1)
|
|
name: Annotations & Alerts
|
|
target:
|
|
limit: 100
|
|
matchAny: false
|
|
tags: []
|
|
type: dashboard
|
|
type: dashboard
|
|
- datasource:
|
|
# Datasource type follows the chart-selected default, like the panel targets that share uid $ds.
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
enable: true
|
|
expr: sum(vm_app_version{job=~"$job", instance=~"$instance"}) by(short_version) unless (sum(vm_app_version{job=~"$job", instance=~"$instance"} offset $__interval) by(short_version))
|
|
hide: true
|
|
iconColor: dark-blue
|
|
name: version
|
|
textFormat: '{{`{{`}}short_version{{`}}`}}'
|
|
titleFormat: Version change
|
|
- datasource:
|
|
# Datasource type follows the chart-selected default, like the panel targets that share uid $ds.
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
enable: true
|
|
expr: sum(changes(vm_app_start_timestamp{job=~"$job", instance=~"$instance"}[$__interval])) by(job, instance)
|
|
hide: false
|
|
iconColor: dark-yellow
|
|
name: restarts
|
|
textFormat: '{{`{{`}}job{{`}}`}}:{{`{{`}}instance{{`}}`}} restarted'
|
|
condition: {{ ($Values.vmalert).enabled }}
|
|
description: Overview for VictoriaMetrics vmalert v1.117.0 or higher
|
|
editable: false
|
|
fiscalYearStartMonth: 0
|
|
graphTooltip: 1
|
|
id: 3
|
|
links:
|
|
- asDropdown: false
|
|
icon: external link
|
|
includeVars: false
|
|
keepTime: false
|
|
tags: []
|
|
targetBlank: true
|
|
title: vmalert docs
|
|
tooltip: ''
|
|
type: link
|
|
url: https://docs.victoriametrics.com/victoriametrics/vmalert/
|
|
- asDropdown: false
|
|
icon: external link
|
|
includeVars: false
|
|
keepTime: false
|
|
tags: []
|
|
targetBlank: true
|
|
title: Found a bug?
|
|
tooltip: ''
|
|
type: link
|
|
url: https://github.com/VictoriaMetrics/VictoriaMetrics/issues
|
|
- asDropdown: false
|
|
icon: external link
|
|
includeVars: false
|
|
keepTime: false
|
|
tags: []
|
|
targetBlank: true
|
|
title: New releases
|
|
tooltip: ''
|
|
type: link
|
|
url: https://github.com/VictoriaMetrics/VictoriaMetrics/releases
|
|
panels:
|
|
- collapsed: false
|
|
gridPos:
|
|
h: 1
|
|
w: 24
|
|
x: 0
|
|
'y': 0
|
|
id: 47
|
|
panels: []
|
|
title: Stats
|
|
type: row
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows if the last configuration update was successful. "Not Ok" means there was an unsuccessful attempt to update the configuration due to some error. Check the log for details.
|
|
fieldConfig:
|
|
defaults:
|
|
mappings:
|
|
- options:
|
|
match: 'null'
|
|
result:
|
|
color: green
|
|
index: 0
|
|
text: Ok
|
|
type: special
|
|
- options:
|
|
from: 1
|
|
result:
|
|
color: red
|
|
index: 1
|
|
text: Not Ok
|
|
to: 999999
|
|
type: range
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
overrides: []
|
|
gridPos:
|
|
h: 3
|
|
w: 4
|
|
x: 0
|
|
'y': 1
|
|
id: 6
|
|
options:
|
|
colorMode: value
|
|
graphMode: area
|
|
justifyMode: auto
|
|
orientation: auto
|
|
percentChangeColorMode: standard
|
|
reduceOptions:
|
|
calcs:
|
|
- last
|
|
fields: ''
|
|
values: false
|
|
showPercentChange: false
|
|
text: {}
|
|
textMode: auto
|
|
wideLayout: true
|
|
pluginVersion: 12.0.2
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: count(vmalert_config_last_reload_successful{job=~"$job", instance=~"$instance"} < 1 )
|
|
interval: ''
|
|
legendFormat: ''
|
|
range: true
|
|
refId: A
|
|
title: Config update
|
|
type: stat
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows the total number of loaded alerting rules across selected instances and groups.
|
|
fieldConfig:
|
|
defaults:
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
overrides: []
|
|
gridPos:
|
|
h: 3
|
|
w: 5
|
|
x: 4
|
|
'y': 1
|
|
id: 9
|
|
options:
|
|
colorMode: value
|
|
graphMode: area
|
|
justifyMode: auto
|
|
orientation: auto
|
|
percentChangeColorMode: standard
|
|
reduceOptions:
|
|
calcs:
|
|
- last
|
|
fields: ''
|
|
values: false
|
|
showPercentChange: false
|
|
text: {}
|
|
textMode: auto
|
|
wideLayout: true
|
|
pluginVersion: 12.0.2
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
exemplar: false
|
|
expr: count(vmalert_alerting_rules_last_evaluation_samples{job=~"$job", instance=~"$instance", group=~"$group", file=~"$file"})
|
|
interval: ''
|
|
legendFormat: ''
|
|
refId: A
|
|
title: Alerting rules
|
|
type: stat
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows the total number of loaded recording rules across selected instances and groups.
|
|
fieldConfig:
|
|
defaults:
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
overrides: []
|
|
gridPos:
|
|
h: 3
|
|
w: 5
|
|
x: 9
|
|
'y': 1
|
|
id: 7
|
|
options:
|
|
colorMode: value
|
|
graphMode: area
|
|
justifyMode: auto
|
|
orientation: auto
|
|
percentChangeColorMode: standard
|
|
reduceOptions:
|
|
calcs:
|
|
- last
|
|
fields: ''
|
|
values: false
|
|
showPercentChange: false
|
|
text: {}
|
|
textMode: auto
|
|
wideLayout: true
|
|
pluginVersion: 12.0.2
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
exemplar: false
|
|
expr: count(vmalert_recording_rules_last_evaluation_samples{job=~"$job", instance=~"$instance", group=~"$group", file=~"$file"})
|
|
interval: ''
|
|
legendFormat: ''
|
|
refId: A
|
|
title: Recording rules
|
|
type: stat
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows the total number of errors generated by recording/alerting rules for selected instances and groups.
|
|
fieldConfig:
|
|
defaults:
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 1
|
|
overrides: []
|
|
gridPos:
|
|
h: 3
|
|
w: 5
|
|
x: 14
|
|
'y': 1
|
|
id: 8
|
|
options:
|
|
colorMode: value
|
|
graphMode: area
|
|
justifyMode: auto
|
|
orientation: auto
|
|
percentChangeColorMode: standard
|
|
reduceOptions:
|
|
calcs:
|
|
- last
|
|
fields: ''
|
|
values: false
|
|
showPercentChange: false
|
|
text: {}
|
|
textMode: auto
|
|
wideLayout: true
|
|
pluginVersion: 12.0.2
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: "(sum(increase(vmalert_alerting_rules_errors_total{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval]))) + \n(sum(increase(vmalert_recording_rules_errors_total{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])))"
|
|
interval: ''
|
|
legendFormat: ''
|
|
range: true
|
|
refId: A
|
|
title: Errors
|
|
type: stat
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: "Shows number of Recording Rules which produce no data.\n\n Usually it means that such rules are misconfigured, since they give no output during the evaluation.\nPlease check if rule's expression is correct and it is working as expected."
|
|
fieldConfig:
|
|
defaults:
|
|
mappings:
|
|
- options:
|
|
match: 'null'
|
|
result:
|
|
index: 1
|
|
text: '0'
|
|
type: special
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 1
|
|
overrides: []
|
|
gridPos:
|
|
h: 3
|
|
w: 5
|
|
x: 19
|
|
'y': 1
|
|
id: 48
|
|
options:
|
|
colorMode: value
|
|
graphMode: area
|
|
justifyMode: auto
|
|
orientation: auto
|
|
percentChangeColorMode: standard
|
|
reduceOptions:
|
|
calcs:
|
|
- last
|
|
fields: ''
|
|
values: false
|
|
showPercentChange: false
|
|
text: {}
|
|
textMode: auto
|
|
wideLayout: true
|
|
pluginVersion: 12.0.2
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
# Honors the $group and $file dashboard filters, like the "Recording rules" stat on the same metric.
expr: count(vmalert_recording_rules_last_evaluation_samples{job=~"$job", instance=~"$instance", group=~"$group", file=~"$file"} < 1)
|
|
interval: ''
|
|
legendFormat: ''
|
|
range: true
|
|
refId: A
|
|
title: No data errors
|
|
type: stat
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: thresholds
|
|
custom:
|
|
align: auto
|
|
cellOptions:
|
|
type: auto
|
|
inspect: false
|
|
minWidth: 50
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
overrides:
|
|
- matcher:
|
|
id: byName
|
|
options: Time
|
|
properties:
|
|
- id: custom.hidden
|
|
value: true
|
|
- matcher:
|
|
id: byName
|
|
options: Value
|
|
properties:
|
|
- id: displayName
|
|
value: Count
|
|
gridPos:
|
|
h: 4
|
|
w: 9
|
|
x: 0
|
|
'y': 4
|
|
id: 45
|
|
options:
|
|
cellHeight: sm
|
|
footer:
|
|
countRows: false
|
|
fields: ''
|
|
reducer:
|
|
- sum
|
|
show: false
|
|
showHeader: true
|
|
pluginVersion: 12.0.2
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: sum(vm_app_version{job=~"$job", instance=~"$instance"}) by(job, short_version)
|
|
format: table
|
|
instant: true
|
|
range: false
|
|
refId: A
|
|
title: ''
|
|
type: table
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: stepAfter
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
decimals: 0
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 4
|
|
w: 15
|
|
x: 9
|
|
'y': 4
|
|
id: 4
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- lastNotNull
|
|
displayMode: table
|
|
placement: right
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: asc
|
|
pluginVersion: 12.0.2
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: sum(min_over_time(up{job=~"$job", instance=~"$instance"}[$__rate_interval])) by (job)
|
|
format: time_series
|
|
instant: false
|
|
interval: ''
|
|
legendFormat: '{{`{{`}}job{{`}}`}}'
|
|
refId: A
|
|
title: Uptime
|
|
type: timeseries
|
|
- collapsed: false
|
|
gridPos:
|
|
h: 1
|
|
w: 24
|
|
x: 0
|
|
'y': 8
|
|
id: 11
|
|
panels: []
|
|
title: Overview ($instance)
|
|
type: row
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows the number of fired alerts by job.
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 9
|
|
id: 15
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 12.0.2
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: sum(increase(vmalert_alerts_fired_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job)
|
|
interval: ''
|
|
legendFormat: '{{`{{`}}job{{`}}`}}'
|
|
range: true
|
|
refId: A
|
|
title: Alerts fired total ($instance)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Top $topk groups by evaluation duration. Shows groups that take the most of time during the evaluation across all instances.
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: s
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 12
|
|
'y': 9
|
|
id: 23
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 12.0.2
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: "topk($topk, max(sum(\n rate(vmalert_iteration_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])\n/\n rate(vmalert_iteration_duration_seconds_count{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])\n) by(job, instance, group, file)) \nby(job, group, file))"
|
|
interval: ''
|
|
legendFormat: ({{`{{`}}job{{`}}`}}) {{`{{`}}group{{`}}`}}({{`{{`}}file{{`}}`}})
|
|
range: true
|
|
refId: A
|
|
title: Top $topk groups avg evaluation duration ($group)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows how many requests (executions) per second vmalert sends to the configured datasource.
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 17
|
|
id: 24
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 12.0.2
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: sum(rate(vmalert_execution_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by (job)
|
|
interval: ''
|
|
legendFormat: '{{`{{`}}job{{`}}`}}'
|
|
range: true
|
|
refId: A
|
|
title: Rules execution rate ($instance)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows the error rate while executing configured rules. Non-zero value means there are some issues with existing rules. Check the logs to get more details.
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 10
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 12
|
|
'y': 17
|
|
id: 25
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: none
|
|
pluginVersion: 12.0.2
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: sum(rate(vmalert_execution_errors_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) > 0
|
|
interval: ''
|
|
legendFormat: __auto
|
|
range: true
|
|
refId: A
|
|
title: Rules execution errors ($instance)
|
|
type: timeseries
|
|
- collapsed: true
|
|
gridPos:
|
|
h: 1
|
|
w: 24
|
|
x: 0
|
|
'y': 25
|
|
id: 43
|
|
panels:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: 'The percentage of used RSS memory
|
|
|
|
|
|
If you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.'
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: percentunit
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 162
|
|
id: 37
|
|
links:
|
|
- targetBlank: true
|
|
title: Profiling
|
|
# Profiling docs for vmalert, the component this dashboard covers.
url: https://docs.victoriametrics.com/victoriametrics/vmalert/#profiling
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 11.5.0
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: |-
|
|
max(
|
|
max_over_time(process_resident_memory_bytes{job=~"$job", instance=~"$instance"}[$__rate_interval])
|
|
/
|
|
vm_available_memory_bytes{job=~"$job", instance=~"$instance"}
|
|
) by(job)
|
|
interval: ''
|
|
legendFormat: __auto
|
|
range: true
|
|
refId: A
|
|
title: Memory usage % ($instance)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: "Shows the CPU usage percentage per vmalert instance. \nIf you think that usage is abnormal or unexpected pls file an issue and attach CPU profile if possible."
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: percentunit
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 12
|
|
'y': 162
|
|
id: 35
|
|
links:
|
|
- targetBlank: true
|
|
title: Profiling
|
|
# Profiling docs for vmalert, the component this dashboard covers.
url: https://docs.victoriametrics.com/victoriametrics/vmalert/#profiling
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 11.5.0
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: "max(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) \n / \n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}\n) by(job)"
|
|
format: time_series
|
|
interval: ''
|
|
intervalFactor: 1
|
|
legendFormat: '{{`{{`}}job{{`}}`}}'
|
|
range: true
|
|
refId: A
|
|
title: CPU usage %($instance)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: 'Share for memory allocated by the process itself. When memory usage reaches 100% it will be likely OOM-killed.
|
|
|
|
Safe memory usage % considered to be below 80%'
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: percentunit
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 170
|
|
id: 65
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 11.5.0
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: |-
|
|
max(
|
|
max_over_time(process_resident_memory_anon_bytes{job=~"$job", instance=~"$instance"}[$__rate_interval])
|
|
/
|
|
vm_available_memory_bytes{job=~"$job", instance=~"$instance"}
|
|
) by(instance)
|
|
interval: ''
|
|
legendFormat: __auto
|
|
range: true
|
|
refId: A
|
|
title: RSS anonymous memory % usage
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows the max number of CPU cores used by a `job` and the corresponding limit.
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 12
|
|
'y': 170
|
|
id: 56
|
|
links:
|
|
- targetBlank: true
|
|
title: Profiling
|
|
# Profiling docs for vmalert, the component this dashboard covers.
url: https://docs.victoriametrics.com/victoriametrics/vmalert/#profiling
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 11.5.0
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: max(rate(process_cpu_seconds_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job)
|
|
format: time_series
|
|
interval: ''
|
|
intervalFactor: 1
|
|
legendFormat: '{{`{{`}}job{{`}}`}}'
|
|
range: true
|
|
refId: A
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: min(process_cpu_cores_available{job=~"$job", instance=~"$instance"}) by(job)
|
|
format: time_series
|
|
hide: false
|
|
interval: ''
|
|
intervalFactor: 1
|
|
legendFormat: limit ({{`{{`}}job{{`}}`}})
|
|
range: true
|
|
refId: B
|
|
title: CPU usage ($instance)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: 'Amount of used RSS memory
|
|
|
|
|
|
If you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.'
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: bytes
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 178
|
|
id: 57
|
|
links:
|
|
- targetBlank: true
|
|
title: Profiling
|
|
# Profiling docs for vmalert, the component this dashboard covers.
url: https://docs.victoriametrics.com/victoriametrics/vmalert/#profiling
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 11.5.0
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: |-
|
|
max(
|
|
max_over_time(process_resident_memory_bytes{job=~"$job", instance=~"$instance"}[$__rate_interval])
|
|
) by(job)
|
|
interval: ''
|
|
legendFormat: '{{`{{`}}job{{`}}`}}'
|
|
range: true
|
|
refId: A
|
|
title: Memory usage ($instance)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: 'Shows CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).
|
|
|
|
|
|
The lower the better.'
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: line
|
|
decimals: 0
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
unit: s
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 12
|
|
'y': 178
|
|
id: 66
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 11.5.0
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
# Restricted to the selected $instance values, like the other resource-usage queries in this row.
expr: sum(rate(process_pressure_cpu_waiting_seconds_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by (job, instance)
|
|
format: time_series
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: '{{`{{`}}instance{{`}}`}} - waiting'
|
|
range: true
|
|
refId: A
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
# Restricted to the selected $instance values, like the other resource-usage queries in this row.
expr: sum(rate(process_pressure_cpu_stalled_seconds_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by (job, instance)
|
|
format: time_series
|
|
hide: false
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: '{{`{{`}}instance{{`}}`}} - stalled'
|
|
range: true
|
|
refId: B
|
|
title: CPU pressure
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: 'Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).
|
|
|
|
|
|
The lower the better.'
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: line
|
|
decimals: 0
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
unit: s
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 186
|
|
id: 67
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 11.5.0
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
# Restricted to the selected $instance values, like the other resource-usage queries in this row.
expr: sum(rate(process_pressure_memory_waiting_seconds_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by (job, instance)
|
|
format: time_series
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: '{{`{{`}}instance{{`}}`}} - waiting'
|
|
range: true
|
|
refId: A
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
expr: sum(rate(process_pressure_memory_stalled_seconds_total{job=~"$job"}[$__rate_interval])) by (job, instance)
|
|
format: time_series
|
|
hide: false
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: '{{`{{`}}instance{{`}}`}} - stalled'
|
|
range: true
|
|
refId: B
|
|
title: Memory pressure
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
decimals: 0
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 12
|
|
'y': 186
|
|
id: 41
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: none
|
|
pluginVersion: 11.5.0
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
expr: sum(go_goroutines{job=~"$job", instance=~"$instance"}) by(job)
|
|
format: time_series
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: __auto
|
|
range: true
|
|
refId: A
|
|
title: Goroutines ($instance)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: 'Panel shows the percentage of open file descriptors in the OS.
|
|
|
|
Reaching the limit of open files can cause various issues and must be prevented.
|
|
|
|
|
|
See how to change limits here https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a'
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
decimals: 3
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: percentunit
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 194
|
|
id: 39
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 11.5.0
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: |-
|
|
max(
|
|
max_over_time(process_open_fds{job=~"$job", instance=~"$instance"}[$__rate_interval])
|
|
/
|
|
process_max_fds{job=~"$job", instance=~"$instance"}
|
|
) by(job)
|
|
format: time_series
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: __auto
|
|
range: true
|
|
refId: A
|
|
title: Open FDs usage % ($instance)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold are usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that the service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing."
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: line
|
|
decimals: 0
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 0.1
|
|
unit: s
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 12
|
|
'y': 194
|
|
id: 61
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 11.5.0
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
expr: max(histogram_quantile(0.99, sum(rate(go_sched_latencies_seconds_bucket{job=~"$job"}[$__rate_interval])) by (job, instance, le))) by(job)
|
|
format: time_series
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: __auto
|
|
range: true
|
|
refId: A
|
|
title: Go scheduling latency
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: 'Shows the percent of CPU spent on garbage collection.
|
|
|
|
|
|
If % is high, then CPU usage can be decreased by changing GOGC to higher values. Increasing GOGC value will increase memory usage, and decrease CPU usage.
|
|
|
|
|
|
Try searching for keyword `GOGC` at https://docs.victoriametrics.com/victoriametrics/troubleshooting/ '
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
decimals: 0
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: percentunit
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 202
|
|
id: 59
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 11.5.0
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
expr: "max(\n rate(go_gc_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) \n / rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n ) by(job)"
|
|
format: time_series
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: __auto
|
|
range: true
|
|
refId: A
|
|
title: CPU spent on GC ($instance)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows the rate of allocations in memory. Sudden increase in allocations would mean increased pressure on Go Garbage Collector and can saturate CPU resources of the application.
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
barWidthFactor: 0.6
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: line
|
|
decimals: 0
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
unit: bytes
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 210
|
|
id: 64
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
hideZeros: false
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 11.5.0
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
expr: sum(rate(go_memstats_alloc_bytes_total{job=~"$job"}[$__rate_interval])) by (job, instance)
|
|
format: time_series
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: __auto
|
|
range: true
|
|
refId: A
|
|
title: Memory allocations rate
|
|
type: timeseries
|
|
title: Resource usage
|
|
type: row
|
|
- collapsed: true
|
|
gridPos:
|
|
h: 1
|
|
w: 24
|
|
x: 0
|
|
'y': 26
|
|
id: 62
|
|
panels:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: thresholds
|
|
custom:
|
|
align: auto
|
|
cellOptions:
|
|
type: auto
|
|
inspect: false
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
overrides:
|
|
- matcher:
|
|
id: byName
|
|
options: Value
|
|
properties:
|
|
- id: custom.hidden
|
|
value: true
|
|
- matcher:
|
|
id: byName
|
|
options: Time
|
|
properties:
|
|
- id: custom.hidden
|
|
value: true
|
|
gridPos:
|
|
h: 7
|
|
w: 12
|
|
x: 0
|
|
'y': 345
|
|
id: 50
|
|
options:
|
|
cellHeight: sm
|
|
footer:
|
|
countRows: false
|
|
fields: ''
|
|
reducer:
|
|
- sum
|
|
show: false
|
|
showHeader: true
|
|
sortBy:
|
|
- desc: true
|
|
displayName: job
|
|
pluginVersion: 10.4.2
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: sum(flag{is_set="true", job=~"$job", instance=~"$instance"}) by(job, instance, name, value)
|
|
format: table
|
|
instant: true
|
|
legendFormat: __auto
|
|
range: false
|
|
refId: A
|
|
title: Non-default flags
|
|
transformations:
|
|
- id: groupBy
|
|
options:
|
|
fields:
|
|
instance:
|
|
aggregations:
|
|
- uniqueValues
|
|
operation: aggregate
|
|
job:
|
|
aggregations: []
|
|
operation: groupby
|
|
name:
|
|
aggregations: []
|
|
operation: groupby
|
|
value:
|
|
aggregations: []
|
|
operation: groupby
|
|
type: table
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: "Missed evaluation means that group evaluation time takes longer than the configured evaluation interval. \nThis may result in missed alerting notifications or recording rules samples. Try increasing evaluation interval or concurrency for such groups. See https://docs.victoriametrics.com/victoriametrics/vmalert/#groups\n\nIf rule expressions are taking longer than expected, please see https://docs.victoriametrics.com/victoriametrics/troubleshooting/#slow-queries."
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
drawStyle: bars
|
|
fillOpacity: 10
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 7
|
|
w: 12
|
|
x: 12
|
|
'y': 345
|
|
id: 58
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
mode: multi
|
|
sort: none
|
|
pluginVersion: 9.2.6
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: sum(increase(vmalert_iteration_missed_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, group, file) > 0
|
|
interval: 1m
|
|
legendFormat: ({{`{{`}}job{{`}}`}}) {{`{{`}}group{{`}}`}}({{`{{`}}file{{`}}`}})
|
|
range: true
|
|
refId: A
|
|
title: Missed evaluations ($instance)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: 'Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn''t restart unless the restart was initiated by the user. The reason for restarts should be determined by checking the logs of each specific service. '
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
axisSoftMin: 0
|
|
barAlignment: 0
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: stepAfter
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
decimals: 0
|
|
links: []
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: none
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 352
|
|
id: 63
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- lastNotNull
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 9.1.0
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
expr: sum(changes(vm_app_start_timestamp{job=~"$job", instance=~"$instance"}[$__rate_interval]) > 0) by(job)
|
|
format: time_series
|
|
instant: false
|
|
legendFormat: '{{`{{`}}job{{`}}`}}'
|
|
refId: A
|
|
title: Restarts ($job)
|
|
type: timeseries
|
|
title: Troubleshooting
|
|
type: row
|
|
- collapsed: true
|
|
gridPos:
|
|
h: 1
|
|
w: 24
|
|
x: 0
|
|
'y': 27
|
|
id: 17
|
|
panels:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows top $topk current active (firing) alerting rules.
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 370
|
|
id: 14
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 9.2.6
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: topk($topk, sum(vmalert_alerts_firing{job=~"$job", instance=~"$instance", group=~"$group", file=~"$file"}) by(job, group, file, alertname) > 0)
|
|
interval: ''
|
|
legendFormat: ({{`{{`}}job{{`}}`}}) {{`{{`}}group{{`}}`}}.{{`{{`}}alertname{{`}}`}}({{`{{`}}file{{`}}`}})
|
|
range: true
|
|
refId: A
|
|
title: Top $topk active alerts ($group)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows the events when rule execution resulted in an error. Check the logs for more details.
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 12
|
|
'y': 370
|
|
id: 13
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 9.2.6
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: sum(increase(vmalert_alerting_rules_errors_total{job=~"$job", instance=~"$instance", group=~"$group", file=~"$file"}[$__rate_interval])) by(job, group, file, alertname) > 0
|
|
interval: ''
|
|
legendFormat: ({{`{{`}}job{{`}}`}}) {{`{{`}}group{{`}}`}}.{{`{{`}}alertname{{`}}`}}({{`{{`}}file{{`}}`}})
|
|
range: true
|
|
refId: A
|
|
title: Errors ($group)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: 'Shows the current pending alerting rules per group.
|
|
|
|
Pending means the rule has been active for less than the configured `for` parameter.'
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 378
|
|
id: 20
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Mean
|
|
sortDesc: true
|
|
tooltip:
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 9.2.6
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: sum(vmalert_alerts_pending{job=~"$job", instance=~"$instance", group=~"$group", file=~"$file"}) by(job, group, file, alertname) > 0
|
|
interval: ''
|
|
legendFormat: ({{`{{`}}job{{`}}`}}) {{`{{`}}group{{`}}`}}.{{`{{`}}alertname{{`}}`}}({{`{{`}}file{{`}}`}})
|
|
range: true
|
|
refId: A
|
|
title: Pending ($group)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows the error rate for the attempts to send alerts to Alertmanager. If it is not zero, there are issues with sending notifications to Alertmanager and some alerts may not be delivered properly. Check the logs for more details.
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 12
|
|
'y': 378
|
|
id: 32
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 9.2.6
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
exemplar: false
|
|
expr: sum(rate(vmalert_alerts_send_errors_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(instance, addr) > 0
|
|
interval: ''
|
|
legendFormat: '{{`{{`}}instance{{`}}`}} => {{`{{`}}addr{{`}}`}}'
|
|
refId: A
|
|
title: Errors rate to Alertmanager
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows how many alerts are sent to Alertmanager per second. Only active alerts are sent.
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 386
|
|
id: 26
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 9.2.6
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: sum(rate(vmalert_alerts_sent_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, addr) > 0
|
|
interval: ''
|
|
legendFormat: '{{`{{`}}job{{`}}`}} => {{`{{`}}addr{{`}}`}}'
|
|
range: true
|
|
refId: A
|
|
title: Requests rate to Alertmanager by job ($group)
|
|
type: timeseries
|
|
title: Alerting rules ($instance)
|
|
type: row
|
|
- collapsed: true
|
|
gridPos:
|
|
h: 1
|
|
w: 24
|
|
x: 0
|
|
'y': 28
|
|
id: 28
|
|
panels:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows the top $topk recording rules which generate the most [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples). Each generated sample is basically a time series which is then ingested into the configured remote storage. Rules with high numbers may cause the most pressure on the remote database and become a source of too high cardinality.
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 385
|
|
id: 31
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 9.2.6
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: "topk($topk, \n max(\n sum(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}) by(job, instance, group, file, recording) > 0\n ) by(job, group, file, recording)\n)"
|
|
interval: ''
|
|
legendFormat: ({{`{{`}}job{{`}}`}}) {{`{{`}}group{{`}}`}}.{{`{{`}}recording{{`}}`}}({{`{{`}}file{{`}}`}})
|
|
range: true
|
|
refId: A
|
|
title: Top $topk rules by produced samples ($group)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: 'Shows the rules which do not produce any [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples) during the evaluation. Usually it means that such rules are misconfigured, since they give no output during the evaluation.
|
|
|
|
Please check if rule''s expression is correct and it is working as expected.'
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: true
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 12
|
|
'y': 385
|
|
id: 33
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- lastNotNull
|
|
- max
|
|
- mean
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
sortBy: Last *
|
|
sortDesc: true
|
|
tooltip:
|
|
mode: multi
|
|
sort: desc
|
|
pluginVersion: 8.0.3
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: count(vmalert_recording_rules_last_evaluation_samples{job=~"$job", instance=~"$instance", group=~"$group", file=~"$file"} < 1) by(job, group, file, recording)
|
|
interval: ''
|
|
legendFormat: ({{`{{`}}job{{`}}`}}) {{`{{`}}group{{`}}`}}.{{`{{`}}recording{{`}}`}}({{`{{`}}file{{`}}`}})
|
|
range: true
|
|
refId: A
|
|
title: Rules with 0 produced samples ($group)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 393
|
|
id: 30
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
mode: multi
|
|
sort: none
|
|
pluginVersion: 9.2.6
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: false
|
|
expr: sum(increase(vmalert_recording_rules_errors_total{job=~"$job", instance=~"$instance", group=~"$group", file=~"$file"}[$__rate_interval])) by(job, group, file, recording) > 0
|
|
interval: ''
|
|
legendFormat: ({{`{{`}}job{{`}}`}}) {{`{{`}}group{{`}}`}}.{{`{{`}}recording{{`}}`}}({{`{{`}}file{{`}}`}})
|
|
range: true
|
|
refId: A
|
|
title: Errors ($group)
|
|
type: timeseries
|
|
title: Recording rules ($instance)
|
|
type: row
|
|
- collapsed: true
|
|
gridPos:
|
|
h: 1
|
|
w: 24
|
|
x: 0
|
|
'y': 29
|
|
id: 55
|
|
panels:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: auto
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 351
|
|
id: 52
|
|
options:
|
|
legend:
|
|
calcs: []
|
|
displayMode: list
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
mode: single
|
|
sort: desc
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
expr: sum(rate(vmalert_remotewrite_sent_rows_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job)
|
|
legendFormat: __auto
|
|
range: true
|
|
refId: A
|
|
title: Datapoints send rate ($instance)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows the number of datapoints dropped by vmalert while sending to the configured remote write URL. vmalert performs up to 5 retries before dropping the data. Check vmalert's error logs for the specific error message.
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: auto
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
mappings: []
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 12
|
|
'y': 351
|
|
id: 53
|
|
options:
|
|
legend:
|
|
calcs: []
|
|
displayMode: list
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
mode: single
|
|
sort: desc
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
expr: sum(rate(vmalert_remotewrite_dropped_rows_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) > 0
|
|
legendFormat: __auto
|
|
range: true
|
|
refId: A
|
|
title: Datapoints drop rate ($instance)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: 'Shows current number of established connections to remote write endpoints.
|
|
|
|
|
|
'
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: short
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 0
|
|
'y': 378
|
|
id: 54
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
mode: multi
|
|
sort: desc
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: true
|
|
expr: sum(max_over_time(vmalert_remotewrite_conns{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job)
|
|
interval: ''
|
|
legendFormat: __auto
|
|
range: true
|
|
refId: A
|
|
title: Connections ($instance)
|
|
type: timeseries
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
description: Shows the global rate for number of written bytes via remote write connections.
|
|
fieldConfig:
|
|
defaults:
|
|
color:
|
|
mode: palette-classic
|
|
custom:
|
|
axisBorderShow: false
|
|
axisCenteredZero: false
|
|
axisColorMode: text
|
|
axisLabel: ''
|
|
axisPlacement: auto
|
|
barAlignment: 0
|
|
drawStyle: line
|
|
fillOpacity: 0
|
|
gradientMode: none
|
|
hideFrom:
|
|
legend: false
|
|
tooltip: false
|
|
viz: false
|
|
insertNulls: false
|
|
lineInterpolation: linear
|
|
lineWidth: 1
|
|
pointSize: 5
|
|
scaleDistribution:
|
|
type: linear
|
|
showPoints: never
|
|
spanNulls: false
|
|
stacking:
|
|
group: A
|
|
mode: none
|
|
thresholdsStyle:
|
|
mode: 'off'
|
|
links: []
|
|
mappings: []
|
|
min: 0
|
|
thresholds:
|
|
mode: absolute
|
|
steps:
|
|
- color: green
|
|
- color: red
|
|
value: 80
|
|
unit: decbytes
|
|
overrides: []
|
|
gridPos:
|
|
h: 8
|
|
w: 12
|
|
x: 12
|
|
'y': 378
|
|
id: 60
|
|
options:
|
|
legend:
|
|
calcs:
|
|
- mean
|
|
- lastNotNull
|
|
- max
|
|
displayMode: table
|
|
placement: bottom
|
|
showLegend: true
|
|
tooltip:
|
|
mode: multi
|
|
sort: desc
|
|
targets:
|
|
- datasource:
|
|
type: {{ $defaultDatasource }}
|
|
uid: $ds
|
|
editorMode: code
|
|
exemplar: true
|
|
expr: sum(rate(vmalert_remotewrite_conn_bytes_written_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) > 0
|
|
interval: ''
|
|
legendFormat: __auto
|
|
range: true
|
|
refId: A
|
|
title: Bytes write rate ($instance)
|
|
type: timeseries
|
|
title: Remote write
|
|
type: row
|
|
preload: false
|
|
refresh: ''
|
|
schemaVersion: 41
|
|
tags:
|
|
- victoriametrics
|
|
- vm-k8s-stack
|
|
templating:
|
|
list:
|
|
- current:
|
|
text: VictoriaMetrics - cluster
|
|
value: PAF93674D0B4E9963
|
|
includeAll: false
|
|
name: ds
|
|
options: []
|
|
query: {{ $defaultDatasource }}
|
|
refresh: 1
|
|
regex: ''
|
|
type: datasource
|
|
- current: {}
|
|
datasource:
|
|
type: prometheus
|
|
uid: $ds
|
|
definition: label_values(vm_app_version{version=~"^vmalert.*"}, job)
|
|
includeAll: true
|
|
multi: true
|
|
name: job
|
|
options: []
|
|
query:
|
|
query: label_values(vm_app_version{version=~"^vmalert.*"}, job)
|
|
refId: StandardVariableQuery
|
|
refresh: 1
|
|
regex: ''
|
|
type: query
|
|
- allValue: .*
|
|
current: {}
|
|
datasource:
|
|
type: prometheus
|
|
uid: $ds
|
|
definition: label_values(vm_app_version{job=~"$job"}, instance)
|
|
includeAll: true
|
|
multi: true
|
|
name: instance
|
|
options: []
|
|
query:
|
|
query: label_values(vm_app_version{job=~"$job"}, instance)
|
|
refId: StandardVariableQuery
|
|
refresh: 1
|
|
regex: ''
|
|
type: query
|
|
- allValue: .*
|
|
current: {}
|
|
datasource:
|
|
type: prometheus
|
|
uid: $ds
|
|
definition: label_values(vmalert_iteration_total{job=~"$job", instance=~"$instance"},file)
|
|
includeAll: true
|
|
multi: true
|
|
name: file
|
|
options: []
|
|
query:
|
|
query: label_values(vmalert_iteration_total{job=~"$job", instance=~"$instance"},file)
|
|
refId: PrometheusVariableQueryEditor-VariableQuery
|
|
refresh: 1
|
|
regex: ''
|
|
type: query
|
|
- allValue: .*
|
|
current: {}
|
|
datasource:
|
|
type: prometheus
|
|
uid: $ds
|
|
definition: label_values(vmalert_iteration_total{job=~"$job", instance=~"$instance"}, group)
|
|
includeAll: true
|
|
multi: true
|
|
name: group
|
|
options: []
|
|
query:
|
|
query: label_values(vmalert_iteration_total{job=~"$job", instance=~"$instance"}, group)
|
|
refId: StandardVariableQuery
|
|
refresh: 1
|
|
regex: ''
|
|
type: query
|
|
- current:
|
|
text: '5'
|
|
value: '5'
|
|
includeAll: false
|
|
name: topk
|
|
options:
|
|
- selected: true
|
|
text: '5'
|
|
value: '5'
|
|
- selected: false
|
|
text: '10'
|
|
value: '10'
|
|
- selected: false
|
|
text: '20'
|
|
value: '20'
|
|
- selected: false
|
|
text: '30'
|
|
value: '30'
|
|
- selected: false
|
|
text: '40'
|
|
value: '40'
|
|
- selected: false
|
|
text: '50'
|
|
value: '50'
|
|
query: 5, 10, 20, 30, 40, 50
|
|
type: custom
|
|
- baseFilters: []
|
|
datasource:
|
|
type: prometheus
|
|
uid: ${ds}
|
|
filters: []
|
|
name: filter
|
|
type: adhoc
|
|
time:
|
|
from: now-3h
|
|
to: now
|
|
timepicker: {}
|
|
timezone: {{ default "utc" ($Values.defaultDashboards).defaultTimezone }}
|
|
title: VictoriaMetrics - vmalert
|
|
uid: LzldHAVnz
|
|
version: 1
|