# infra/charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml
{{- $Values := (.helm).Values | default .Values }}
{{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }}
{{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }}
{{- $additionalGroupByLabels := append $Values.defaultRules.additionalGroupByLabels $clusterLabel }}
{{- $groupLabels := join "," $additionalGroupByLabels }}
{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }}
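# The template variables above resolve, in order: the base runbook URL, the per-cluster label
# (default "cluster"), the labels every aggregation groups by (defaultRules.additionalGroupByLabels
# with the cluster label appended, joined into $groupLabels), and the Grafana host (the first bundled
# Grafana ingress host when grafana.enabled is true, otherwise external.grafana.host). $grafanaHost is
# not referenced by this rule group.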
condition: '{{ true }}'
name: general.rules
rules:
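# TargetDown: for each job/namespace/service (plus the configured group-by labels), the share of
# scrape targets with up == 0; warns when more than 10% of a group's targets stay down for 10 minutes.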
- alert: TargetDown
  annotations:
    description: '{{`{{`}} printf "%.4g" $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.service {{`}}`}} targets in {{`{{`}} $labels.namespace {{`}}`}} namespace are down.'
    runbook_url: '{{ $runbookUrl }}/general/targetdown'
    summary: 'One or more targets are unreachable.'
  condition: '{{ true }}'
  expr: 100 * (count(up == 0) BY (job,namespace,service,{{ $groupLabels }}) / count(up) BY (job,namespace,service,{{ $groupLabels }})) > 10
  for: 10m
  labels:
    severity: warning
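# Watchdog: vector(1) always yields a single sample of value 1, so the alert fires permanently;
# if notifications for it ever stop arriving, the alerting pipeline itself is broken.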
- alert: Watchdog
  annotations:
    description: 'This is an alert meant to ensure that the entire alerting pipeline is functional.
      This alert is always firing, therefore it should always be firing in Alertmanager
      and always fire against a receiver. There are integrations with various notification
      mechanisms that send a notification when this alert is not firing. For example the
      "DeadMansSnitch" integration in PagerDuty.
      '
    runbook_url: '{{ $runbookUrl }}/general/watchdog'
    summary: 'An alert that should always be firing to certify that Alertmanager is working properly.'
  condition: '{{ true }}'
  expr: vector(1)
  labels:
    severity: none
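# InfoInhibitor: fires for any firing severity="info" alert unless a warning or critical alert is
# firing in the same namespace (matched on namespace plus the group-by labels); intended as an
# Alertmanager inhibition source routed to a null receiver.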
- alert: InfoInhibitor
  annotations:
    description: 'This is an alert that is used to inhibit info alerts.
      By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
      other alerts.
      This alert fires whenever there''s a severity="info" alert, and stops firing when another alert with a
      severity of ''warning'' or ''critical'' starts firing on the same namespace.
      This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
      '
    runbook_url: '{{ $runbookUrl }}/general/infoinhibitor'
    summary: 'Info-level alert inhibition.'
  condition: '{{ true }}'
  expr: ALERTS{severity = "info"} == 1 unless on (namespace,{{ $groupLabels }}) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
  labels:
    severity: none