infra/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-availability.rules.yaml
Konstantin Averkiev c45fd1a6ac added vm stack
2025-07-08 17:29:32 +03:00

127 lines
6.8 KiB
YAML

{{- $Values := (.helm).Values | default .Values }}
{{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }}
{{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }}
{{- $additionalGroupByLabels := append $Values.defaultRules.additionalGroupByLabels $clusterLabel }}
{{- $groupLabels := join "," $additionalGroupByLabels }}
{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }}
condition: '{{ ($Values.kubeApiServer).enabled }}'
interval: 3m
name: kube-apiserver-availability.rules
rules:
- condition: '{{ true }}'
expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30
record: code_verb:apiserver_request_total:increase30d
- condition: '{{ true }}'
expr: sum by (code,{{ $groupLabels }}) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
labels:
verb: read
record: code:apiserver_request_total:increase30d
- condition: '{{ true }}'
expr: sum by (code,{{ $groupLabels }}) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
labels:
verb: write
record: code:apiserver_request_total:increase30d
- condition: '{{ true }}'
expr: sum by (verb,scope,le,{{ $groupLabels }}) (increase(apiserver_request_sli_duration_seconds_bucket[1h]))
record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h
- condition: '{{ true }}'
expr: sum by (verb,scope,le,{{ $groupLabels }}) (avg_over_time(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h[30d]) * 24 * 30)
record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d
- condition: '{{ true }}'
expr: sum by (verb,scope,{{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h{le="+Inf"})
record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h
- condition: '{{ true }}'
expr: sum by (verb,scope,{{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le="+Inf"})
record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d
- condition: '{{ true }}'
expr: |-
1 - (
(
# write too slow
sum by ({{ $groupLabels }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
-
sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le=~"1(\\.0)?"} or vector(0))
) +
(
# read too slow
sum by ({{ $groupLabels }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"})
-
(
sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le=~"1(\\.0)?"} or vector(0))
+
sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le=~"5(\\.0)?"} or vector(0))
+
sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le=~"30(\\.0)?"} or vector(0))
)
) +
# errors
sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0))
)
/
sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d)
labels:
verb: all
record: apiserver_request:availability30d
- condition: '{{ true }}'
expr: |-
1 - (
sum by ({{ $groupLabels }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"})
-
(
# too slow
sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le=~"1(\\.0)?"} or vector(0))
+
sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le=~"5(\\.0)?"} or vector(0))
+
sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le=~"30(\\.0)?"} or vector(0))
)
+
# errors
sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0))
)
/
sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{verb="read"})
labels:
verb: read
record: apiserver_request:availability30d
- condition: '{{ true }}'
expr: |-
1 - (
(
# too slow
sum by ({{ $groupLabels }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
-
sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le=~"1(\\.0)?"} or vector(0))
)
+
# errors
sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0))
)
/
sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{verb="write"})
labels:
verb: write
record: apiserver_request:availability30d
- condition: '{{ true }}'
expr: sum by (code,resource,{{ $groupLabels }}) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
labels:
verb: read
record: code_resource:apiserver_request_total:rate5m
- condition: '{{ true }}'
expr: sum by (code,resource,{{ $groupLabels }}) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
labels:
verb: write
record: code_resource:apiserver_request_total:rate5m
- condition: '{{ true }}'
expr: sum by (code,verb,{{ $groupLabels }}) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- condition: '{{ true }}'
expr: sum by (code,verb,{{ $groupLabels }}) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- condition: '{{ true }}'
expr: sum by (code,verb,{{ $groupLabels }}) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- condition: '{{ true }}'
expr: sum by (code,verb,{{ $groupLabels }}) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h