# YAML, 127 lines, 6.8 KiB (file-viewer header, kept as a comment)
{{- /*
  Shared preamble for this rule group.
  $Values resolves to (.helm).Values when that is set, otherwise to .Values.
  $groupLabels is the comma-joined list of additionalGroupByLabels plus the
  cluster label; it is interpolated into every `by (...)` clause below.
*/}}
{{- $Values := (.helm).Values | default .Values }}
{{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }}
{{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }}
{{- /* Parenthesised lookup plus `default (list)` keeps append from failing when defaultRules or additionalGroupByLabels is unset. */}}
{{- $additionalGroupByLabels := append (($Values.defaultRules).additionalGroupByLabels | default (list)) $clusterLabel }}
{{- $groupLabels := join "," $additionalGroupByLabels }}
{{- /* ternary evaluates both of its value arguments, so the ingress host is indexed only when grafana.enabled is true. */}}
{{- $grafanaHost := (($Values.external).grafana).host }}
{{- if ($Values.grafana).enabled }}
{{- $grafanaHost = index (($Values.grafana).ingress).hosts 0 }}
{{- end }}
# Rule group header. The group-level condition is taken from
# kubeApiServer.enabled; the group is evaluated every 3 minutes.
condition: '{{ ($Values.kubeApiServer).enabled }}'
interval: 3m
name: kube-apiserver-availability.rules
rules:
# 30-day request totals: the 30d average of the hourly increase series,
# scaled by 24 * 30 (hours in 30 days).
- condition: '{{ true }}'
  expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30
  record: code_verb:apiserver_request_total:increase30d
# Same totals summed per response code for LIST/GET, labelled verb=read.
- condition: '{{ true }}'
  expr: sum by (code,{{ $groupLabels }}) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
  labels:
    verb: read
  record: code:apiserver_request_total:increase30d
# Same totals summed per response code for POST/PUT/PATCH/DELETE, labelled verb=write.
- condition: '{{ true }}'
  expr: sum by (code,{{ $groupLabels }}) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
  labels:
    verb: write
  record: code:apiserver_request_total:increase30d
# Hourly increase of the SLI duration histogram buckets, per verb/scope/le.
- condition: '{{ true }}'
  expr: sum by (verb,scope,le,{{ $groupLabels }}) (increase(apiserver_request_sli_duration_seconds_bucket[1h]))
  record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h
# 30-day bucket totals: 30d average of the hourly series, scaled by 24 * 30.
- condition: '{{ true }}'
  expr: sum by (verb,scope,le,{{ $groupLabels }}) (avg_over_time(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h[30d]) * 24 * 30)
  record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d
# Hourly request count, read from the le="+Inf" bucket of the hourly series.
- condition: '{{ true }}'
  expr: sum by (verb,scope,{{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h{le="+Inf"})
  record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h
# 30-day request count, read from the le="+Inf" bucket of the 30d series.
- condition: '{{ true }}'
  expr: sum by (verb,scope,{{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le="+Inf"})
  record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d
# Overall 30-day availability:
#   1 - (slow writes + slow reads + 5xx responses) / all requests.
# "Slow" is the request count minus the requests that landed in the selected
# le bucket: le=1 for writes and for reads with scope "resource" or empty,
# le=5 for scope "namespace", le=30 for scope "cluster".
# `or vector(0)` supplies a zero when a selector matches no series.
- condition: '{{ true }}'
  expr: |-
    1 - (
      (
        # write too slow
        sum by ({{ $groupLabels }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
        -
        sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le=~"1(\\.0)?"} or vector(0))
      ) +
      (
        # read too slow
        sum by ({{ $groupLabels }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"})
        -
        (
          sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le=~"1(\\.0)?"} or vector(0))
          +
          sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le=~"5(\\.0)?"} or vector(0))
          +
          sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le=~"30(\\.0)?"} or vector(0))
        )
      ) +
      # errors
      sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0))
    )
    /
    sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d)
  labels:
    verb: all
  record: apiserver_request:availability30d
# 30-day availability for reads (LIST/GET):
#   1 - (slow reads + read 5xx responses) / all read requests.
# Slow reads are the read count minus requests inside the per-scope le bucket
# (le=1 for scope "resource" or empty, le=5 for "namespace", le=30 for "cluster").
- condition: '{{ true }}'
  expr: |-
    1 - (
      sum by ({{ $groupLabels }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"})
      -
      (
        # too slow
        sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le=~"1(\\.0)?"} or vector(0))
        +
        sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le=~"5(\\.0)?"} or vector(0))
        +
        sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le=~"30(\\.0)?"} or vector(0))
      )
      +
      # errors
      sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0))
    )
    /
    sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{verb="read"})
  labels:
    verb: read
  record: apiserver_request:availability30d
# 30-day availability for writes (POST/PUT/PATCH/DELETE):
#   1 - (slow writes + write 5xx responses) / all write requests.
# Slow writes are the write count minus requests inside the le=1 bucket.
- condition: '{{ true }}'
  expr: |-
    1 - (
      (
        # too slow
        sum by ({{ $groupLabels }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
        -
        sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le=~"1(\\.0)?"} or vector(0))
      )
      +
      # errors
      sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0))
    )
    /
    sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{verb="write"})
  labels:
    verb: write
  record: apiserver_request:availability30d
# 5-minute request rate per response code and resource for LIST/GET
# on job="apiserver", labelled verb=read.
- condition: '{{ true }}'
  expr: sum by (code,resource,{{ $groupLabels }}) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
  labels:
    verb: read
  record: code_resource:apiserver_request_total:rate5m
# 5-minute request rate per response code and resource for
# POST/PUT/PATCH/DELETE on job="apiserver", labelled verb=write.
- condition: '{{ true }}'
  expr: sum by (code,resource,{{ $groupLabels }}) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
  labels:
    verb: write
  record: code_resource:apiserver_request_total:rate5m
# Hourly request increase per code and verb on job="apiserver". The same
# record name is written by four rules, one per status-code class
# (2xx, 3xx, 4xx, 5xx).
- condition: '{{ true }}'
  expr: sum by (code,verb,{{ $groupLabels }}) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h]))
  record: code_verb:apiserver_request_total:increase1h
- condition: '{{ true }}'
  expr: sum by (code,verb,{{ $groupLabels }}) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h]))
  record: code_verb:apiserver_request_total:increase1h
- condition: '{{ true }}'
  expr: sum by (code,verb,{{ $groupLabels }}) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h]))
  record: code_verb:apiserver_request_total:increase1h
- condition: '{{ true }}'
  expr: sum by (code,verb,{{ $groupLabels }}) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
  record: code_verb:apiserver_request_total:increase1h