{{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} {{- $additionalGroupByLabels := append $Values.defaultRules.additionalGroupByLabels $clusterLabel }} {{- $groupLabels := join "," $additionalGroupByLabels }} {{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ ($Values.kubeApiServer).enabled }}' interval: 3m name: kube-apiserver-availability.rules rules: - condition: '{{ true }}' expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30 record: code_verb:apiserver_request_total:increase30d - condition: '{{ true }}' expr: sum by (code,{{ $groupLabels }}) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) labels: verb: read record: code:apiserver_request_total:increase30d - condition: '{{ true }}' expr: sum by (code,{{ $groupLabels }}) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) labels: verb: write record: code:apiserver_request_total:increase30d - condition: '{{ true }}' expr: sum by (verb,scope,le,{{ $groupLabels }}) (increase(apiserver_request_sli_duration_seconds_bucket[1h])) record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h - condition: '{{ true }}' expr: sum by (verb,scope,le,{{ $groupLabels }}) (avg_over_time(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h[30d]) * 24 * 30) record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d - condition: '{{ true }}' expr: sum by (verb,scope,{{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h{le="+Inf"}) record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h - condition: '{{ true }}' expr: sum by (verb,scope,{{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le="+Inf"}) record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d - condition: '{{ true }}' expr: |- 1 - ( ( # write too slow sum by ({{ $groupLabels }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le=~"1(\\.0)?"} or vector(0)) ) + ( # read too slow sum by ({{ $groupLabels }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"}) - ( sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le=~"1(\\.0)?"} or vector(0)) + sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le=~"5(\\.0)?"} or vector(0)) + sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le=~"30(\\.0)?"} or vector(0)) ) ) + # errors sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) ) / sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d) labels: verb: all record: apiserver_request:availability30d - condition: '{{ true }}' expr: |- 1 - ( sum by ({{ $groupLabels }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"}) - ( # too slow sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le=~"1(\\.0)?"} or vector(0)) + sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le=~"5(\\.0)?"} or vector(0)) + sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le=~"30(\\.0)?"} or vector(0)) ) + # errors sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) ) / sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{verb="read"}) labels: verb: read record: apiserver_request:availability30d - condition: '{{ true }}' expr: |- 1 - ( ( # too slow sum by ({{ $groupLabels }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - sum by ({{ $groupLabels }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le=~"1(\\.0)?"} or vector(0)) ) + # errors sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) ) / sum by ({{ $groupLabels }}) (code:apiserver_request_total:increase30d{verb="write"}) labels: verb: write record: apiserver_request:availability30d - condition: '{{ true }}' expr: sum by (code,resource,{{ $groupLabels }}) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) labels: verb: read record: code_resource:apiserver_request_total:rate5m - condition: '{{ true }}' expr: sum by (code,resource,{{ $groupLabels }}) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) labels: verb: write record: code_resource:apiserver_request_total:rate5m - condition: '{{ true }}' expr: sum by (code,verb,{{ $groupLabels }}) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h])) record: code_verb:apiserver_request_total:increase1h - condition: '{{ true }}' expr: sum by (code,verb,{{ $groupLabels }}) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h])) record: code_verb:apiserver_request_total:increase1h - condition: '{{ true }}' expr: sum by (code,verb,{{ $groupLabels }}) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h])) record: code_verb:apiserver_request_total:increase1h - condition: '{{ true }}' expr: sum by (code,verb,{{ $groupLabels }}) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) record: code_verb:apiserver_request_total:increase1h