{{- /*
  Alerting rule group "vmoperator".

  Template variables defined below:
    $Values                  - chart values; .helm.Values when present, otherwise .Values.
    $runbookUrl              - runbook base URL with a built-in default (not referenced by the rules in this section).
    $clusterLabel            - label name taken from global.clusterLabel, defaulting to "cluster".
    $additionalGroupByLabels - defaultRules.additionalGroupByLabels with $clusterLabel appended.
    $groupLabels             - the labels above joined with commas, for use inside "by (...)" clauses.
    $grafanaHost             - first Grafana ingress host when Grafana is enabled and has hosts,
                               otherwise external.grafana.host (not referenced by the rules in this section).
*/ -}}
{{- $Values := (.helm).Values | default .Values }}
{{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }}
{{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }}
{{- /* Parenthesised access plus an empty-list fallback keeps append from failing when defaultRules or its additionalGroupByLabels key is unset. */ -}}
{{- $additionalGroupByLabels := append (($Values.defaultRules).additionalGroupByLabels | default list) $clusterLabel }}
{{- $groupLabels := join "," $additionalGroupByLabels }}
{{- /* ternary evaluates both branches eagerly, so the ingress host list is only indexed here after checking that Grafana is enabled and the list is non-empty. */ -}}
{{- $grafanaIngressHosts := (($Values.grafana).ingress).hosts | default list }}
{{- $grafanaHost := (($Values.external).grafana).host }}
{{- if and ($Values.grafana).enabled $grafanaIngressHosts }}
  {{- $grafanaHost = index $grafanaIngressHosts 0 }}
{{- end }}
condition: '{{ true }}'
name: vmoperator
rules:
# NOTE: the escaped double-brace expressions inside the annotations below are
# emitted literally by Helm, so they remain as template expressions in the
# rendered rule (presumably expanded later by the alert evaluator - confirm).

# LogErrors: the per-group rate of error-level operator log messages stays
# above zero for 15 minutes.
- alert: LogErrors
  annotations:
    dashboard: '{{`{{`}} $externalURL {{`}}`}}/d/1H179hunk/victoriametrics-operator?ds={{`{{`}} $labels.dc {{`}}`}}&orgId=1&viewPanel=16'
    description: 'Operator has too many errors at logs: {{`{{`}} $value{{`}}`}}, check operator logs'
    summary: 'Too many errors at logs of operator: {{`{{`}} $value{{`}}`}}'
  condition: '{{ true }}'
  expr: |-
    sum(
      rate(
        operator_log_messages_total{
          level="error",job=~".*((victoria.*)|vm)-?operator"
        }[5m]
      )
    ) by ({{ $groupLabels }}) > 0
  for: 15m
  labels:
    severity: warning
    show_at: dashboard
# ReconcileErrors: the per-group rate of controller-runtime reconcile errors
# stays above zero for 10 minutes.
- alert: ReconcileErrors
  annotations:
    dashboard: '{{`{{`}} $externalURL {{`}}`}}/d/1H179hunk/victoriametrics-operator?ds={{`{{`}} $labels.dc {{`}}`}}&orgId=1&viewPanel=10'
    description: 'Operator cannot parse response from k8s api server, possible bug: {{`{{`}} $value {{`}}`}}, check operator logs'
    summary: 'Too many errors at reconcile loop of operator: {{`{{`}} $value{{`}}`}}'
  condition: '{{ true }}'
  expr: |-
    sum(
      rate(
        controller_runtime_reconcile_errors_total{
          job=~".*((victoria.*)|vm)-?operator"
        }[5m]
      )
    ) by ({{ $groupLabels }}) > 0
  for: 10m
  labels:
    severity: warning
    show_at: dashboard
# HighQueueDepth: the summed work-queue depth of one of the listed controllers
# (grouped by queue name and the group labels) stays above 10 for 15 minutes.
- alert: HighQueueDepth
  annotations:
    dashboard: '{{`{{`}} $externalURL {{`}}`}}/d/1H179hunk/victoriametrics-operator?ds={{`{{`}} $labels.dc {{`}}`}}&orgId=1&viewPanel=20'
    description: 'Operator cannot handle reconciliation load for controller: `{{`{{`}}- $labels.name {{`}}`}}`, current depth: {{`{{`}} $value {{`}}`}}'
    summary: 'Too many `{{`{{`}}- $labels.name {{`}}`}}` in queue: {{`{{`}} $value {{`}}`}}'
  condition: '{{ true }}'
  expr: |-
    sum(
      workqueue_depth{
        job=~".*((victoria.*)|vm)-?operator",
        name=~"(vmagent|vmalert|vmalertmanager|vmauth|vmcluster|vmnodescrape|vmpodscrape|vmprobe|vmrule|vmservicescrape|vmsingle|vmstaticscrape)"
      }
    ) by (name,{{ $groupLabels }}) > 10
  for: 15m
  labels:
    severity: warning
    show_at: dashboard