{{- /* Grafana dashboard for vmagent rendered through Helm. The preamble reads chart values from .helm.Values (falling back to .Values) and sets $defaultDatasource to the type of the victoriametrics sidecar datasource flagged isDefault, or "prometheus" when none is flagged. Query annotations use $defaultDatasource like every panel target does. */ -}}
{{- $Values := (.helm).Values | default .Values }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} {{- $multicluster := ((($Values.grafana).sidecar).dashboards).multicluster | default false }} {{- $defaultDatasource := "prometheus" -}} {{- range (((($Values.grafana).sidecar).datasources).victoriametrics | default list) }} {{- if and .isDefault .type }}{{ $defaultDatasource = .type }}{{- end }} {{- end }} annotations: list: - builtIn: 1 datasource: type: datasource uid: grafana enable: true hide: true iconColor: rgba(0, 211, 255, 1) name: Annotations & Alerts target: limit: 100 matchAny: false tags: [] type: dashboard type: dashboard - datasource: type: {{ $defaultDatasource }} uid: $ds enable: true expr: sum(vm_app_version{job=~"$job", instance=~"$instance"}) by(short_version) unless (sum(vm_app_version{job=~"$job", instance=~"$instance"} offset $__interval) by(short_version)) hide: true iconColor: dark-blue name: version textFormat: '{{`{{`}}short_version{{`}}`}}' titleFormat: Version change - datasource: type: {{ $defaultDatasource }} uid: $ds enable: true expr: sum(changes(vm_app_start_timestamp{job=~"$job", instance=~"$instance"}[$__interval])) by(job, instance) hide: false iconColor: dark-yellow name: restarts textFormat: '{{`{{`}}job{{`}}`}}:{{`{{`}}instance{{`}}`}} restarted' condition: {{ true }} description: Overview for VictoriaMetrics vmagent v1.117.0 or higher editable: false fiscalYearStartMonth: 0 graphTooltip: 1 id: 2 links: - icon: doc tags: [] targetBlank: true title: vmagent wiki tooltip: '' type: link url: https://docs.victoriametrics.com/victoriametrics/vmagent/ - icon: external link tags: [] targetBlank: true title: Found a bug? 
type: link url: https://github.com/VictoriaMetrics/VictoriaMetrics/issues - icon: external link tags: [] targetBlank: true title: New releases type: link url: https://github.com/VictoriaMetrics/VictoriaMetrics/releases - asDropdown: false icon: external link includeVars: false keepTime: false tags: [] targetBlank: true title: Troubleshooting tooltip: '' type: link url: https://docs.victoriametrics.com/victoriametrics/vmagent/#troubleshooting panels: - collapsed: false gridPos: h: 1 w: 24 x: 0 'y': 0 id: 105 panels: [] title: Stats type: row - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows the rate of [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples) scraped from configured targets. fieldConfig: defaults: mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null overrides: [] gridPos: h: 3 w: 6 x: 0 'y': 1 id: 103 options: colorMode: value graphMode: area justifyMode: auto orientation: auto percentChangeColorMode: standard reduceOptions: calcs: - last fields: '' values: false showPercentChange: false text: {} textMode: auto wideLayout: true pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vm_promscrape_scraped_samples_sum{job=~"$job", instance=~"$instance"}[$__rate_interval])) interval: '' legendFormat: __auto range: true refId: A title: Samples scraped/s type: stat - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows the number of targets scraped per second. 
fieldConfig: defaults: decimals: 1 mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null overrides: [] gridPos: h: 3 w: 6 x: 6 'y': 1 id: 134 options: colorMode: value graphMode: none justifyMode: auto orientation: auto percentChangeColorMode: standard reduceOptions: calcs: - last fields: '' values: false showPercentChange: false text: {} textMode: auto wideLayout: true pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vm_promscrape_scrapes_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) interval: '' legendFormat: __auto range: true refId: A title: Targets scraped/s type: stat - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows number of generated error messages in logs over last 30m. Non-zero value may be a sign of connectivity or misconfiguration errors. fieldConfig: defaults: mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 1 unit: short overrides: [] gridPos: h: 3 w: 6 x: 12 'y': 1 id: 16 links: - targetBlank: true title: Troubleshooting url: https://docs.victoriametrics.com/victoriametrics/vmagent/#troubleshooting options: colorMode: value graphMode: area justifyMode: auto orientation: auto percentChangeColorMode: standard reduceOptions: calcs: - last fields: '' values: false showPercentChange: false text: {} textMode: auto wideLayout: true pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds expr: sum(increase(vm_log_messages_total{job=~"$job", instance=~"$instance", level!="info"}[30m])) interval: '' legendFormat: '' refId: A title: Log errors (30m) type: stat - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Total number of available CPUs for selected vmagents. 
' fieldConfig: defaults: color: mode: thresholds mappings: [] thresholds: mode: absolute steps: - color: green value: null unit: short overrides: [] gridPos: h: 3 w: 6 x: 18 'y': 1 id: 152 maxDataPoints: 100 options: colorMode: value graphMode: area justifyMode: auto orientation: horizontal percentChangeColorMode: standard reduceOptions: calcs: - lastNotNull fields: '' values: false showPercentChange: false text: {} textMode: auto wideLayout: true pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(vm_available_cpu_cores{job=~"$job", instance=~"$instance"}) format: time_series instant: true interval: '' intervalFactor: 1 legendFormat: '' refId: A title: Available CPU type: stat - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows the rate of ingested [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples) fieldConfig: defaults: mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null overrides: [] gridPos: h: 3 w: 6 x: 0 'y': 4 id: 102 options: colorMode: value graphMode: area justifyMode: auto orientation: auto percentChangeColorMode: standard reduceOptions: calcs: - last fields: '' values: false showPercentChange: false text: {} textMode: auto wideLayout: true pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vmagent_rows_inserted_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) interval: '' legendFormat: __auto range: true refId: A title: Samples ingested/s type: stat - datasource: type: {{ $defaultDatasource }} uid: $ds description: "Shows total number of all configured scrape targets in state `up` or `down`.\n\nSee `http://vmagent-host:8429/targets` to get list of all targets. 
\n" fieldConfig: defaults: mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null overrides: - matcher: id: byName options: down properties: - id: thresholds value: mode: absolute steps: - color: green value: null - color: red value: 1 gridPos: h: 3 w: 6 x: 6 'y': 4 id: 72 options: colorMode: value graphMode: area justifyMode: auto orientation: auto percentChangeColorMode: standard reduceOptions: calcs: - last fields: '' values: false showPercentChange: false text: {} textMode: auto wideLayout: true pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(vm_promscrape_targets{job=~"$job", instance=~"$instance", status="up"}) interval: '' legendFormat: up range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(vm_promscrape_targets{job=~"$job", instance=~"$instance", status="down"}) hide: false interval: '' legendFormat: down range: true refId: B title: Scrape targets type: stat - datasource: type: {{ $defaultDatasource }} uid: $ds description: "Persistent queue size shows size of pending samples in bytes which hasn't been flushed to remote storage yet. \nIncreasing of value might be a sign of connectivity issues. In such cases, vmagent starts to flush pending data on disk with attempt to send it later once connection is restored." 
fieldConfig: defaults: mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 10485760 unit: bytes overrides: [] gridPos: h: 3 w: 6 x: 12 'y': 4 id: 56 options: colorMode: value graphMode: area justifyMode: auto orientation: auto percentChangeColorMode: standard reduceOptions: calcs: - last fields: '' values: false showPercentChange: false text: {} textMode: auto wideLayout: true pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds expr: sum(vmagent_remotewrite_pending_data_bytes{job=~"$job", instance=~"$instance"}) interval: '' legendFormat: '' refId: A title: Persistent queue size type: stat - datasource: type: {{ $defaultDatasource }} uid: $ds description: Total size of available memory for selected vmagents. fieldConfig: defaults: color: mode: thresholds mappings: [] thresholds: mode: absolute steps: - color: green value: null unit: bytes overrides: [] gridPos: h: 3 w: 6 x: 18 'y': 4 id: 153 maxDataPoints: 100 options: colorMode: value graphMode: area justifyMode: auto orientation: horizontal percentChangeColorMode: standard reduceOptions: calcs: - lastNotNull fields: '' values: false showPercentChange: false text: {} textMode: auto wideLayout: true pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(vm_available_memory_bytes{job=~"$job", instance=~"$instance"}) format: time_series instant: true interval: '' intervalFactor: 1 legendFormat: '' refId: A title: Available memory type: stat - datasource: type: {{ $defaultDatasource }} uid: $ds fieldConfig: defaults: color: mode: thresholds custom: align: auto cellOptions: type: auto inspect: false minWidth: 50 mappings: [] thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 overrides: - matcher: id: byName options: Time properties: - id: custom.hidden value: true - matcher: id: byName options: Value properties: - id: displayName 
value: Count gridPos: h: 5 w: 8 x: 0 'y': 7 id: 101 options: cellHeight: sm footer: countRows: false fields: '' reducer: - sum show: false showHeader: true pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: false expr: sum(vm_app_version{job=~"$job", instance=~"$instance"}) by(job, short_version) format: table instant: true range: false refId: A title: '' type: table - datasource: type: {{ $defaultDatasource }} uid: $ds fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: stepAfter lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' decimals: 0 links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: short overrides: [] gridPos: h: 5 w: 16 x: 8 'y': 7 id: 13 options: legend: calcs: - lastNotNull displayMode: table placement: right showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(up{job=~"$job", instance=~"$instance"}) by (job) format: time_series instant: false interval: '' legendFormat: __auto refId: A title: Uptime type: timeseries - collapsed: false gridPos: h: 1 w: 24 x: 0 'y': 12 id: 24 panels: [] title: Overview type: row - datasource: type: {{ $defaultDatasource }} uid: $ds description: "Shows in/out [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples) rate including push and pull models. 
\n\nThe out-rate could be different to in-rate because of replication or additional timeseries added by vmagent for every scraped target.\n\nClick on the line and choose Drilldown to show samples rate per instance\n" {{- /* The Drilldown data link below opens panel 123 of dashboard G7Z9GzMGz for the clicked series job; presumably that is the per-instance samples rate view (confirm against the Drilldown row). */}} fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: - title: Drilldown url: /d/G7Z9GzMGz?viewPanel=123&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range} mappings: [] thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: short overrides: - matcher: id: byRegexp options: /out .*/ properties: - id: custom.transform value: negative-Y gridPos: h: 8 w: 12 x: 0 'y': 13 id: 5 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate({__name__=~"vmagent_rows_inserted_total|vm_promscrape_scraped_samples_sum", job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) hide: false interval: '' legendFormat: in {{`{{`}}job{{`}}`}} range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vmagent_remotewrite_block_size_rows_sum{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) interval: '' legendFormat: out {{`{{`}}job{{`}}`}} range: true refId: B title: Samples rate ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds 
description: "Shows the persistent queue size of pending samples in bytes >2MB which hasn't been flushed to remote storage yet. \n\nIncreasing of value might be a sign of connectivity issues. In such cases, vmagent starts to flush pending data on disk with attempt to send it later once connection is restored.\n\nRemote write URLs are hidden by default but might be unveiled once `-remoteWrite.showURL` is set to true.\n\nClick on the line and choose Drilldown to show the persistent queue size per instance.\n" fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: - targetBlank: true title: Drilldown url: /d/G7Z9GzMGz?viewPanel=125&var-url=${__field.labels.url}&var-ds=$ds&var-instance=$instance&var-job=$job&${__url_time_range} mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: bytes overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 13 id: 17 links: - title: Troubleshooting url: https://docs.victoriametrics.com/victoriametrics/vmagent/#troubleshooting options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(vmagent_remotewrite_pending_data_bytes{job=~"$job", instance=~"$instance", url=~"$url"}) by (job, url) > 2e6 interval: '' legendFormat: '{{`{{`}}job{{`}}`}} => {{`{{`}}url{{`}}`}}' range: true refId: A title: Persistent queue 
size ($instance) to ($url) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows the rate of logging the messages by their level. Unexpected spike in rate is a good reason to check logs. fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: bars fillOpacity: 100 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: normal thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 21 id: 107 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(rate(vm_log_messages_total{job=~"$job",instance=~"$instance", level!="info"}[$__rate_interval])) by (job, level) > 0 format: time_series hide: false interval: 5m intervalFactor: 1 legendFormat: '{{`{{`}}job{{`}}`}} - {{`{{`}}level{{`}}`}}' range: true refId: A title: Logging rate type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows rate of dropped [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples) from persistent queue. vmagent drops samples from queue if in-memory and on-disk queues are full and it is unable to flush them to remote storage. The max size of on-disk queue is configured by `-remoteWrite.maxDiskUsagePerURL` flag.' 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: decbytes overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 21 id: 49 links: - targetBlank: true title: Troubleshooting url: https://docs.victoriametrics.com/victoriametrics/vmagent/#troubleshooting options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vm_persistentqueue_bytes_dropped_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by (path) > 0 interval: '' legendFormat: '{{`{{`}} path {{`}}`}}' range: true refId: A title: Persistent queue dropped rate ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows the rate of requests served by vmagent HTTP server. 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 29 id: 15 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vmagent_http_requests_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, path) > 0 interval: '' legendFormat: '' range: true refId: A title: Requests rate ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Errors rate shows rate for multiple metrics that track possible errors in vmagent, such as network or parsing errors. 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 29 id: 69 links: - targetBlank: true title: Troubleshooting url: https://docs.victoriametrics.com/victoriametrics/vmagent/#troubleshooting options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vmagent_http_request_errors_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, protocol) > 0 interval: '' legendFormat: requests:{{`{{`}}protocol{{`}}`}} ({{`{{`}}job{{`}}`}}) range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vm_protoparser_read_errors_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, type) > 0 interval: '' legendFormat: 'parse: {{`{{`}}type{{`}}`}} ({{`{{`}}job{{`}}`}})' range: true refId: B - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vm_ingestserver_request_errors_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, type) > 0 interval: '' legendFormat: 'ingest: {{`{{`}}type{{`}}`}} ({{`{{`}}job{{`}}`}})' range: true refId: C - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: 
sum(rate(vm_protoparser_unmarshal_errors_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, type) > 0 interval: '' legendFormat: 'unmarshal: {{`{{`}}type{{`}}`}} ({{`{{`}}job{{`}}`}})' range: true refId: D - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vm_promscrape_dial_errors_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) > 0 interval: '' legendFormat: scrape dial ({{`{{`}}job{{`}}`}}) range: true refId: E title: Errors rate ($instance) type: timeseries - collapsed: true gridPos: h: 1 w: 24 x: 0 'y': 37 id: 45 panels: - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Percentage of used RSS memory (resident). The RSS memory shows the amount of memory recently accessed by the application. It includes anonymous memory and data from recently accessed files (aka page cache). The application''s performance will significantly degrade when memory usage is close to 100%. Click on the line and choose Drilldown to show memory usage per instance' fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: - targetBlank: true title: Drilldown url: /d/G7Z9GzMGz?viewPanel=117&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range} mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: percentunit overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 38 id: 111 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * 
sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: |- max( max_over_time(process_resident_memory_bytes{job=~"$job", instance=~"$instance"}[$__rate_interval]) / vm_available_memory_bytes{job=~"$job", instance=~"$instance"} ) by(job) interval: '' legendFormat: __auto range: true refId: A title: RSS memory % usage ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: - targetBlank: true title: Drilldown url: /d/G7Z9GzMGz?viewPanel=119&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range} mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: percentunit overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 38 id: 157 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: false expr: |- max( rate(process_cpu_seconds_total{job=~"$job", instance=~"$instance"}[$__rate_interval]) / process_cpu_cores_available{job=~"$job", instance=~"$instance"} ) by(instance) format: time_series interval: '' intervalFactor: 1 legendFormat: __auto range: true refId: A title: CPU % usage type: timeseries - datasource: type: {{ 
$defaultDatasource }} uid: $ds description: 'Share for memory allocated by the process itself. When memory usage reaches 100% it will be likely OOM-killed. Safe memory usage % considered to be below 80% Click on the line and choose Drilldown to show memory usage per instance' fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: percentunit overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 46 id: 155 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: false expr: |- max( max_over_time(process_resident_memory_anon_bytes{job=~"$job", instance=~"$instance"}[$__rate_interval]) / vm_available_memory_bytes{job=~"$job", instance=~"$instance"} ) by(instance) interval: '' legendFormat: __auto range: true refId: A title: RSS anonymous memory % usage type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html). The lower the better.' 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: line decimals: 0 links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null unit: s overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 46 id: 158 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 {{- /* Both CPU pressure queries filter on $instance as well as $job so the panel follows the instance selector like the other panels of this dashboard. */}} targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(process_pressure_cpu_waiting_seconds_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by (job, instance) format: time_series interval: '' intervalFactor: 2 legendFormat: '{{`{{`}}instance{{`}}`}} - waiting' range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(process_pressure_cpu_stalled_seconds_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by (job, instance) format: time_series hide: false interval: '' intervalFactor: 2 legendFormat: '{{`{{`}}instance{{`}}`}} - stalled' range: true refId: B title: CPU pressure type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html). The lower the better.' 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: line decimals: 0 links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null unit: s overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 54 id: 156 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 {{- /* Both memory pressure queries filter on $instance as well as $job so the panel follows the instance selector like the other panels of this dashboard. */}} targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(process_pressure_memory_waiting_seconds_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by (job, instance) format: time_series interval: '' intervalFactor: 2 legendFormat: '{{`{{`}}instance{{`}}`}} - waiting' range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(process_pressure_memory_stalled_seconds_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by (job, instance) format: time_series hide: false interval: '' intervalFactor: 2 legendFormat: '{{`{{`}}instance{{`}}`}} - stalled' range: true refId: B title: Memory pressure type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows the number of bytes read/write from the storage layer when vmagent has to buffer data on disk or read already buffered data. 
Click on the line and choose Drilldown to show disk writes/reads per instance' fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: - targetBlank: true title: Drilldown url: /d/G7Z9GzMGz?viewPanel=121&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range} mappings: [] thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: bytes overrides: - matcher: id: byName options: read properties: - id: custom.transform value: negative-Y gridPos: h: 8 w: 12 x: 12 'y': 54 id: 81 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(process_io_storage_read_bytes_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) format: time_series hide: false interval: '' intervalFactor: 1 legendFormat: read {{`{{`}}job{{`}}`}} range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(process_io_storage_written_bytes_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) format: time_series hide: false interval: '' intervalFactor: 1 legendFormat: write {{`{{`}}job{{`}}`}} range: true refId: B title: Disk writes/reads ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: 
false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' decimals: 0 links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 62 id: 39 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(max_over_time(go_goroutines{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) format: time_series interval: '' intervalFactor: 2 legendFormat: __auto range: true refId: A title: Goroutines ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html). The lower the better.' 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: line decimals: 0 links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null unit: s overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 62 id: 159 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(process_pressure_io_waiting_seconds_total{job=~"$job"}[$__rate_interval])) by (job, instance) format: time_series interval: '' intervalFactor: 2 legendFormat: '{{`{{`}}instance{{`}}`}} - waiting' range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(process_pressure_io_stalled_seconds_total{job=~"$job"}[$__rate_interval])) by (job, instance) format: time_series hide: false interval: '' intervalFactor: 2 legendFormat: '{{`{{`}}instance{{`}}`}} - stalled' range: true refId: B title: IO pressure type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never 
spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' decimals: 0 links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 70 id: 41 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(max_over_time(process_num_threads{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) format: time_series intervalFactor: 2 legendFormat: __auto range: true refId: A title: Threads ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Network usage shows the bytes rate for data accepted by vmagent and pushed via remotewrite protocol. Discrepancies are possible because of different protocols used for ingesting, scraping and writing data.' 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: bps overrides: - matcher: id: byRegexp options: /out.*/ properties: - id: custom.transform value: negative-Y gridPos: h: 8 w: 12 x: 12 'y': 70 id: 7 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: "sum(rate(vm_tcplistener_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) * 8 \n+ sum(rate(vm_promscrape_conn_bytes_read_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) * 8" interval: '' legendFormat: in {{`{{`}}job{{`}}`}} range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vmagent_remotewrite_conn_bytes_written_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) * 8 interval: '' legendFormat: out {{`{{`}}job{{`}}`}} range: true refId: B title: Network usage ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows the percent of CPU spent on garbage collection. If % is high, then CPU usage can be decreased by changing GOGC to higher values. Increasing GOGC value will increase memory usage, and decrease CPU usage. 
Try searching for keyword `GOGC` at https://docs.victoriametrics.com/victoriametrics/troubleshooting/ ' fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' decimals: 0 links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: percentunit overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 78 id: 135 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: "max(\n rate(go_gc_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) \n / rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n ) by(job)" format: time_series interval: '' intervalFactor: 2 legendFormat: __auto range: true refId: A title: CPU spent on GC ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing." 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: line decimals: 0 links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 0.1 unit: s overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 78 id: 149 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: max(histogram_quantile(0.99, sum(rate(go_sched_latencies_seconds_bucket{job=~"$job"}[$__rate_interval])) by (job, instance, le))) by(job) format: time_series interval: '' intervalFactor: 2 legendFormat: __auto range: true refId: A title: Go scheduling latency type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows the rate of allocations in memory. Sudden increase in allocations would mean increased pressure on Go Garbage Collector and can saturate CPU resources of the application. 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: line decimals: 0 links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null unit: bytes overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 86 id: 154 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(go_memstats_alloc_bytes_total{job=~"$job"}[$__rate_interval])) by (job, instance) format: time_series interval: '' intervalFactor: 2 legendFormat: __auto range: true refId: A title: Memory allocations rate type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Panel shows the percentage of open file descriptors in the OS per instance. Reaching the limit of open files (100%) can cause various issues and must be prevented. 
See how to change limits here https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a' fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' decimals: 5 links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: percentunit overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 86 id: 83 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: |- max( max_over_time(process_open_fds{job=~"$job", instance=~"$instance"}[$__rate_interval]) / process_max_fds{job=~"$job", instance=~"$instance"} ) by(job) format: time_series interval: '' intervalFactor: 2 legendFormat: __auto range: true refId: A title: Open FDs usage % ($instance) type: timeseries title: Resource usage type: row - collapsed: true gridPos: h: 1 w: 24 x: 0 'y': 38 id: 94 panels: - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows top 10 jobs by the number of new series registered by vmagent over the 5min range. These jobs generate the most of the churn rate. 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' mappings: [] thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1580 id: 92 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: false expr: topk(10, sum(sum_over_time(scrape_series_added[5m])) by (job)) > 0 interval: '' legendFormat: '{{`{{`}} job {{`}}`}}' range: true refId: A title: Top 10 jobs by unique samples type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows top 10 instances by the number of new series registered by vmagent over the 5min range. These instances generate the most of the churn rate. 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' mappings: [] thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1580 id: 95 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: false expr: topk(10, sum(sum_over_time(scrape_series_added[5m])) by (instance)) > 0 interval: '' legendFormat: __auto range: true refId: A title: Top 10 instances by unique samples type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows write saturation of the persistent queue. If the threshold of 0.9sec is reached, then the persistent queue is saturated by more than 90% and vmagent won't be able to keep up with flushing data on disk. In this case, consider to decrease load on the vmagent or improve the disk throughput. 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto axisSoftMax: 2 barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: line links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: transparent - color: red value: 0.9 unit: s overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1588 id: 98 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: max(rate(vm_persistentqueue_write_duration_seconds_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by (job) interval: '' legendFormat: __auto range: true refId: A title: Persistent queue write saturation ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows read saturation of the persistent queue. If the threshold of 0.9sec is reached, then the persistent queue is saturated by more than 90% and vmagent won't be able to keep up with reading data from the disk. In this case, consider to decrease load on the vmagent or improve the disk throughput. 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto axisSoftMax: 2 barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: line links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: transparent - color: red value: 0.9 unit: s overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1588 id: 99 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: max(rate(vm_persistentqueue_read_duration_seconds_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by (job) interval: '' legendFormat: __auto range: true refId: A title: Persistent queue read saturation ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows the rate of dropped data blocks in cases when remote storage replies with `400 Bad Request` and `409 Conflict` HTTP responses. 
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1149' fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: normal thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1596 id: 79 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(rate(vmagent_remotewrite_packets_dropped_total{job=~"$job", instance=~"$instance", url=~"$url"}[$__rate_interval])) by(job, url) > 0 interval: '' legendFormat: __auto range: true refId: A title: Data blocks dropped ($instance) to ($url) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: "Shows the rate of dropped [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples) due to relabeling. \nMetric tracks drops for `-remoteWrite.relabelConfig` configuration only." 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: normal thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1596 id: 18 links: - targetBlank: true title: Relabeling url: https://docs.victoriametrics.com/victoriametrics/relabeling/ options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vmagent_remotewrite_relabel_metrics_dropped_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, url) > 0 interval: '' legendFormat: __auto range: true refId: B title: Rows dropped by relabeling ($instance) to ($url) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows the rate of parsed datapoints from write or scrape requests. 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1604 id: 127 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(rate(vm_protoparser_rows_read_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, type) > 0 interval: '' legendFormat: '{{`{{`}} type {{`}}`}} ({{`{{`}}job{{`}}`}})' range: true refId: A title: Datapoints rate ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Tracks the rate of dropped invalid rows because of errors while unmarshaling write requests. The exact error messages will be printed in the logs. 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1604 id: 50 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(rate(vm_rows_invalid_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, type) > 0 interval: '' legendFormat: '{{`{{`}}type{{`}}`}} ({{`{{`}}job{{`}}`}})' range: true refId: A title: Invalid datapoints rate ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds fieldConfig: defaults: color: mode: thresholds custom: align: auto cellOptions: type: auto inspect: false mappings: [] thresholds: mode: absolute steps: - color: green - color: red value: 80 overrides: - matcher: id: byName options: Value properties: - id: custom.hidden value: true - matcher: id: byName options: Time properties: - id: custom.hidden value: true gridPos: h: 7 w: 12 x: 0 'y': 1612 id: 129 options: cellHeight: sm footer: countRows: false fields: '' reducer: - sum show: false showHeader: true sortBy: - desc: true displayName: job pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: false expr: sum(flag{is_set="true", job=~"$job", instance=~"$instance"}) by(job, 
instance, name, value) format: table instant: true legendFormat: __auto range: false refId: A title: Non-default flags transformations: - id: groupBy options: fields: instance: aggregations: [] job: aggregations: [] operation: groupby name: aggregations: [] operation: groupby value: aggregations: [] operation: groupby type: table - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn''t restart unless the restart was initiated by the user. The reason for restarts should be figured out by checking the logs of each specific service. ' fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto axisSoftMin: 0 barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: stepAfter lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' decimals: 0 links: [] mappings: [] thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: none overrides: [] gridPos: h: 7 w: 12 x: 12 'y': 1612 id: 150 options: legend: calcs: - lastNotNull displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(changes(vm_app_start_timestamp{job=~"$job", instance=~"$instance"}[$__rate_interval]) > 0) by(job) format: time_series instant: false legendFormat: '{{`{{`}}job{{`}}`}}' refId: A title: Restarts ($job) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows how many 
samples were ignored on insertion due to various reasons. See the reason for rejection in the application logs. fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' mappings: [] thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1619 id: 151 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(increase(vm_rows_ignored_total{job=~"$job", instance=~"$instance"}[1h])) by (reason) interval: '' legendFormat: __auto range: true refId: A title: Rows ignored for last 1h ($instance) type: timeseries title: Troubleshooting type: row - collapsed: true gridPos: h: 1 w: 24 x: 0 'y': 39 id: 28 panels: - datasource: type: {{ $defaultDatasource }} uid: $ds fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: 
red value: 80 unit: short overrides: [] gridPos: h: 7 w: 12 x: 0 'y': 1825 id: 48 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(vm_promscrape_targets{job=~"$job", instance=~"$instance", status="up"}) by(job, type) > 0 format: time_series interval: '' legendFormat: '{{`{{`}}type{{`}}`}} ({{`{{`}}job{{`}}`}})' range: true refId: A title: Scrape targets UP(By Type) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 10 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 7 w: 12 x: 12 'y': 1825 id: 76 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(vm_promscrape_targets{job=~"$job", instance=~"$instance", status="down"}) by(job, type) > 0 format: time_series interval: '' legendFormat: '{{`{{`}}type{{`}}`}} ({{`{{`}}job{{`}}`}})' range: true refId: A title: Scrape targets DOWN(By Type) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds fieldConfig: defaults: color: mode: 
palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 7 w: 12 x: 0 'y': 1832 id: 132 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(vm_promscrape_scrape_pool_targets{job=~"$job", instance=~"$instance", status="up"}) by(job, scrape_job) > 0 format: time_series interval: '' legendFormat: '{{`{{`}}job{{`}}`}}: {{`{{`}}scrape_job{{`}}`}}' range: true refId: A title: Scrape targets UP(By Job) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 10 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 7 w: 12 x: 12 'y': 1832 id: 133 options: legend: calcs: - mean - lastNotNull - max displayMode: 
table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(vm_promscrape_scrape_pool_targets{job=~"$job", instance=~"$instance", status="down"}) by(job, scrape_job) > 0 format: time_series interval: '' legendFormat: '{{`{{`}}job{{`}}`}}: {{`{{`}}scrape_job{{`}}`}}' range: true refId: A title: Scrape targets DOWN(By Job) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows the number of scrapes per second. fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1839 id: 20 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(rate(vm_promscrape_scrapes_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) interval: '' legendFormat: __auto range: true refId: A title: Scrape rate ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows the number of datapoints scraped per second. 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1839 id: 126 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(rate(vm_promscrape_scraped_samples_sum{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) interval: '' legendFormat: __auto range: true refId: A title: Scraped datapoints rate ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: bytes overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1847 id: 46 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: 
hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: max(histogram_quantile(0.99, sum(rate(vm_promscrape_scrape_response_size_bytes_bucket{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, vmrange))) by(job) format: time_series interval: '' legendFormat: __auto range: true refId: A title: Scrape response size 0.99 quantile ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: s overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1847 id: 148 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: max(histogram_quantile(0.99, sum(rate(vm_promscrape_scrape_duration_seconds_bucket{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, vmrange))) by(job) format: time_series interval: '' legendFormat: __auto range: true refId: A title: Scrape duration 0.99 quantile ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 
drawStyle: line fillOpacity: 10 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1855 id: 31 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vm_promscrape_scrapes_failed_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) > 0 interval: '' legendFormat: scrapes failed ({{`{{`}}job{{`}}`}}) range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vm_promscrape_scrapes_timed_out_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) > 0 interval: '' legendFormat: timeouts ({{`{{`}}job{{`}}`}}) range: true refId: B - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vm_promscrape_scrapes_gunzip_failed_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) > 0 interval: '' legendFormat: gunzip fails ({{`{{`}}job{{`}}`}}) range: true refId: C - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vm_promscrape_dial_errors_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) > 0 interval: '' legendFormat: dial fails ({{`{{`}}job{{`}}`}}) range: true refId: D - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vm_promscrape_max_scrape_size_exceeded_errors_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) > 0 hide: false interval: '' legendFormat: max 
scrape size exceeded ({{`{{`}}job{{`}}`}}) range: true refId: E title: Scrape fails ($instance) type: timeseries title: Scraping type: row - collapsed: true gridPos: h: 1 w: 24 x: 0 'y': 40 id: 71 panels: - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows the rate of write requests served by ingestserver (UDP, TCP connections) and HTTP server. fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1547 id: 73 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: mode: multi sort: desc pluginVersion: 9.2.6 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(rate(vm_ingestserver_requests_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, type, net) > 0 interval: '' legendFormat: '{{`{{`}}net{{`}}`}}: {{`{{`}} type {{`}}`}} ({{`{{`}}job{{`}}`}})' range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(rate(vmagent_http_requests_total{job=~"$job", instance=~"$instance", protocol!=""}[$__rate_interval])) by(job, protocol) > 0 interval: '' legendFormat: '{{`{{`}} protocol {{`}}`}}: http ({{`{{`}}job{{`}}`}})' range: true refId: B title: Requests rate ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows the rate of rows ingested in vmagent via push 
protocols. fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1547 id: 131 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: mode: multi sort: desc pluginVersion: 9.2.6 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(rate(vmagent_rows_inserted_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, type) > 0 interval: '' legendFormat: '{{`{{`}} type {{`}}`}} ({{`{{`}}job{{`}}`}})' range: true refId: A title: Rows rate ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: "Shows how many concurrent inserts are taking place.\n\nIf the number of concurrent inserts is hitting the `limit` or is close to the `limit` constantly - it might be a sign of a resource shortage.\n\n If vmagent's CPU usage and remote write connection saturation are at normal level, it might be that `-maxConcurrentInserts` cmd-line flag needs to be increased."
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1823 id: 130 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: mode: multi sort: desc pluginVersion: 9.2.6 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: max_over_time(vm_concurrent_insert_current{job=~"$job", instance=~"$instance"}[$__rate_interval]) interval: '' legendFormat: '{{`{{`}}instance{{`}}`}} ({{`{{`}}job{{`}}`}})' range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: min(vm_concurrent_insert_capacity{job=~"$job", instance=~"$instance"}) by(job) interval: '' legendFormat: limit ({{`{{`}}job{{`}}`}}) range: true refId: B title: Concurrent inserts ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows the rate of write errors in ingestserver (UDP, TCP connections) and HTTP server. 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1823 id: 77 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: mode: multi sort: desc pluginVersion: 9.2.6 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds exemplar: true expr: sum(rate(vm_ingestserver_request_errors_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(type, net) > 0 interval: '' legendFormat: '{{`{{`}} type {{`}}`}} ({{`{{`}}net{{`}}`}})' refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds exemplar: true expr: sum(rate(vmagent_http_request_errors_total{job=~"$job", instance=~"$instance", protocol!=""}[$__rate_interval])) by(protocol) > 0 interval: '' legendFormat: '{{`{{`}} protocol {{`}}`}} (http)' refId: B title: Error rate ($instance) type: timeseries title: Ingestion type: row - collapsed: true gridPos: h: 1 w: 24 x: 0 'y': 41 id: 136 panels: - datasource: type: {{ $defaultDatasource }} uid: $ds description: "Shows the number of samples matched by the aggregation rule. \n\nThe more samples are matched, the more work this aggregation rule does. The matching rule is specified via `match` param.\n\nSee more details in [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#stream-aggregation-config). 
" fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto axisSoftMin: 0 barAlignment: 0 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: auto spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' mappings: [] thresholds: mode: absolute steps: - color: green - color: red value: 80 overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1870 id: 146 options: legend: calcs: - min - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: mode: single sort: none targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vm_streamaggr_matched_samples_total{job=~"$job",instance=~"$instance", url=~"$url"}[$__rate_interval])) without (instance, pod) > 0 instant: false legendFormat: __auto range: true refId: A title: Matched samples ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: "The rate of ignored samples during aggregation. \nStream aggregation will drop samples with NaN values, or samples with too old timestamps. 
See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples " fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto axisSoftMin: 0 barAlignment: 0 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: auto spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' mappings: [] thresholds: mode: absolute steps: - color: green - color: red value: 80 overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1870 id: 143 options: legend: calcs: [] displayMode: list placement: bottom showLegend: true tooltip: mode: single sort: none targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vm_streamaggr_ignored_samples_total{job=~"$job",instance=~"$instance", url=~"$url"}[$__rate_interval]) > 0) without (instance, pod) instant: false legendFormat: __auto range: true refId: A title: Ignored samples ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: "Shows the number of samples produced by the aggregation rule. \n\nThe number of produced samples depends on params like `by`, `without`, `interval`, etc.\n\nSee more details in [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#stream-aggregation-config). 
" fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto axisSoftMin: 0 barAlignment: 0 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: auto spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' mappings: [] thresholds: mode: absolute steps: - color: green - color: red value: 80 overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1878 id: 147 options: legend: calcs: - min - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: mode: single sort: none targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vm_streamaggr_output_samples_total{job=~"$job",instance=~"$instance", url=~"$url"}[$__rate_interval])) without (instance, pod) > 0 instant: false legendFormat: __auto range: true refId: A title: Produced samples ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: "Shows events when deduplication or aggregation couldn't be finished in the configured interval. 
Such events may result in poor accuracy of produced data.\n\nPossible solutions:\n* increase interval; \n* use a match filter matching a smaller number of series;\n* reduce samples ingestion rate to stream aggregation" fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto axisSoftMin: 0 barAlignment: 0 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: auto spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' mappings: [] min: -5 thresholds: mode: absolute steps: - color: green - color: red value: 80 overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1878 id: 139 options: legend: calcs: [] displayMode: list placement: bottom showLegend: true tooltip: mode: single sort: none targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: increase(vm_streamaggr_flush_timeouts_total{job=~"$job",instance=~"$instance", url=~"$url"}[$__rate_interval]) > 0 instant: false legendFormat: 'aggregation: {{`{{`}}url{{`}}`}} ({{`{{`}}job{{`}}`}}): {{`{{`}}path{{`}}`}}:{{`{{`}}position{{`}}`}}' range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: increase(vm_streamaggr_dedup_flush_timeouts_total{job=~"$job",instance=~"$instance", url=~"$url"}[$__rate_interval]) > 0 hide: false instant: false legendFormat: 'deduplication: {{`{{`}}url{{`}}`}} ({{`{{`}}job{{`}}`}}): {{`{{`}}path{{`}}`}}:{{`{{`}}position{{`}}`}}' range: true refId: B title: Flush timeouts ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows the max lag between samples timestamps within one batch passed to the aggregation input. Lower is better. 
Too high lag or lag exceeding the interval might be a sign that data was delayed before aggregation or of insufficient resources on the aggregator. Samples with high lag may affect accuracy of aggregation. See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples' fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto axisSoftMin: 0 barAlignment: 0 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: auto spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: s overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1886 id: 142 options: legend: calcs: [] displayMode: list placement: bottom showLegend: true tooltip: mode: single sort: none targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: histogram_quantile(0.99, sum(rate(vm_streamaggr_samples_lag_seconds_bucket{job=~"$job",instance=~"$instance", url=~"$url"}[$__rate_interval])) without (instance, pod)) instant: false legendFormat: __auto range: true refId: A title: Samples lag 0.99 quantile ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: "The 99th percentile of avg flush duration for the aggregated data. \n\nSmaller is better.\n\nAggregation can produce incorrect results if flush duration exceeds configured deduplication interval. See \"Flush Timeouts\" panel."
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto axisSoftMin: 0 barAlignment: 0 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: auto spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' mappings: [] thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: s overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1886 id: 137 options: legend: calcs: [] displayMode: list placement: bottom showLegend: true tooltip: mode: single sort: none targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: histogram_quantile(0.99, rate(vm_streamaggr_dedup_flush_duration_seconds_bucket{job=~"$job",instance=~"$instance", url=~"$url"}[$__rate_interval])) instant: false legendFormat: __auto range: true refId: A title: Dedup flush duration 0.99 quantile ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows the size of Label Compressor in number of entries. Labels compressor encodes label-value pairs during aggregation to optimise memory usage. It is expected for its size to grow with time and to reset on vmagent restarts. Rapid spikes in Label compressor size might be a sign of significant changes in labels of received samples.' 
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto axisSoftMin: 0 barAlignment: 0 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: auto spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' mappings: [] thresholds: mode: absolute steps: - color: green - color: red value: 80 overrides: - matcher: id: byRegexp options: /bytes.*/ properties: - id: custom.axisPlacement value: right - id: unit value: bytes gridPos: h: 8 w: 12 x: 12 'y': 1926 id: 141 options: legend: calcs: [] displayMode: list placement: bottom showLegend: true tooltip: mode: single sort: none targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: max(vm_streamaggr_labels_compressor_items_count{job=~"$job",instance=~"$instance"}) by(job, instance) hide: false instant: false legendFormat: 'items: {{`{{`}}instance{{`}}`}} ({{`{{`}}job{{`}}`}})' range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: max(vm_streamaggr_labels_compressor_size_bytes{job=~"$job", instance=~"$instance"}) by(job, instance) hide: false instant: false legendFormat: 'bytes: {{`{{`}}instance{{`}}`}} ({{`{{`}}job{{`}}`}})' range: true refId: B title: Labels compressor ($instance) type: timeseries title: Streaming aggregation type: row - collapsed: true gridPos: h: 1 w: 24 x: 0 'y': 42 id: 58 panels: - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows the rate of requests to configured remote write endpoints by url and status code. Remote write URLs are hidden by default but might be unveiled once `-remoteWrite.showURL` is set to true. 
' fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' decimals: 2 links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1828 id: 60 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(rate(vmagent_remotewrite_requests_total{job=~"$job", instance=~"$instance", url=~"$url"}[$__rate_interval])) by(job, url, status_code) > 0 interval: '' legendFormat: '' range: true refId: A title: Requests rate ($instance) to ($url) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows the global rate for number of written bytes via remote write connections. 
# "Remote write" row panels (continued). This span holds:
#   - the remainder of "Bytes write rate ($instance)" (id 66): per-job rate of
#     vmagent_remotewrite_conn_bytes_written_total, keeping only values > 0;
#   - "Retry rate ($instance) to ($url)" (id 61): per-url rate of
#     vmagent_remotewrite_retries_count_total, keeping only values > 0;
#   - "Connections ($instance)" (id 65): per-job sum of
#     max_over_time(vmagent_remotewrite_conns);
#   - the datasource/description header of the hourly series-limit panel.
# Every target reads from the datasource selected by the $ds dashboard variable.
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: decbytes overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1828 id: 66 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(rate(vmagent_remotewrite_conn_bytes_written_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) > 0 interval: '' legendFormat: __auto range: true refId: A title: Bytes write rate ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows requests retry rate by url. Number of retries is unlimited but protected with delays up to 1m between attempts. Remote write URLs are hidden by default but might be unveiled once `-remoteWrite.showURL` is set to true. 
' fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1836 id: 61 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(rate(vmagent_remotewrite_retries_count_total{job=~"$job", instance=~"$instance", url=~"$url"}[$__rate_interval])) by(url) > 0 interval: '' legendFormat: __auto range: true refId: A title: Retry rate ($instance) to ($url) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows current number of established connections to remote write endpoints. 
' fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green - color: red value: 80 unit: short overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1836 id: 65 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(max_over_time(vmagent_remotewrite_conns{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job) interval: '' legendFormat: __auto range: true refId: A title: Connections ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows the current limit usage of unique series over an hourly period. Vmagent will start to drop series once the limit is reached. Please note, panel will be blank if `remoteWrite.maxHourlySeries` is not set.' 
# "Hourly series limit" (id 88): current/max hourly series ratio per job. The query
# multiplies the ratio by 100 and the panel uses unit `percent` on a 0-100 axis
# (max: 100), so the red threshold line must be expressed on the same scale:
# 90 means 90% of the limit (a value of 0.9 would draw the line at 0.9%).
# The following "Remote write connection saturation ($instance)" panel (id 84)
# plots a 0..1 ratio with unit `percentunit`, hence its threshold stays at 0.9.
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: line mappings: [] max: 100 thresholds: mode: absolute steps: - color: transparent - color: red value: 90 unit: percent overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1844 id: 88 options: legend: calcs: [] displayMode: list placement: bottom showLegend: false tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: "max(\n vmagent_hourly_series_limit_current_series{job=~\"$job\", instance=~\"$instance\"} \n / \n vmagent_hourly_series_limit_max_series{job=~\"$job\", instance=~\"$instance\"}\n ) by(job) * 100" interval: '' legendFormat: '{{`{{`}}job{{`}}`}}' range: true refId: A title: Hourly series limit type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: "Shows saturation of every connection to remote storage. If the threshold of 90% is reached, then the connection is saturated (busy or slow) by more than 90%, so vmagent won't be able to keep up and can start buffering data. 
\n\nThis usually means that `-remoteWrite.queues` command-line flag must be increased in order to increase the number of connections per each remote storage.\n" fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: line links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: transparent - color: red value: 0.9 unit: percentunit overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 1844 id: 84 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: |- max( rate(vmagent_remotewrite_send_duration_seconds_total{job=~"$job", instance=~"$instance", url=~"$url"}[$__rate_interval]) / vmagent_remotewrite_queues{job=~"$job", instance=~"$instance", url=~"$url"} ) by(job, url) interval: '' legendFormat: '' range: true refId: A title: Remote write connection saturation ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows the current limit usage of unique series over a daily period. Vmagent will start to drop series once the limit is reached. Please note, panel will be blank if `remoteWrite.maxDailySeries` is not set.' 
# "Daily series limit" (id 90): current/max daily series ratio per job. The query
# returns a 0..1 ratio and the panel uses unit `percentunit`, so the axis maximum
# is 1 (= 100%) and the red threshold line at 0.9 marks 90% of the limit.
# After this panel the "Remote write" row ends and the collapsed row with id 113
# starts with a text panel (id 115).
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: line mappings: [] max: 1 thresholds: mode: absolute steps: - color: transparent - color: red value: 0.9 unit: percentunit overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 1852 id: 90 options: legend: calcs: [] displayMode: list placement: bottom showLegend: false tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: "max(\n vmagent_daily_series_limit_current_series{job=~\"$job\",instance=~\"$instance\"} \n / \n vmagent_daily_series_limit_max_series{job=~\"$job\",instance=~\"$instance\"}\n) by(job)" interval: '' legendFormat: '{{`{{`}}job{{`}}`}}' range: true refId: A title: Daily series limit type: timeseries title: Remote write type: row - collapsed: true gridPos: h: 1 w: 24 x: 0 'y': 43 id: 113 panels: - fieldConfig: defaults: {} overrides: [] gridPos: h: 2 w: 24 x: 0 'y': 100 id: 115 options: code: language: plaintext showLineNumbers: false showMiniMap: false content: Drilldown row is used by other panels on the dashboard to show more detailed metrics per-instance. 
# Remaining options of the text panel (markdown mode, transparent), followed by
# "CPU usage ($instance)" (id 119): sum(rate(process_cpu_seconds_total)) by (job, instance),
# with a table legend sorted by "Last *" descending. The span ends with the
# datasource/description header of the resident-memory panel.
mode: markdown pluginVersion: 11.5.0 title: '' transparent: true type: text - datasource: type: {{ $defaultDatasource }} uid: $ds description: '' fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null unit: short overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 102 id: 119 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: false expr: sum(rate(process_cpu_seconds_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, instance) format: time_series interval: '' intervalFactor: 1 legendFormat: '{{`{{`}}instance{{`}}`}} ({{`{{`}}job{{`}}`}})' range: true refId: A title: CPU usage ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: 'Shows the used memory (resident). The application''s performance will significantly degrade when memory usage is close to 100%.' 
# Body of "RSS memory usage ($instance)" (id 117): plots
# max_over_time(process_resident_memory_bytes) per series with unit `decbytes`;
# the legend is a table sorted by "Last *" descending and labelled "instance (job)".
# The span ends with the datasource/description header of the persistent-queue panel.
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null unit: decbytes overrides: [] gridPos: h: 8 w: 12 x: 12 'y': 102 id: 117 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: desc pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: max_over_time(process_resident_memory_bytes{job=~"$job", instance=~"$instance"}[$__rate_interval]) interval: '' legendFormat: '{{`{{`}}instance{{`}}`}} ({{`{{`}}job{{`}}`}})' range: true refId: A title: RSS memory usage ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: "Shows the persistent queue size of pending samples in bytes which hasn't been flushed to remote storage yet. \n\nIncreasing of value might be a sign of connectivity issues. In such cases, vmagent starts to flush pending data on disk with attempt to send it later once connection is restored.\n\nRemote write URLs are hidden by default but might be unveiled once `-remoteWrite.showURL` is set to true." 
# Body of "Persistent queue size ($instance) to ($url)" (id 125): sums
# vmagent_remotewrite_pending_data_bytes by (instance, url), unit `bytes`, legend
# "instance => url"; the panel carries a link to the vmagent troubleshooting docs.
# The span ends with the datasource/description header of the samples-rate panel.
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] min: 0 thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: bytes overrides: [] gridPos: h: 8 w: 12 x: 0 'y': 110 id: 125 links: - title: Troubleshooting url: https://docs.victoriametrics.com/victoriametrics/vmagent/#troubleshooting options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code exemplar: true expr: sum(vmagent_remotewrite_pending_data_bytes{job=~"$job", instance=~"$instance", url=~"$url"}) by (instance, url) interval: '' legendFormat: '{{`{{`}}instance{{`}}`}} => {{`{{`}}url{{`}}`}}' range: true refId: A title: Persistent queue size ($instance) to ($url) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: "Shows in/out samples rate including push and pull models. \n\nThe out-rate could be different to in-rate because of replication or additional timeseries added by vmagent for every scraped target." 
# Body of "Samples rate ($instance)" (id 123):
#   query A ("in ..."): scraped samples rate + inserted rows rate, by (job, instance);
#   query B ("out ..."): rate of vmagent_remotewrite_block_size_rows_sum, by (job, instance).
# The byRegexp override applies the negative-Y transform to every series whose name
# matches /out .*/, i.e. to the series produced by query B.
# The span ends with the datasource/description header of the disk I/O panel.
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: short overrides: - matcher: id: byRegexp options: /out .*/ properties: - id: custom.transform value: negative-Y gridPos: h: 8 w: 12 x: 12 'y': 110 id: 123 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: |- sum(rate(vm_promscrape_scraped_samples_sum{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, instance) + sum(rate(vmagent_rows_inserted_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, instance) hide: false interval: '' legendFormat: in {{`{{`}}instance{{`}}`}} {{`{{`}}job{{`}}`}} range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(vmagent_remotewrite_block_size_rows_sum{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, instance) interval: '' legendFormat: out {{`{{`}}instance{{`}}`}} {{`{{`}}job{{`}}`}} range: true refId: B title: Samples rate ($instance) type: timeseries - datasource: type: {{ $defaultDatasource }} uid: $ds description: Shows the number of bytes read/write from the storage layer when vmagent has to buffer data on disk or read already buffered data. 
# Body of "Disk writes/reads ($instance)" (id 121): query A is labelled
# "read <instance> <job>", query B "write <instance> <job>". The override that flips
# the read series below the X axis (negative-Y) must match by regexp: the series
# names always carry the instance/job suffix, so an exact-name match on "read"
# would never apply. After the panel come the "Drilldown" row footer, dashboard
# settings and the start of the templating list (the `ds` datasource variable).
fieldConfig: defaults: color: mode: palette-classic custom: axisBorderShow: false axisCenteredZero: false axisColorMode: text axisLabel: '' axisPlacement: auto barAlignment: 0 barWidthFactor: 0.6 drawStyle: line fillOpacity: 0 gradientMode: none hideFrom: legend: false tooltip: false viz: false insertNulls: false lineInterpolation: linear lineWidth: 1 pointSize: 5 scaleDistribution: type: linear showPoints: never spanNulls: false stacking: group: A mode: none thresholdsStyle: mode: 'off' links: [] mappings: [] thresholds: mode: absolute steps: - color: green value: null - color: red value: 80 unit: bytes overrides: - matcher: id: byRegexp options: /read .*/ properties: - id: custom.transform value: negative-Y gridPos: h: 8 w: 12 x: 0 'y': 118 id: 121 options: legend: calcs: - mean - lastNotNull - max displayMode: table placement: bottom showLegend: true sortBy: Last * sortDesc: true tooltip: hideZeros: false mode: multi sort: none pluginVersion: 11.5.0 targets: - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(process_io_storage_read_bytes_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job, instance) format: time_series hide: false interval: '' intervalFactor: 1 legendFormat: read {{`{{`}}instance{{`}}`}} {{`{{`}}job{{`}}`}} range: true refId: A - datasource: type: {{ $defaultDatasource }} uid: $ds editorMode: code expr: sum(rate(process_io_storage_written_bytes_total{job=~"$job", instance=~"$instance"}[$__rate_interval])) by(job,instance) format: time_series hide: false interval: '' intervalFactor: 1 legendFormat: write {{`{{`}}instance{{`}}`}} {{`{{`}}job{{`}}`}} range: true refId: B title: Disk writes/reads ($instance) type: timeseries title: Drilldown type: row preload: false refresh: '' schemaVersion: 40 tags: - victoriametrics - vm-k8s-stack templating: list: - current: {} includeAll: false name: ds options: [] query: {{ $defaultDatasource }} refresh: 1 regex: '' type: datasource - current: {} datasource: type: 
{{ $defaultDatasource }} uid: $ds definition: label_values(vm_app_version{version=~"^vmagent.*"}, job) includeAll: true multi: true name: job options: [] query: query: label_values(vm_app_version{version=~"^vmagent.*"}, job) refId: VictoriaMetrics-job-Variable-Query refresh: 1 regex: '' type: query
# Query variables `job`, `instance`, `url` and the `adhoc` filter all resolve through
# the datasource chosen by the $ds variable, so their datasource type follows the
# chart-wide default datasource type (the $defaultDatasource template variable)
# instead of a hard-coded value; with the default of "prometheus" the rendered
# dashboard is unchanged. The dashboard timezone falls back to "utc" when
# defaultDashboards.defaultTimezone is not set in the chart values.
- allValue: .* current: {} datasource: type: {{ $defaultDatasource }} uid: $ds definition: label_values(vm_app_version{job=~"$job"}, instance) includeAll: true multi: true name: instance options: [] query: query: label_values(vm_app_version{job=~"$job"}, instance) refId: VictoriaMetrics-instance-Variable-Query refresh: 1 regex: '' type: query - allValue: .* current: {} datasource: type: {{ $defaultDatasource }} uid: $ds definition: label_values(vmagent_remotewrite_requests_total{job=~"$job", instance=~"$instance"}, url) description: The remote write URLs includeAll: true multi: true name: url options: [] query: query: label_values(vmagent_remotewrite_requests_total{job=~"$job", instance=~"$instance"}, url) refId: StandardVariableQuery refresh: 1 regex: '' type: query - baseFilters: [] datasource: type: {{ $defaultDatasource }} uid: $ds filters: [] name: adhoc type: adhoc time: from: now-3h to: now timepicker: refresh_intervals: - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d timezone: {{ default "utc" ($Values.defaultDashboards).defaultTimezone }} title: VictoriaMetrics - vmagent uid: G7Z9GzMGz version: 1 weekStart: ''