# Generated from 'prometheus.rules' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
# Do not change in-place! In order to change this file, first read the following link:
# https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
{{- if and .Values.defaultRules.create .Values.defaultRules.rules.prometheus }}
{{- $prometheusJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus" }}
{{- $namespace := .Release.Namespace }}
apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus.rules" | trunc 63 | trimSuffix "-" }}
  labels:
    app: {{ template "prometheus-operator.name" . }}
{{ include "prometheus-operator.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: prometheus.rules
    rules:
    - alert: PrometheusConfigReloadFailed
      annotations:
        description: Reloading Prometheus' configuration has failed for {{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}}
        summary: Reloading Prometheus' configuration failed
      expr: prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 0
      for: 10m
      labels:
        severity: warning
    - alert: PrometheusNotificationQueueRunningFull
      annotations:
        description: Prometheus' alert notification queue is running full for {{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}}
        summary: Prometheus' alert notification queue is running full
      expr: predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}
      for: 10m
      labels:
        severity: warning
    - alert: PrometheusErrorSendingAlerts
      annotations:
        description: Errors while sending alerts from Prometheus {{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}} to Alertmanager {{`{{$labels.alertmanager}}`}}
        summary: Errors while sending alerts from Prometheus
      expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.01
      for: 10m
      labels:
        severity: warning
    - alert: PrometheusErrorSendingAlerts
      annotations:
        description: Errors while sending alerts from Prometheus {{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}} to Alertmanager {{`{{$labels.alertmanager}}`}}
        summary: Errors while sending alerts from Prometheus
      expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.03
      for: 10m
      labels:
        severity: critical
    - alert: PrometheusNotConnectedToAlertmanagers
      annotations:
        description: Prometheus {{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}} is not connected to any Alertmanagers
        summary: Prometheus is not connected to any Alertmanagers
      expr: prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} < 1
      for: 10m
      labels:
        severity: warning
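    # A sketch of how one of the templated expressions above renders, assuming a
    # release named "po" installed in the "monitoring" namespace (both values are
    # illustrative; the real job label comes from the $prometheusJob variable at
    # the top of this file):
    #
    #   rate(prometheus_notifications_errors_total{job="po-prometheus-operator-prometheus",namespace="monitoring"}[5m])
    #     / rate(prometheus_notifications_sent_total{job="po-prometheus-operator-prometheus",namespace="monitoring"}[5m]) > 0.01
    #
    # Pasting a rendered query into the Prometheus expression browser is a quick
    # way to sanity-check a rule without waiting for it to fire.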
    - alert: PrometheusTSDBReloadsFailing
      annotations:
        description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} had {{`{{$value | humanize}}`}} reload failures over the last two hours.'
        summary: Prometheus has issues reloading data blocks from disk
      expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
      for: 12h
      labels:
        severity: warning
    - alert: PrometheusTSDBCompactionsFailing
      annotations:
        description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} had {{`{{$value | humanize}}`}} compaction failures over the last two hours.'
        summary: Prometheus has issues compacting sample blocks
      expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
      for: 12h
      labels:
        severity: warning
    - alert: PrometheusTSDBWALCorruptions
      annotations:
        description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} has a corrupted write-ahead log (WAL).'
        summary: Prometheus write-ahead log is corrupted
      expr: tsdb_wal_corruptions_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} > 0
      for: 4h
      labels:
        severity: warning
    - alert: PrometheusNotIngestingSamples
      annotations:
        description: Prometheus {{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}} isn't ingesting samples.
        summary: Prometheus isn't ingesting samples
      expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
      for: 10m
      labels:
        severity: warning
    - alert: PrometheusTargetScrapesDuplicate
      annotations:
        description: '{{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}} has many samples rejected due to duplicate timestamps but different values'
        summary: Prometheus has many samples rejected
      expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
      for: 10m
      labels:
        severity: warning
{{- end }}