1 # Generated from 'node.rules' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
2 # Do not change in place! To change this file, first read the following link:
3 # https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
# This manifest is rendered only when defaultRules.create, nodeExporter.enabled
# and defaultRules.rules.node are all set.
# The API group is taken from prometheusOperator.crdApiGroup and falls back to
# "monitoring.coreos.com"; "/v1" is appended to it.
# The resource name is "<fullname>-node.rules", cut to 63 characters with any
# trailing "-" removed.
# defaultRules.labels and defaultRules.annotations, when set, are emitted via
# toYaml with a 4-space indent.
4 {{- if and .Values.defaultRules.create .Values.nodeExporter.enabled .Values.defaultRules.rules.node }}
5 apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
8 name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "node.rules" | trunc 63 | trimSuffix "-" }}
10 app: {{ template "prometheus-operator.name" . }}
11 {{ include "prometheus-operator.labels" . | indent 4 }}
12 {{- if .Values.defaultRules.labels }}
13 {{ toYaml .Values.defaultRules.labels | indent 4 }}
15 {{- if .Values.defaultRules.annotations }}
17 {{ toYaml .Values.defaultRules.annotations | indent 4 }}
# ':kube_pod_info_node_count:' reduces kube_pod_info to one series per node
# (min by node) and sums those values.
# NOTE(review): this is a node count only if kube_pod_info samples are always 1 - confirm.
23 - expr: sum(min(kube_pod_info) by (node))
24 record: ':kube_pod_info_node_count:'
# 'node_namespace_pod:kube_pod_info:' keeps one series per (node, namespace, pod)
# from kube-state-metrics. The label_replace rewrites the "pod" label with its
# own full value ("(.*)" -> "$1").
# This series is the right-hand side of the
# `* on (namespace, pod) group_left(node)` joins used by later rules, which copy
# its node label onto node-exporter series.
# The rule that follows, node:node_num_cpu:sum, applies that join to
# node_cpu_seconds_total and counts, per node, the (node, cpu) groups.
25 - expr: max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod)
26 record: 'node_namespace_pod:kube_pod_info:'
28 count by (node) (sum by (node, cpu) (
29 node_cpu_seconds_total{job="node-exporter"}
30 * on (namespace, pod) group_left(node)
31 node_namespace_pod:kube_pod_info:
33 record: node:node_num_cpu:sum
# CPU utilisation and saturation rules, followed by cluster memory utilisation.
# - ':node_cpu_utilisation:avg1m' is 1 minus the average 1m rate of idle CPU
#   time over all node-exporter series.
# - 'node:node_cpu_utilisation:avg1m' uses the same idle rate, joined on
#   (namespace, pod) with node_namespace_pod:kube_pod_info: to gain a node label.
# - 'node:cluster_cpu_utilisation:ratio' is derived from
#   node:node_cpu_utilisation:avg1m and scalar(sum(node:node_num_cpu:sum)).
# - The two *_cpu_saturation_load1 rules are built from node_load1 and
#   node:node_num_cpu:sum.
# - ':node_memory_utilisation:' is built from the cluster-wide sum of
#   MemFree + Cached + Buffers and the cluster-wide sum of MemTotal.
34 - expr: 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]))
35 record: :node_cpu_utilisation:avg1m
38 rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])
39 * on (namespace, pod) group_left(node)
40 node_namespace_pod:kube_pod_info:)
41 record: node:node_cpu_utilisation:avg1m
43 node:node_cpu_utilisation:avg1m
47 scalar(sum(node:node_num_cpu:sum))
48 record: node:cluster_cpu_utilisation:ratio
50 sum(node_load1{job="node-exporter"})
52 sum(node:node_num_cpu:sum)
53 record: ':node_cpu_saturation_load1:'
56 node_load1{job="node-exporter"}
57 * on (namespace, pod) group_left(node)
58 node_namespace_pod:kube_pod_info:
62 record: 'node:node_cpu_saturation_load1:'
65 sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
67 sum(node_memory_MemTotal_bytes{job="node-exporter"})
68 record: ':node_memory_utilisation:'
# Memory rules. "Available" memory throughout this group means
# MemFree + Cached + Buffers as reported by node-exporter.
# This first rule is the cluster-wide sum of that available memory.
69 - expr: sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
70 record: :node_memory_MemFreeCachedBuffers_bytes:sum
# Cluster-wide sum of MemTotal. The rules after it are:
# - node:node_memory_bytes_available:sum and node:node_memory_bytes_total:sum,
#   the same quantities joined on (namespace, pod) with
#   node_namespace_pod:kube_pod_info: to gain a node label;
# - node:node_memory_utilisation:ratio and node:cluster_memory_utilisation:ratio,
#   both built from (total - available), the latter using the scalar
#   cluster-wide total;
# - :node_memory_swap_io_bytes:sum_rate, built from the 1m rates of
#   node_vmstat_pgpgin and node_vmstat_pgpgout;
# - 'node:node_memory_utilisation:', built again from the raw node-exporter
#   memory metrics joined with the pod info series.
71 - expr: sum(node_memory_MemTotal_bytes{job="node-exporter"})
72 record: :node_memory_MemTotal_bytes:sum
75 (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
76 * on (namespace, pod) group_left(node)
77 node_namespace_pod:kube_pod_info:
79 record: node:node_memory_bytes_available:sum
82 node_memory_MemTotal_bytes{job="node-exporter"}
83 * on (namespace, pod) group_left(node)
84 node_namespace_pod:kube_pod_info:
86 record: node:node_memory_bytes_total:sum
88 (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
90 node:node_memory_bytes_total:sum
91 record: node:node_memory_utilisation:ratio
93 (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
95 scalar(sum(node:node_memory_bytes_total:sum))
96 record: node:cluster_memory_utilisation:ratio
99 (rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
100 + rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
102 record: :node_memory_swap_io_bytes:sum_rate
106 (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
107 * on (namespace, pod) group_left(node)
108 node_namespace_pod:kube_pod_info:
112 node_memory_MemTotal_bytes{job="node-exporter"}
113 * on (namespace, pod) group_left(node)
114 node_namespace_pod:kube_pod_info:
116 record: 'node:node_memory_utilisation:'
# Second per-node memory utilisation series: 1 - available / total, computed
# from the node:node_memory_bytes_available:sum and
# node:node_memory_bytes_total:sum recording rules rather than from raw metrics.
# The rule after it records node:node_memory_swap_io_bytes:sum_rate as 1e3 times
# the per-node sum of the 1m pgpgin + pgpgout rates.
# NOTE(review): the record name says "swap_io_bytes" but the inputs are
# node_vmstat_pgpgin / node_vmstat_pgpgout - confirm these are the intended
# counters and that 1e3 is the intended scale factor.
117 - expr: 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum)
118 record: 'node:node_memory_utilisation_2:'
120 1e3 * sum by (node) (
121 (rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
122 + rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
123 * on (namespace, pod) group_left(node)
124 node_namespace_pod:kube_pod_info:
126 record: node:node_memory_swap_io_bytes:sum_rate
# Disk utilisation: irate (over a 1m window) of node_disk_io_time_seconds_total,
# restricted to devices whose name matches nvme.+|rbd.+|sd.+|vd.+|xvd.+.
# The first rule averages over all matching series; the node: variant applies
# the same irate joined on (namespace, pod) with node_namespace_pod:kube_pod_info:
# to gain a node label.
127 - expr: avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]))
128 record: :node_disk_utilisation:avg_irate
131 irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m])
132 * on (namespace, pod) group_left(node)
133 node_namespace_pod:kube_pod_info:
135 record: node:node_disk_utilisation:avg_irate
# Disk saturation: same device filter as the disk utilisation rules, but over
# node_disk_io_time_weighted_seconds_total, with the irate divided by 1e3.
# NOTE(review): the metric name indicates seconds, yet the value is scaled by
# 1 / 1e3 - confirm this scaling is still intended for the node-exporter
# version in use.
# 'node:node_filesystem_usage:' is (size - avail) / size for filesystems of type
# ext2/ext3/ext4, btrfs, xfs or zfs, taking the max per (namespace, pod, device).
# Despite the "node:" prefix it is not aggregated by a node label.
136 - expr: avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) / 1e3)
137 record: :node_disk_saturation:avg_irate
140 irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) / 1e3
141 * on (namespace, pod) group_left(node)
142 node_namespace_pod:kube_pod_info:
144 record: node:node_disk_saturation:avg_irate
146 max by (namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}
147 - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
148 / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
149 record: 'node:node_filesystem_usage:'
# 'node:node_filesystem_avail:' is avail / size for ext2/ext3/ext4, btrfs, xfs
# and zfs filesystems, taking the max per (namespace, pod, device).
# Network rules that follow:
# - *node_net_utilisation:sum_irate adds the receive and transmit byte irates;
# - *node_net_saturation:sum_irate adds the receive and transmit drop irates.
#   Both exclude devices matching veth.+; the node: variants join on
#   (namespace, pod) with node_namespace_pod:kube_pod_info: to gain a node label.
# Inode rules at the end:
# - take the max of node_filesystem_files (total) or node_filesystem_files_free
#   (free) for mountpoint "/" per instance;
# - derive a host_ip label from instance by keeping the text before the final
#   ":" (pattern "(.*):.*" -> "$1");
# - match that host_ip against kube_pod_info (host_ip != "") with
#   group_right (node), which brings the node label over from kube_pod_info.
150 - expr: max by (namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
151 record: 'node:node_filesystem_avail:'
153 sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) +
154 sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m]))
155 record: :node_net_utilisation:sum_irate
158 (irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) +
159 irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m]))
160 * on (namespace, pod) group_left(node)
161 node_namespace_pod:kube_pod_info:
163 record: node:node_net_utilisation:sum_irate
165 sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) +
166 sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m]))
167 record: :node_net_saturation:sum_irate
170 (irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) +
171 irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m]))
172 * on (namespace, pod) group_left(node)
173 node_namespace_pod:kube_pod_info:
175 record: node:node_net_saturation:sum_irate
179 kube_pod_info{job="kube-state-metrics", host_ip!=""}
181 * on (host_ip) group_right (node)
183 (max(node_filesystem_files{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
186 record: 'node:node_inodes_total:'
190 kube_pod_info{job="kube-state-metrics", host_ip!=""}
192 * on (host_ip) group_right (node)
194 (max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
197 record: 'node:node_inodes_free:'