From aa5f0fa05ac774865691e2c17cd0c452f28d2492 Mon Sep 17 00:00:00 2001
From: miroslavmasaryk
Date: Mon, 20 Mar 2023 10:21:36 +0100
Subject: [PATCH] [STRIMZI] Monitoring chart improvement

Add Monitoring into charts of Strimzi

Issue-ID: OOM-3150
Signed-off-by: miroslavmasaryk
Change-Id: I0621399f5f555f40f96d52f6c64e404bd91f119b
---
 .../metrics/cruisecontrol-metrics-config.yml       |  20 +++
 .../resources/metrics/kafka-metrics-config.yml     | 137 +++++++++++++++++++++
 .../resources/metrics/zookeeper-metrics-config.yml |  44 +++++++
 kubernetes/strimzi/templates/configmap.yaml        |  21 ++++
 kubernetes/strimzi/templates/kafka-rebalance.yaml  |  24 ++++
 kubernetes/strimzi/templates/pod-monitor.yaml      |  45 +++++++
 kubernetes/strimzi/templates/strimzi-kafka.yaml    |  46 ++++++-
 kubernetes/strimzi/values.yaml                     |  48 ++++++++
 8 files changed, 384 insertions(+), 1 deletion(-)
 create mode 100644 kubernetes/strimzi/resources/metrics/cruisecontrol-metrics-config.yml
 create mode 100644 kubernetes/strimzi/resources/metrics/kafka-metrics-config.yml
 create mode 100644 kubernetes/strimzi/resources/metrics/zookeeper-metrics-config.yml
 create mode 100644 kubernetes/strimzi/templates/configmap.yaml
 create mode 100644 kubernetes/strimzi/templates/kafka-rebalance.yaml
 create mode 100644 kubernetes/strimzi/templates/pod-monitor.yaml

diff --git a/kubernetes/strimzi/resources/metrics/cruisecontrol-metrics-config.yml b/kubernetes/strimzi/resources/metrics/cruisecontrol-metrics-config.yml
new file mode 100644
index 0000000000..12c742ef35
--- /dev/null
+++ b/kubernetes/strimzi/resources/metrics/cruisecontrol-metrics-config.yml
@@ -0,0 +1,20 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/}}
+lowercaseOutputName: true
+rules:
+  - pattern: kafka.cruisecontrol<name=(.+)><>(\w+)
+    name: kafka_cruisecontrol_$1_$2
+    type: GAUGE
\ No newline at end of file
diff --git a/kubernetes/strimzi/resources/metrics/kafka-metrics-config.yml b/kubernetes/strimzi/resources/metrics/kafka-metrics-config.yml
new file mode 100644
index 0000000000..7ad971fc16
--- /dev/null
+++ b/kubernetes/strimzi/resources/metrics/kafka-metrics-config.yml
@@ -0,0 +1,137 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/}}
+lowercaseOutputName: true
+rules:
+  # Special cases and very specific rules
+  - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), topic=(.+), partition=(.*)><>Value
+    name: kafka_server_$1_$2
+    type: GAUGE
+    labels:
+      clientId: "$3"
+      topic: "$4"
+      partition: "$5"
+  - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), brokerHost=(.+), brokerPort=(.+)><>Value
+    name: kafka_server_$1_$2
+    type: GAUGE
+    labels:
+      clientId: "$3"
+      broker: "$4:$5"
+  - pattern: kafka.server<type=(.+), cipher=(.+), protocol=(.+), listener=(.+), networkProcessor=(.+)><>connections
+    name: kafka_server_$1_connections_tls_info
+    type: GAUGE
+    labels:
+      cipher: "$2"
+      protocol: "$3"
+      listener: "$4"
+      networkProcessor: "$5"
+  - pattern: kafka.server<type=(.+), clientSoftwareName=(.+), clientSoftwareVersion=(.+), listener=(.+), networkProcessor=(.+)><>connections
+    name: kafka_server_$1_connections_software
+    type: GAUGE
+    labels:
+      clientSoftwareName: "$2"
+      clientSoftwareVersion: "$3"
+      listener: "$4"
+      networkProcessor: "$5"
+  - pattern: "kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+):"
+    name: kafka_server_$1_$4
+    type: GAUGE
+    labels:
+      listener: "$2"
+      networkProcessor: "$3"
+  - pattern: kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+)
+    name: kafka_server_$1_$4
+    type: GAUGE
+    labels:
+      listener: "$2"
+      networkProcessor: "$3"
+  # Some percent metrics use MeanRate attribute
+  # Ex) kafka.server<type=(KafkaRequestHandlerPool), name=(RequestHandlerAvgIdlePercent)><>MeanRate
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>MeanRate
+    name: kafka_$1_$2_$3_percent
+    type: GAUGE
+  # Generic gauges for percents
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>Value
+    name: kafka_$1_$2_$3_percent
+    type: GAUGE
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*, (.+)=(.+)><>Value
+    name: kafka_$1_$2_$3_percent
+    type: GAUGE
+    labels:
+      "$4": "$5"
+  # Generic per-second counters with 0-2 key/value pairs
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+), (.+)=(.+)><>Count
+    name: kafka_$1_$2_$3_total
+    type: COUNTER
+    labels:
+      "$4": "$5"
+      "$6": "$7"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+)><>Count
+    name: kafka_$1_$2_$3_total
+    type: COUNTER
+    labels:
+      "$4": "$5"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*><>Count
+    name: kafka_$1_$2_$3_total
+    type: COUNTER
+  # Generic gauges with 0-2 key/value pairs
+  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Value
+    name: kafka_$1_$2_$3
+    type: GAUGE
+    labels:
+      "$4": "$5"
+      "$6": "$7"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Value
+    name: kafka_$1_$2_$3
+    type: GAUGE
+    labels:
+      "$4": "$5"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Value
+    name: kafka_$1_$2_$3
+    type: GAUGE
+  # Emulate Prometheus 'Summary' metrics for the exported 'Histogram's.
+  # Note that these are missing the '_sum' metric!
+  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Count
+    name: kafka_$1_$2_$3_count
+    type: COUNTER
+    labels:
+      "$4": "$5"
+      "$6": "$7"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*), (.+)=(.+)><>(\d+)thPercentile
+    name: kafka_$1_$2_$3
+    type: GAUGE
+    labels:
+      "$4": "$5"
+      "$6": "$7"
+      quantile: "0.$8"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Count
+    name: kafka_$1_$2_$3_count
+    type: COUNTER
+    labels:
+      "$4": "$5"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*)><>(\d+)thPercentile
+    name: kafka_$1_$2_$3
+    type: GAUGE
+    labels:
+      "$4": "$5"
+      quantile: "0.$6"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Count
+    name: kafka_$1_$2_$3_count
+    type: COUNTER
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)><>(\d+)thPercentile
+    name: kafka_$1_$2_$3
+    type: GAUGE
+    labels:
+      quantile: "0.$4"
\ No newline at end of file
diff --git a/kubernetes/strimzi/resources/metrics/zookeeper-metrics-config.yml b/kubernetes/strimzi/resources/metrics/zookeeper-metrics-config.yml
new file mode 100644
index 0000000000..6a1eab7825
--- /dev/null
+++ b/kubernetes/strimzi/resources/metrics/zookeeper-metrics-config.yml
@@ -0,0 +1,44 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/}}
+lowercaseOutputName: true
+rules:
+  # replicated Zookeeper
+  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+)><>(\\w+)"
+    name: "zookeeper_$2"
+    type: GAUGE
+  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+)><>(\\w+)"
+    name: "zookeeper_$3"
+    type: GAUGE
+    labels:
+      replicaId: "$2"
+  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+)><>(Packets\\w+)"
+    name: "zookeeper_$4"
+    type: COUNTER
+    labels:
+      replicaId: "$2"
+      memberType: "$3"
+  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+)><>(\\w+)"
+    name: "zookeeper_$4"
+    type: GAUGE
+    labels:
+      replicaId: "$2"
+      memberType: "$3"
+  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+), name3=(\\w+)><>(\\w+)"
+    name: "zookeeper_$4_$5"
+    type: GAUGE
+    labels:
+      replicaId: "$2"
+      memberType: "$3"
\ No newline at end of file
diff --git a/kubernetes/strimzi/templates/configmap.yaml b/kubernetes/strimzi/templates/configmap.yaml
new file mode 100644
index 0000000000..ace51f78ba
--- /dev/null
+++ b/kubernetes/strimzi/templates/configmap.yaml
@@ -0,0 +1,21 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/}}
+{{- if or .Values.metrics.enabled .Values.metrics.kafkaExporter.enabled .Values.cruiseControl.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata: {{- include "common.resourceMetadata" . | nindent 2 }}
+data: {{ tpl (.Files.Glob "resources/metrics/*").AsConfig . | nindent 2 }}
+{{- end }}
diff --git a/kubernetes/strimzi/templates/kafka-rebalance.yaml b/kubernetes/strimzi/templates/kafka-rebalance.yaml
new file mode 100644
index 0000000000..6d5f143220
--- /dev/null
+++ b/kubernetes/strimzi/templates/kafka-rebalance.yaml
@@ -0,0 +1,24 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/}}
+{{- if .Values.cruiseControl.kafkaRebalance.enabled }}
+apiVersion: kafka.strimzi.io/v1beta2
+kind: KafkaRebalance
+metadata:
+  name: {{ include "common.fullname" . }}-kafka-rebalance
+  labels:
+    strimzi.io/cluster: {{ include "common.release" . }}-strimzi
+spec: {}
+{{- end }}
diff --git a/kubernetes/strimzi/templates/pod-monitor.yaml b/kubernetes/strimzi/templates/pod-monitor.yaml
new file mode 100644
index 0000000000..be288a4d75
--- /dev/null
+++ b/kubernetes/strimzi/templates/pod-monitor.yaml
@@ -0,0 +1,45 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/}}
+{{- if .Values.metrics.podMonitor.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: {{ include "common.fullname" . }}-podmonitor
+  ## podMonitor labels for prometheus to pick up the podMonitor
+  ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.PodMonitor
+  ##
+  # labels:
+  #   prometheus: kube-prometheus
+  labels: {{- toYaml $.Values.metrics.podMonitor.labels | nindent 4 }}
+spec:
+  selector:
+    matchLabels:
+      strimzi.io/cluster: {{ include "common.release" . }}-strimzi
+  podMetricsEndpoints:
+  - port: {{ .Values.metrics.podMonitor.port }}
+    {{- if .Values.metrics.podMonitor.relabelings }}
+    ## RelabelConfigs to apply to samples before scraping
+    ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
+    ## Value is rendered verbatim via toYaml (it is not evaluated as a template)
+    relabelings: {{- toYaml .Values.metrics.podMonitor.relabelings | nindent 6 }}
+    {{- end }}
+    {{- if .Values.metrics.podMonitor.metricRelabelings }}
+    ## MetricRelabelConfigs to apply to samples before ingestion
+    ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
+    ## Value is rendered verbatim via toYaml (it is not evaluated as a template)
+    metricRelabelings: {{- toYaml .Values.metrics.podMonitor.metricRelabelings | nindent 6 }}
+    {{- end }}
+{{- end }}
diff --git a/kubernetes/strimzi/templates/strimzi-kafka.yaml b/kubernetes/strimzi/templates/strimzi-kafka.yaml
index 3ce7b1d627..421d93a6cb 100644
--- a/kubernetes/strimzi/templates/strimzi-kafka.yaml
+++ b/kubernetes/strimzi/templates/strimzi-kafka.yaml
@@ -89,6 +89,14 @@ spec:
         size: {{ .Values.persistence.kafka.size }}
         deleteClaim: true
         class: {{ include "common.storageClass" (dict "dot" . "suffix" "kafka" "persistenceInfos" .Values.persistence.kafka) }}
+    {{- if .Values.metrics.kafkaExporter.enabled }}
+    metricsConfig:
+      type: {{ .Values.metrics.kafkaExporter.metricsConfig.type }}
+      valueFrom:
+        configMapKeyRef:
+          name: {{ include "common.fullname" . }}
+          key: kafka-metrics-config.yml
+    {{- end }}
   zookeeper:
     template:
       pod:
@@ -107,7 +115,43 @@ spec:
       size: {{ .Values.persistence.zookeeper.size }}
       deleteClaim: true
       class: {{ include "common.storageClass" (dict "dot" . "suffix" "zk" "persistenceInfos" .Values.persistence.zookeeper) }}
+    {{- if .Values.metrics.kafkaExporter.enabled }}
+    metricsConfig:
+      type: {{ .Values.metrics.kafkaExporter.metricsConfig.type }}
+      valueFrom:
+        configMapKeyRef:
+          name: {{ include "common.fullname" . }}
+          key: zookeeper-metrics-config.yml
+    {{- end }}
   entityOperator:
     topicOperator: {}
     userOperator: {}
-
+  {{- if .Values.cruiseControl.enabled }}
+  cruiseControl:
+    metricsConfig:
+      type: {{ .Values.cruiseControl.metricsConfig.type }}
+      valueFrom:
+        configMapKeyRef:
+          name: {{ include "common.fullname" . }}
+          key: cruisecontrol-metrics-config.yml
+  {{- end }}
+  {{- if .Values.metrics.kafkaExporter.enabled }}
+  kafkaExporter:
+    topicRegex: {{ .Values.metrics.kafkaExporter.topicRegex | quote }}
+    groupRegex: {{ .Values.metrics.kafkaExporter.groupRegex | quote }}
+    resources:
+      requests:
+        cpu: {{ .Values.metrics.kafkaExporter.resources.requests.cpu }}
+        memory: {{ .Values.metrics.kafkaExporter.resources.requests.memory }}
+      limits:
+        cpu: {{ .Values.metrics.kafkaExporter.resources.limits.cpu }}
+        memory: {{ .Values.metrics.kafkaExporter.resources.limits.memory }}
+    logging: {{ .Values.metrics.kafkaExporter.logging }}
+    enableSaramaLogging: {{ .Values.metrics.kafkaExporter.enableSaramaLogging }}
+    readinessProbe:
+      initialDelaySeconds: {{ .Values.metrics.kafkaExporter.readinessProbe.initialDelaySeconds }}
+      timeoutSeconds: {{ .Values.metrics.kafkaExporter.readinessProbe.timeoutSeconds }}
+    livenessProbe:
+      initialDelaySeconds: {{ .Values.metrics.kafkaExporter.livenessProbe.initialDelaySeconds }}
+      timeoutSeconds: {{ .Values.metrics.kafkaExporter.livenessProbe.timeoutSeconds }}
+  {{- end }}
diff --git a/kubernetes/strimzi/values.yaml b/kubernetes/strimzi/values.yaml
index 057f2003c7..8963cf3cda 100644
--- a/kubernetes/strimzi/values.yaml
+++ b/kubernetes/strimzi/values.yaml
@@ -90,6 +90,54 @@ ingress:
     exposedPort: *advertizedPortBroker2
     exposedProtocol: TLS
 
+# Kafka Exporter for metrics
+metrics:
+  enabled: false
+  kafkaExporter:
+    enabled: false
+    metricsConfig:
+      type: jmxPrometheusExporter
+    topicRegex: ".*"
+    groupRegex: ".*"
+    resources:
+      requests:
+        cpu: 2000m
+        memory: 640Mi
+      limits:
+        cpu: 5000m
+        memory: 1280Mi
+    logging: debug
+    enableSaramaLogging: true
+    readinessProbe:
+      initialDelaySeconds: 15
+      timeoutSeconds: 5
+    livenessProbe:
+      initialDelaySeconds: 15
+      timeoutSeconds: 5
+  podMonitor:
+    # Prometheus prerequisite. Currently an optional addon in the OOM docs
+    enabled: false
+    # default port for strimzi metrics
+    port: "tcp-prometheus"
+    # podMonitor labels for prometheus to pick up the podMonitor
+    # dummy value
+    labels:
+      release: dummy
+    relabelings: []
+    metricRelabelings: []
+
+cruiseControl:
+## Cruise Control provides a Kafka metrics reporter implementation
+## once installed into the Kafka brokers, filters and records a wide range of metrics provided by the brokers themselves.
+## prerequisite is having 2 or more broker nodes
+  enabled: false
+  metricsConfig:
+    type: jmxPrometheusExporter
+  ## Custom resource for Kafka that can rebalance your cluster
+  # ref. https://strimzi.io/blog/2020/06/15/cruise-control/
+  kafkaRebalance:
+    enabled: false
+
 ######################
 # Component overrides
 ######################
-- 
2.16.6