[STRIMZI] Monitoring chart improvement 92/133692/14
author miroslavmasaryk <miroslav.masaryk@telekom.com>
Mon, 20 Mar 2023 09:21:36 +0000 (10:21 +0100)
committer samrim96 <miroslav.masaryk@telekom.com>
Mon, 3 Apr 2023 14:07:39 +0000 (16:07 +0200)
Add Monitoring into charts of Strimzi

Issue-ID: OOM-3150
Signed-off-by: miroslavmasaryk <miroslav.masaryk@telekom.com>
Change-Id: I0621399f5f555f40f96d52f6c64e404bd91f119b

kubernetes/strimzi/resources/metrics/cruisecontrol-metrics-config.yml [new file with mode: 0644]
kubernetes/strimzi/resources/metrics/kafka-metrics-config.yml [new file with mode: 0644]
kubernetes/strimzi/resources/metrics/zookeeper-metrics-config.yml [new file with mode: 0644]
kubernetes/strimzi/templates/configmap.yaml [new file with mode: 0644]
kubernetes/strimzi/templates/kafka-rebalance.yaml [new file with mode: 0644]
kubernetes/strimzi/templates/pod-monitor.yaml [new file with mode: 0644]
kubernetes/strimzi/templates/strimzi-kafka.yaml
kubernetes/strimzi/values.yaml

diff --git a/kubernetes/strimzi/resources/metrics/cruisecontrol-metrics-config.yml b/kubernetes/strimzi/resources/metrics/cruisecontrol-metrics-config.yml
new file mode 100644 (file)
index 0000000..12c742e
--- /dev/null
@@ -0,0 +1,20 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/}}
+lowercaseOutputName: true
+rules:  # a single catch-all rule for MBeans in the kafka.cruisecontrol domain
+  - pattern: kafka.cruisecontrol<name=(.+)><>(\w+)
+    name: kafka_cruisecontrol_$1_$2  # $1 = value of the MBean "name" key, $2 = attribute name
+    type: GAUGE
\ No newline at end of file
diff --git a/kubernetes/strimzi/resources/metrics/kafka-metrics-config.yml b/kubernetes/strimzi/resources/metrics/kafka-metrics-config.yml
new file mode 100644 (file)
index 0000000..7ad971f
--- /dev/null
@@ -0,0 +1,137 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/}}
+lowercaseOutputName: true
+rules:
+  # Special cases: kafka.server MBeans whose specific key sets are mapped to fixed label names
+  - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), topic=(.+), partition=(.*)><>Value
+    name: kafka_server_$1_$2
+    type: GAUGE
+    labels:
+      clientId: "$3"
+      topic: "$4"
+      partition: "$5"
+  - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), brokerHost=(.+), brokerPort=(.+)><>Value
+    name: kafka_server_$1_$2
+    type: GAUGE
+    labels:
+      clientId: "$3"
+      broker: "$4:$5"  # brokerHost and brokerPort joined into one label
+  - pattern: kafka.server<type=(.+), cipher=(.+), protocol=(.+), listener=(.+), networkProcessor=(.+)><>connections
+    name: kafka_server_$1_connections_tls_info
+    type: GAUGE
+    labels:
+      cipher: "$2"
+      protocol: "$3"
+      listener: "$4"
+      networkProcessor: "$5"
+  - pattern: kafka.server<type=(.+), clientSoftwareName=(.+), clientSoftwareVersion=(.+), listener=(.+), networkProcessor=(.+)><>connections
+    name: kafka_server_$1_connections_software
+    type: GAUGE
+    labels:
+      clientSoftwareName: "$2"
+      clientSoftwareVersion: "$3"
+      listener: "$4"
+      networkProcessor: "$5"
+  - pattern: "kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+):"
+    name: kafka_server_$1_$4
+    type: GAUGE
+    labels:
+      listener: "$2"
+      networkProcessor: "$3"
+  - pattern: kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+)
+    name: kafka_server_$1_$4
+    type: GAUGE
+    labels:
+      listener: "$2"
+      networkProcessor: "$3"
+  # Some percent metrics expose their reading through the MeanRate attribute,
+  # e.g. kafka.server<type=(KafkaRequestHandlerPool), name=(RequestHandlerAvgIdlePercent)><>MeanRate
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>MeanRate
+    name: kafka_$1_$2_$3_percent
+    type: GAUGE
+  # Generic gauges for percent metrics read from the Value attribute, with zero or one extra key/value pair
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>Value
+    name: kafka_$1_$2_$3_percent
+    type: GAUGE
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*, (.+)=(.+)><>Value
+    name: kafka_$1_$2_$3_percent
+    type: GAUGE
+    labels:
+      "$4": "$5"
+  # Generic per-second counters (name ends in PerSec...), listed from two extra key/value pairs down to none
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+), (.+)=(.+)><>Count
+    name: kafka_$1_$2_$3_total
+    type: COUNTER
+    labels:
+      "$4": "$5"
+      "$6": "$7"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+)><>Count
+    name: kafka_$1_$2_$3_total
+    type: COUNTER
+    labels:
+      "$4": "$5"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*><>Count
+    name: kafka_$1_$2_$3_total
+    type: COUNTER
+  # Generic gauges read from the Value attribute, listed from two extra key/value pairs down to none
+  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Value
+    name: kafka_$1_$2_$3
+    type: GAUGE
+    labels:
+      "$4": "$5"
+      "$6": "$7"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Value
+    name: kafka_$1_$2_$3
+    type: GAUGE
+    labels:
+      "$4": "$5"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Value
+    name: kafka_$1_$2_$3
+    type: GAUGE
+  # Emulate Prometheus 'Summary' metrics for the exported histograms: a '_count' counter plus one gauge per percentile.
+  # Note that none of these rules produces the '_sum' metric!
+  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Count
+    name: kafka_$1_$2_$3_count
+    type: COUNTER
+    labels:
+      "$4": "$5"
+      "$6": "$7"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*), (.+)=(.+)><>(\d+)thPercentile
+    name: kafka_$1_$2_$3
+    type: GAUGE
+    labels:
+      "$4": "$5"
+      "$6": "$7"
+      quantile: "0.$8"  # digits captured before "thPercentile", e.g. 99 -> 0.99
+  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Count
+    name: kafka_$1_$2_$3_count
+    type: COUNTER
+    labels:
+      "$4": "$5"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*)><>(\d+)thPercentile
+    name: kafka_$1_$2_$3
+    type: GAUGE
+    labels:
+      "$4": "$5"
+      quantile: "0.$6"
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Count
+    name: kafka_$1_$2_$3_count
+    type: COUNTER
+  - pattern: kafka.(\w+)<type=(.+), name=(.+)><>(\d+)thPercentile
+    name: kafka_$1_$2_$3
+    type: GAUGE
+    labels:
+      quantile: "0.$4"
\ No newline at end of file
diff --git a/kubernetes/strimzi/resources/metrics/zookeeper-metrics-config.yml b/kubernetes/strimzi/resources/metrics/zookeeper-metrics-config.yml
new file mode 100644 (file)
index 0000000..6a1eab7
--- /dev/null
@@ -0,0 +1,44 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/}}
+lowercaseOutputName: true
+rules:
+  # Replicated ZooKeeper: one rule per MBean nesting depth (name0 only, up to name0..name3)
+  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+)><>(\\w+)"
+    name: "zookeeper_$2"
+    type: GAUGE
+  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+)><>(\\w+)"
+    name: "zookeeper_$3"
+    type: GAUGE
+    labels:
+      replicaId: "$2"
+  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+)><>(Packets\\w+)"
+    name: "zookeeper_$4"
+    type: COUNTER  # attributes starting with "Packets" are exported as counters
+    labels:
+      replicaId: "$2"
+      memberType: "$3"
+  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+)><>(\\w+)"
+    name: "zookeeper_$4"
+    type: GAUGE  # every other attribute at this depth is exported as a gauge
+    labels:
+      replicaId: "$2"
+      memberType: "$3"
+  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+), name3=(\\w+)><>(\\w+)"
+    name: "zookeeper_$4_$5"  # $4 = name3 value, $5 = attribute name
+    type: GAUGE
+    labels:
+      replicaId: "$2"
+      memberType: "$3"
\ No newline at end of file
diff --git a/kubernetes/strimzi/templates/configmap.yaml b/kubernetes/strimzi/templates/configmap.yaml
new file mode 100644 (file)
index 0000000..ace51f7
--- /dev/null
@@ -0,0 +1,21 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/}}
+{{- if or .Values.metrics.enabled .Values.metrics.kafkaExporter.enabled .Values.cruiseControl.enabled }}
+apiVersion: v1
+kind: ConfigMap  # rendered whenever any switch that makes the Kafka resource reference these rule files is on
+metadata: {{- include "common.resourceMetadata" . | nindent 2 }}
+data: {{ tpl (.Files.Glob "resources/metrics/*").AsConfig . | nindent 2 }}  # one key per file under resources/metrics/
+{{- end }}
diff --git a/kubernetes/strimzi/templates/kafka-rebalance.yaml b/kubernetes/strimzi/templates/kafka-rebalance.yaml
new file mode 100644 (file)
index 0000000..6d5f143
--- /dev/null
@@ -0,0 +1,24 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/}}
+{{- if and .Values.cruiseControl.enabled .Values.cruiseControl.kafkaRebalance.enabled }}
+apiVersion: kafka.strimzi.io/v1beta2
+kind: KafkaRebalance  # skipped unless cruiseControl.enabled also adds Cruise Control to the Kafka resource
+metadata:
+  name: {{ include "common.fullname" . }}-kafka-rebalance
+  labels:
+    strimzi.io/cluster: {{ include "common.release" . }}-strimzi  # Kafka cluster this rebalance targets
+spec: {}  # no goals or options are set here
+{{- end }}
diff --git a/kubernetes/strimzi/templates/pod-monitor.yaml b/kubernetes/strimzi/templates/pod-monitor.yaml
new file mode 100644 (file)
index 0000000..be288a4
--- /dev/null
@@ -0,0 +1,45 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/}}
+{{- if .Values.metrics.podMonitor.enabled }}
+{{- $monitor := .Values.metrics.podMonitor }}
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: {{ include "common.fullname" . }}-podmonitor
+  ## Labels that let Prometheus pick up this PodMonitor, for example
+  ##   prometheus: kube-prometheus
+  ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.PodMonitor
+  labels: {{- toYaml $monitor.labels | nindent 4 }}
+spec:
+  ## Select the pods labelled as members of this release's Strimzi cluster
+  selector:
+    matchLabels:
+      strimzi.io/cluster: {{ include "common.release" . }}-strimzi
+  podMetricsEndpoints:
+  ## Pod port to scrape, taken from metrics.podMonitor.port
+  - port: {{ $monitor.port }}
+    {{- with $monitor.relabelings }}
+    ## RelabelConfigs to apply to samples before scraping
+    ## (omitted entirely while metrics.podMonitor.relabelings is empty)
+    ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
+    relabelings: {{- toYaml . | nindent 6 }}
+    {{- end }}
+    {{- with $monitor.metricRelabelings }}
+    ## MetricRelabelConfigs to apply to samples before ingestion
+    ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
+    metricRelabelings: {{- toYaml . | nindent 6 }}
+    {{- end }}
+{{- end }}
index 3ce7b1d..421d93a 100644 (file)
@@ -89,6 +89,14 @@ spec:
         size: {{ .Values.persistence.kafka.size }}
         deleteClaim: true
         class: {{ include "common.storageClass" (dict "dot" . "suffix" "kafka" "persistenceInfos" .Values.persistence.kafka) }}
+    {{- if .Values.metrics.kafkaExporter.enabled }}
+    metricsConfig:  # rendered only while metrics.kafkaExporter.enabled is true
+      type: {{ .Values.metrics.kafkaExporter.metricsConfig.type }}
+      valueFrom:
+        configMapKeyRef:
+          name: {{ include "common.fullname" . }}
+          key: kafka-metrics-config.yml  # same name as the rules file under resources/metrics/
+    {{- end }}
   zookeeper:
     template:
       pod:
@@ -107,7 +115,43 @@ spec:
       size: {{ .Values.persistence.zookeeper.size }}
       deleteClaim: true
       class: {{ include "common.storageClass" (dict "dot" . "suffix" "zk" "persistenceInfos" .Values.persistence.zookeeper) }}
+    {{- if .Values.metrics.kafkaExporter.enabled }}
+    metricsConfig:  # rendered only while metrics.kafkaExporter.enabled is true
+      type: {{ .Values.metrics.kafkaExporter.metricsConfig.type }}  # shares the type value used by the kafka-metrics-config.yml reference
+      valueFrom:
+        configMapKeyRef:
+          name: {{ include "common.fullname" . }}
+          key: zookeeper-metrics-config.yml  # same name as the rules file under resources/metrics/
+    {{- end }}
   entityOperator:
     topicOperator: {}
     userOperator: {}
-
+  {{- if .Values.cruiseControl.enabled }}
+  cruiseControl:  # section is present only while cruiseControl.enabled is true
+    metricsConfig:
+      type: {{ .Values.cruiseControl.metricsConfig.type }}
+      valueFrom:
+        configMapKeyRef:
+          name: {{ include "common.fullname" . }}
+          key: cruisecontrol-metrics-config.yml  # same name as the rules file under resources/metrics/
+  {{- end }}
+  {{- if .Values.metrics.kafkaExporter.enabled }}
+  kafkaExporter:  # every setting below is read from metrics.kafkaExporter in values.yaml
+    topicRegex: {{ .Values.metrics.kafkaExporter.topicRegex | quote }}  # quoted: a regex may start with a YAML indicator such as [ or *
+    groupRegex: {{ .Values.metrics.kafkaExporter.groupRegex | quote }}  # quoted for the same reason
+    resources:
+      requests:
+        cpu: {{ .Values.metrics.kafkaExporter.resources.requests.cpu }}
+        memory: {{ .Values.metrics.kafkaExporter.resources.requests.memory }}
+      limits:
+        cpu: {{ .Values.metrics.kafkaExporter.resources.limits.cpu }}
+        memory: {{ .Values.metrics.kafkaExporter.resources.limits.memory }}
+    logging: {{ .Values.metrics.kafkaExporter.logging }}
+    enableSaramaLogging: {{ .Values.metrics.kafkaExporter.enableSaramaLogging }}
+    readinessProbe:
+      initialDelaySeconds: {{ .Values.metrics.kafkaExporter.readinessProbe.initialDelaySeconds }}
+      timeoutSeconds: {{ .Values.metrics.kafkaExporter.readinessProbe.timeoutSeconds }}
+    livenessProbe:
+      initialDelaySeconds: {{ .Values.metrics.kafkaExporter.livenessProbe.initialDelaySeconds }}
+      timeoutSeconds: {{ .Values.metrics.kafkaExporter.livenessProbe.timeoutSeconds }}
+  {{- end }}
index 057f200..8963cf3 100644 (file)
@@ -90,6 +90,54 @@ ingress:
       exposedPort: *advertizedPortBroker2
       exposedProtocol: TLS
 
+# Monitoring: metrics rules ConfigMap, Kafka Exporter and Prometheus PodMonitor
+metrics:
+  enabled: false  # true renders templates/configmap.yaml (rule files from resources/metrics/)
+  kafkaExporter:
+    enabled: false  # true adds kafkaExporter and the kafka/zookeeper metricsConfig references to the Kafka resource
+    metricsConfig:
+      type: jmxPrometheusExporter
+    topicRegex: ".*"
+    groupRegex: ".*"
+    resources:
+      requests:
+        cpu: 2000m
+        memory: 640Mi
+      limits:
+        cpu: 5000m
+        memory: 1280Mi
+    logging: debug  # NOTE(review): verbose default together with enableSaramaLogging - confirm this is intended
+    enableSaramaLogging: true
+    readinessProbe:
+      initialDelaySeconds: 15
+      timeoutSeconds: 5
+    livenessProbe:
+      initialDelaySeconds: 15
+      timeoutSeconds: 5
+  podMonitor:
+    # Prerequisite: Prometheus. Currently an optional add-on in the OOM docs
+    enabled: false
+    # port scraped by the PodMonitor (default port for Strimzi metrics)
+    port: "tcp-prometheus"
+    # labels set on the PodMonitor so that Prometheus picks it up
+    # (dummy value below; replace it with the labels your Prometheus selects on)
+    labels:
+      release: dummy
+    relabelings: []
+    metricRelabelings: []
+
+cruiseControl:
+  ## Cruise Control provides a Kafka metrics reporter implementation that,
+  ## once installed into the Kafka brokers, filters and records a wide range of metrics provided by the brokers themselves.
+  ## Prerequisite: 2 or more broker nodes.
+  enabled: false
+  metricsConfig:
+    type: jmxPrometheusExporter
+  ## Custom resource for Kafka that can rebalance your cluster
+  ## ref: https://strimzi.io/blog/2020/06/15/cruise-control/
+  kafkaRebalance:
+    enabled: false
+
 ######################
 #  Component overrides
 ######################