From 3402567f97fe48a8acfa57b04688a11c30b6a8d3 Mon Sep 17 00:00:00 2001 From: srinivasyanamadala Date: Thu, 10 Apr 2025 14:57:42 +0200 Subject: [PATCH] Support for Prometheus for opa-pdp Issue-ID: POLICY-5336 Change-Id: I2ede376363b5d52b13c1bf8b7df228d519db20b2 Signed-off-by: srinivasyanamadala --- compose/metrics/dashboards/dashboard-opa-pdp.json | 753 ++++++++++++++++++++++ compose/metrics/prometheus.yml | 9 + csit/resources/scripts/config_setup.sh | 3 +- csit/resources/tests/opa-pdp-slas.robot | 60 ++ csit/run-project-csit.sh | 4 +- helm/prometheus/resources/prometheus.yml | 9 + 6 files changed, 835 insertions(+), 3 deletions(-) create mode 100644 compose/metrics/dashboards/dashboard-opa-pdp.json create mode 100644 csit/resources/tests/opa-pdp-slas.robot diff --git a/compose/metrics/dashboards/dashboard-opa-pdp.json b/compose/metrics/dashboards/dashboard-opa-pdp.json new file mode 100644 index 00000000..771c994c --- /dev/null +++ b/compose/metrics/dashboards/dashboard-opa-pdp.json @@ -0,0 +1,753 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Grafana Dashboard for Policy OPA-PDP", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 6, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 19, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "refId": "A" + } + ], + "title": "Quick Info", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "description": "Uptime per 
pod for Policy OPA-PDP application calculated in days", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "vertical", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": { + "titleSize": 16 + }, + "textMode": "value_and_name", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "editorMode": "code", + "exemplar": true, + "expr": "(time() - process_start_time_seconds{job=\"opa-pdp-metrics\"})", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "process_uptime: {{pod}}", + "refId": "A" + } + ], + "title": "Process Uptime", + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "description": "Policy OPA-PDP CPU Usage Monitoring", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "decimals": 2, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "process-1h: dev-policy-api-69f54f45cc-wq6xq" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "process_1h: dev-policy-api-69f54f45cc-wq6xq" + }, + "properties": [ + { + "id": 
"color", + "value": { + "fixedColor": "dark-purple", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 1 + }, + "id": 26, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": false, + "sizing": "auto", + "text": {} + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "exemplar": true, + "expr": "irate(process_cpu_seconds_total{job=\"opa-pdp-metrics\"}[$__range]) * 100", + "interval": "", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "description": "Policy OPA-PDP Memory Usage Monitoring", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "{area=\"heap\", container=\"policy-api\", endpoint=\"policy-api\", id=\"Tenured Gen\", instance=\"10.42.7.19:6969\", job=\"policy-api\", namespace=\"onap\", pod=\"dev-policy-api-69f54f45cc-sb56v\", service=\"policy-api\"}" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "text", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Tenured Gen - dev-policy-api-69f54f45cc-wq6xq" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-purple", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 1 + }, + "id": 27, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", 
+ "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": false, + "sizing": "auto" + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "exemplar": true, + "expr": "100 * process_resident_memory_bytes{job=\"opa-pdp-metrics\"} / (1024 * 1024 * 1024)", + "hide": false, + "interval": "", + "legendFormat": "RSS : {{ pod }}", + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "gauge" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 17, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "refId": "A" + } + ], + "title": "System", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "description": "Policy OPA-PDP CPU Usage Monitoring", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": 
"process-1h: dev-policy-api-69f54f45cc-wq6xq" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "process_1h: dev-policy-api-69f54f45cc-wq6xq" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-purple", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "editorMode": "code", + "exemplar": true, + "expr": "irate(process_cpu_seconds_total{job=\"opa-pdp-metrics\"}[$__range])*100", + "interval": "", + "legendFormat": "opa: {{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "description": "Policy OPA-PDP Memory Usage Monitoring", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ 
+ { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "{area=\"heap\", container=\"policy-api\", endpoint=\"policy-api\", id=\"Tenured Gen\", instance=\"10.42.7.19:6969\", job=\"policy-api\", namespace=\"onap\", pod=\"dev-policy-api-69f54f45cc-sb56v\", service=\"policy-api\"}" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "text", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Tenured Gen - dev-policy-api-69f54f45cc-wq6xq" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-purple", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 7, + "x": 8, + "y": 8 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "editorMode": "code", + "exemplar": true, + "expr": "100 * process_resident_memory_bytes{job=\"opa-pdp-metrics\"} / (1024 * 1024 * 1024)", + "hide": false, + "interval": "", + "legendFormat": "Alloc : {{ pod }}", + "range": true, + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 21, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "refId": "A" + } + ], + "title": "Requests", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "description": "Policy Decisions Total for OPA-PDP per instance", + "fieldConfig": { + "defaults": { + 
"color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 30, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "exemplar": true, + "expr": "opa_decision_response_time_seconds_count{instance=\"policy-opa-pdp:8282\", job=\"opa-pdp-metrics\"}", + "hide": false, + "interval": "", + "legendFormat": "Decision Count", + "refId": "B" + } + ], + "title": "Policy Decisions", + "type": "bargauge" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Policy OPA-PDP", + "uid": "go-U1x9FQmA", + "version": 3, + "weekStart": "" +} diff --git a/compose/metrics/prometheus.yml b/compose/metrics/prometheus.yml index 5ff5f949..5cf4051c 100644 --- a/compose/metrics/prometheus.yml +++ b/compose/metrics/prometheus.yml @@ -1,6 +1,7 @@ # # ===========LICENSE_START==================================================== # Copyright (C) 2022-2024 Nordix Foundation. +# Modifications Copyright 2025 Deutsche Telekom # ============================================================================ # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -79,6 +80,14 @@ scrape_configs: username: "policyadmin" password: "zb!XztG34" +- job_name: "opa-pdp-metrics" + static_configs: + - targets: + - "policy-opa-pdp:8282" + basic_auth: + username: "policyadmin" + password: "zb!XztG34" + - job_name: "xacml-pdp-metrics" static_configs: - targets: diff --git a/csit/resources/scripts/config_setup.sh b/csit/resources/scripts/config_setup.sh index 1cdd260b..bd039958 100755 --- a/csit/resources/scripts/config_setup.sh +++ b/csit/resources/scripts/config_setup.sh @@ -1,6 +1,7 @@ #!/bin/bash # ============LICENSE_START======================================================= # Copyright (C) 2025 Nordix Foundation. All rights reserved. +# Modifications Copyright 2025 Deutsche Telekom # ================================================================================ # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,7 +23,7 @@ export POLICY_API_ROBOT="api-test.robot api-slas.robot" export POLICY_PAP_ROBOT="pap-test.robot pap-slas.robot" export POLICY_APEX_PDP_ROBOT="apex-pdp-test.robot apex-slas.robot" export POLICY_XACML_PDP_ROBOT="xacml-pdp-test.robot xacml-pdp-slas.robot" -export POLICY_OPA_PDP_ROBOT="opa-pdp-test.robot" +export POLICY_OPA_PDP_ROBOT="opa-pdp-test.robot opa-pdp-slas.robot" export POLICY_DROOLS_PDP_ROBOT="drools-pdp-test.robot" export POLICY_DISTRIBUTION_ROBOT="distribution-test.robot" diff --git a/csit/resources/tests/opa-pdp-slas.robot b/csit/resources/tests/opa-pdp-slas.robot new file mode 100644 index 00000000..da28b415 --- /dev/null +++ b/csit/resources/tests/opa-pdp-slas.robot @@ -0,0 +1,60 @@ +*** Settings *** +Library OperatingSystem +Resource common-library.robot + +*** Test Cases *** +WaitForPrometheusServer + [Documentation] Sleep time to wait for Prometheus server to gather all metrics + Sleep 1 minute + +ValidateOPAPolicyDecisionsTotalCounter + [Documentation] Validate opa policy decision counters using 
prometheus metrics + ValidateOPAPrometheusMetric opa_decision_response_time_seconds_count{instance="policy-opa-pdp:8282", job="opa-pdp-metrics"} 9 + +ValidateOPAPolicyDataTotalCounter + [Documentation] Validate opa policy data counters using prometheus metrics + ValidateOPAPrometheusMetric opa_data_response_time_seconds_count{instance="policy-opa-pdp:8282", job="opa-pdp-metrics"} 12 + +ValidateOPADecisionAverageResponseTime + [Documentation] Ensure average response time is less than 1.5 seconds + ValidateOPADecisionAverageResponseTimeMetric 1.5 + +ValidateOPADataAverageResponseTime + [Documentation] Ensure average response time is less than 1.5 seconds + ValidateOPADataAverageResponseTimeMetric 1.5 + + +*** Keywords *** +ValidateOPAPrometheusMetric + [Arguments] ${url} ${expectedLimit} + [Documentation] Check the policy decision/data execution count + ${resp}= QueryPrometheus ${url} + ${actualValue}= Evaluate ${resp['data']['result'][0]['value'][1]} + Should Be True ${actual_value} == ${expectedLimit} + +ValidateOPADecisionAverageResponseTimeMetric + [Arguments] ${threshold} + [Documentation] Validate that the average response time is below the threshold + + ${sum_resp}= QueryPrometheus opa_decision_response_time_seconds_sum{instance="policy-opa-pdp:8282", job="opa-pdp-metrics"} + ${count_resp}= QueryPrometheus opa_decision_response_time_seconds_count{instance="policy-opa-pdp:8282", job="opa-pdp-metrics"} + + ${sum_value}= Evaluate ${sum_resp['data']['result'][0]['value'][1]} + ${count_value}= Evaluate ${count_resp['data']['result'][0]['value'][1]} + + ${avg_response_time}= Evaluate float(${sum_value}) / float(${count_value}) + Should Be True ${avg_response_time} < ${threshold} msg=Average response time exceeded ${threshold} + + +ValidateOPADataAverageResponseTimeMetric + [Arguments] ${threshold} + [Documentation] Validate that the average response time is below the threshold + + ${sum_resp}= QueryPrometheus opa_data_response_time_seconds_sum{instance="policy-opa-pdp:8282", 
job="opa-pdp-metrics"} + ${count_resp}= QueryPrometheus opa_data_response_time_seconds_count{instance="policy-opa-pdp:8282", job="opa-pdp-metrics"} + + ${sum_value}= Evaluate ${sum_resp['data']['result'][0]['value'][1]} + ${count_value}= Evaluate ${count_resp['data']['result'][0]['value'][1]} + + ${avg_response_time}= Evaluate float(${sum_value}) / float(${count_value}) + Should Be True ${avg_response_time} < ${threshold} msg=Average response time exceeded ${threshold} diff --git a/csit/run-project-csit.sh b/csit/run-project-csit.sh index 0e1b9c79..da01ea12 100755 --- a/csit/run-project-csit.sh +++ b/csit/run-project-csit.sh @@ -189,9 +189,9 @@ function setup_xacml_pdp() { } function setup_opa_pdp() { - export ROBOT_FILES="opa-pdp-test.robot" + export ROBOT_FILES="opa-pdp-test.robot opa-pdp-slas.robot" export PROJECT="opa-pdp" - source "${DOCKER_COMPOSE_DIR}"/start-compose.sh opa-pdp + source "${DOCKER_COMPOSE_DIR}"/start-compose.sh opa-pdp --grafana echo "Waiting 3 minutes for OPA-PDP to start..." sleep 180 check_rest_endpoint "${PAP_PORT}" diff --git a/helm/prometheus/resources/prometheus.yml b/helm/prometheus/resources/prometheus.yml index b3f656fe..71ecdded 100644 --- a/helm/prometheus/resources/prometheus.yml +++ b/helm/prometheus/resources/prometheus.yml @@ -1,6 +1,7 @@ # # ===========LICENSE_START==================================================== # Copyright (C) 2023 Nordix Foundation. +# Modifications Copyright 2025 Deutsche Telekom # ============================================================================ # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -78,6 +79,14 @@ scrape_configs: username: "policyadmin" password: "zb!XztG34" + - job_name: "opa-pdp-metrics" + static_configs: + - targets: + - "policy-opa-pdp:8282" + basic_auth: + username: "policyadmin" + password: "zb!XztG34" + - job_name: "xacml-pdp-metrics" static_configs: - targets: -- 2.16.6