From 57768208cefc3c25206b46fb8fbb1315feeb0e59 Mon Sep 17 00:00:00 2001
From: Seth Call
Date: Thu, 8 Jan 2026 06:50:30 -0600
Subject: [PATCH] Big change to how we monitor. let's see

---
Review notes (text between the '---' line and the first 'diff --git' line is
ignored by git am):

* Formatting: this patch had been collapsed onto three physical lines. The
  line structure below was restored from the hunk headers and diffstat. YAML
  indentation and the position of blank context lines are a reconstruction;
  in particular, the leading context of the first values-staging.yaml hunk
  ('rbac:' as hunk-header text vs. as a context line) is ambiguous. Verify
  against the real files, or regenerate with 'git format-patch', before
  applying.

* NOTE(review): '__meta_kubernetes_pod_node_label_workload' is not among the
  meta labels Prometheus documents for 'role: pod'. Node labels are documented
  as '__meta_kubernetes_node_label_<name>' and are only attached when the
  kubernetes_sd_config sets 'attach_metadata: { node: true }'. As written,
  the new 'workload' relabel rule probably never sets a value. Suggested
  follow-up: add 'attach_metadata.node: true' under the 'role: pod' entry and
  read '__meta_kubernetes_node_label_workload' instead. Confirm on a live
  target (Prometheus UI, Status > Service Discovery).

* NOTE(review): values-staging.yaml lowers cpuThresholdMedia from 65 to 1.
  This looks like a temporary value for exercising an alert; confirm it is
  intentional before it is promoted or left in place.

 k8s/monitoring/values-production.yaml | 32 ++++++++++++++-----------
 k8s/monitoring/values-staging.yaml    | 34 +++++++++++++++------------
 2 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/k8s/monitoring/values-production.yaml b/k8s/monitoring/values-production.yaml
index 1a6feb3..bb03cd3 100644
--- a/k8s/monitoring/values-production.yaml
+++ b/k8s/monitoring/values-production.yaml
@@ -87,39 +87,43 @@ kube-prometheus-stack:
       additionalScrapeConfigs:
         - job_name: 'node-exporter'
           kubernetes_sd_configs:
-            - role: endpoints
+            - role: pod
           relabel_configs:
-            # 1. Filter: Precisely target the node-exporter service in the monitoring namespace.
-            - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name]
+            # 1. Filter: Precisely target the node-exporter pods in the monitoring namespace.
+            - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name]
               separator: '/'
-              # Assuming the service name is 'monitoring-prometheus-node-exporter'
-              regex: 'monitoring/monitoring-prometheus-node-exporter'
+              regex: 'monitoring/monitoring-prometheus-node-exporter-.*'
               action: keep
 
             # 2. Filter: Ensure we are targeting the standard port (usually 9100)
-            - source_labels: [__address__]
-              regex: '.*:9100$'
+            - source_labels: [__meta_kubernetes_pod_container_port_number]
+              regex: '9100'
               action: keep
 
-            # 3. THE FIX: Set the instance label correctly
-            - source_labels: [__meta_kubernetes_endpoint_node_name]
+            # 3. Pull node labels (workload)
+            - source_labels: [__meta_kubernetes_pod_node_label_workload]
+              target_label: workload
+              action: replace
+
+            # 4. Set instance and node labels correctly
+            - source_labels: [__meta_kubernetes_pod_node_name]
               target_label: instance
               action: replace
-            - source_labels: [__address__]
+            - source_labels: [__meta_kubernetes_pod_node_name]
+              target_label: node
+              action: replace
+            - source_labels: [__meta_kubernetes_pod_ip]
               target_label: ip_address
               action: replace
 
-            # 4. Replicate standard labels for dashboard compatibility
+            # 5. Replicate standard labels for dashboard compatibility
             - action: labelmap
               regex: __meta_kubernetes_pod_label_(.+)
 
-            # Ensure standard labels are present for dashboard compatibility
             - source_labels: [__meta_kubernetes_namespace]
               target_label: namespace
             - source_labels: [__meta_kubernetes_pod_name]
               target_label: pod
-            - source_labels: [__meta_kubernetes_endpoint_node_name]
-              target_label: node
   alertmanager:
     ingress:
       enabled: true
diff --git a/k8s/monitoring/values-staging.yaml b/k8s/monitoring/values-staging.yaml
index 322e3f0..1eb8cbd 100644
--- a/k8s/monitoring/values-staging.yaml
+++ b/k8s/monitoring/values-staging.yaml
@@ -3,7 +3,7 @@ rbac:
   create: true
 
 
-cpuThresholdMedia: 65
+cpuThresholdMedia: 1
 cpuThresholdOther: 80
 
 
@@ -87,39 +87,43 @@ kube-prometheus-stack:
       additionalScrapeConfigs:
         - job_name: 'node-exporter'
           kubernetes_sd_configs:
-            - role: endpoints
+            - role: pod
           relabel_configs:
-            # 1. Filter: Precisely target the node-exporter service in the monitoring namespace.
-            - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name]
+            # 1. Filter: Precisely target the node-exporter pods in the monitoring namespace.
+            - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name]
               separator: '/'
-              # Assuming the service name is 'monitoring-prometheus-node-exporter'
-              regex: 'monitoring/monitoring-prometheus-node-exporter'
+              regex: 'monitoring/monitoring-prometheus-node-exporter-.*'
               action: keep
 
             # 2. Filter: Ensure we are targeting the standard port (usually 9100)
-            - source_labels: [__address__]
-              regex: '.*:9100$'
+            - source_labels: [__meta_kubernetes_pod_container_port_number]
+              regex: '9100'
               action: keep
 
-            # 3. THE FIX: Set the instance label correctly
-            - source_labels: [__meta_kubernetes_endpoint_node_name]
+            # 3. Pull node labels (workload)
+            - source_labels: [__meta_kubernetes_pod_node_label_workload]
+              target_label: workload
+              action: replace
+
+            # 4. Set instance and node labels correctly
+            - source_labels: [__meta_kubernetes_pod_node_name]
               target_label: instance
               action: replace
-            - source_labels: [__address__]
+            - source_labels: [__meta_kubernetes_pod_node_name]
+              target_label: node
+              action: replace
+            - source_labels: [__meta_kubernetes_pod_ip]
               target_label: ip_address
               action: replace
 
-            # 4. Replicate standard labels for dashboard compatibility
+            # 5. Replicate standard labels for dashboard compatibility
             - action: labelmap
               regex: __meta_kubernetes_pod_label_(.+)
 
-            # Ensure standard labels are present for dashboard compatibility
             - source_labels: [__meta_kubernetes_namespace]
               target_label: namespace
             - source_labels: [__meta_kubernetes_pod_name]
               target_label: pod
-            - source_labels: [__meta_kubernetes_endpoint_node_name]
-              target_label: node
   alertmanager:
     ingress:
       enabled: true