Test alert

This commit is contained in:
Seth Call 2026-01-08 08:34:48 -06:00
parent 9a6b86f96e
commit 9a0303cbb5
2 changed files with 31 additions and 3 deletions

View File

@ -6,10 +6,29 @@ metadata:
app: kube-prometheus-stack
app.kubernetes.io/instance: {{ .Release.Name }}
spec:
groups:
groups:
- name: node.alerts
rules:
- alert: NodeHighCPU
# Synthetic alert used to exercise the alert-delivery path end to end.
# The expression vector(1) always yields a sample, so this rule is active
# for as long as it is loaded; with "for: 0m" there is no pending delay
# before it fires.
# NOTE(review): the description says "manually triggered", but nothing in
# the expression gates it - presumably it is enabled/disabled by deploying
# or removing this rule. Confirm, and remove the rule once the pipeline
# has been verified, since it is labelled severity: critical.
- alert: InternalTestAlert
expr: vector(1)
for: 0m
labels:
severity: critical
annotations:
summary: "Internal Alert Pipeline Test"
description: "This alert is manually triggered to verify the Slack alerting pipeline."
# Flags instances whose node_cpu_seconds_total series carry no non-empty
# "workload" label.
# Left side: one result per instance that exports node_cpu_seconds_total.
# Right side: one result per instance that has at least one such series
# with a non-empty workload label (regex ".+").
# "unless" drops every left-side instance that also appears on the right,
# leaving only the unlabelled instances. The condition must hold for 5m
# before the alert fires.
# The doubled-brace escapes in the description make Helm emit a literal
# template expression, so the instance label is expanded at alert time
# rather than at chart render time.
# NOTE(review): the description refers to "NodeHighCPU alerts" while the
# workload-filtered rule that follows is named MediaNodeHighCPU - confirm
# the wording is still the intended one.
- alert: NodeMissingWorkloadLabel
expr: |
count by (instance) (node_cpu_seconds_total) unless count by (instance) (node_cpu_seconds_total{workload=~".+"})
for: 5m
labels:
severity: warning
annotations:
summary: "Node missing workload label on metrics"
description: "Metrics for instance {{ "{{" }} $labels.instance {{ "}}" }} are missing the 'workload' label, which is required for NodeHighCPU alerts."
- alert: MediaNodeHighCPU
expr: |
(
(1 - avg without (cpu, mode) (rate(node_cpu_seconds_total{mode="idle", workload="media"}[1m]))) * 100 > {{ .Values.cpuThresholdMedia | default 65 }}
@ -23,4 +42,4 @@ spec:
severity: warning
annotations:
summary: "High CPU usage on node {{ "{{" }} $labels.instance {{ "}}" }}"
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} has CPU usage above threshold (current value: {{ "{{" }} $value | printf \"%.2f\" {{ "}}" }}%)"
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} (workload: {{ "{{" }} $labels.workload {{ "}}" }}) has CPU usage above threshold (current value: {{ "{{" }} $value | printf \"%.2f\" {{ "}}" }}%)"

View File

@ -170,6 +170,15 @@ kube-prometheus-stack:
repeat_interval: 12h
receiver: 'null'
routes:
# Child routes: each entry selects alerts by an exact alertname value and
# hands them to the 'slack-notifications' receiver.
# NOTE(review): "match" is the legacy equality matcher; newer Alertmanager
# releases document "matchers" as its replacement - confirm which form the
# deployed Alertmanager version expects before migrating.
# Pipeline self-test alert.
- match:
alertname: InternalTestAlert
receiver: 'slack-notifications'
# High CPU on nodes selected by workload="media" in the rule expression.
- match:
alertname: MediaNodeHighCPU
receiver: 'slack-notifications'
# Instances whose CPU metrics lack the workload label.
- match:
alertname: NodeMissingWorkloadLabel
receiver: 'slack-notifications'
- match:
alertname: NodeHighCPU
receiver: 'slack-notifications'