try to fix this broken prometheus in staging

This commit is contained in:
Seth Call 2025-11-19 20:48:35 -06:00
parent 1e4b8d0d65
commit 001f621769
1 changed files with 96 additions and 33 deletions

View File

@ -1,38 +1,43 @@
# Helm chart values for Prometheus Operator with HTTPS and basic auth
# Explicitly enable RBAC resource creation
rbac:
create: true
kube-prometheus-stack:
nodeExporter:
enabled: true
# Disable the default ServiceMonitor configuration paths to prevent duplicates
prometheus-node-exporter:
serviceMonitor:
enabled: true
relabelings:
- sourceLabels: [__meta_kubernetes_pod_node_name]
targetLabel: instance
action: replace
# Rule 2: Keep the original scrape address (host:port) available
# as a separate label named 'ip_address':
- sourceLabels: [__address__]
targetLabel: ip_address
action: replace
enabled: false
nodeExporter:
serviceMonitor:
enabled: false
prometheus:
ingress:
enabled: true
pathType: Prefix
annotations:
kubernetes.io/ingress.class: nginx
nginx.ingress.kubernetes.io/rewrite-target: /$2
#nginx.ingress.kubernetes.io/rewrite-target: /$2
cert-manager.io/cluster-issuer: letsencrypt-nginx-production
# nginx.ingress.kubernetes.io/auth-type: basic
# nginx.ingress.kubernetes.io/auth-secret: basic-auth
# nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
nginx.ingress.kubernetes.io/auth-type: basic
nginx.ingress.kubernetes.io/auth-secret: monitoring-basic-auth
nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
hosts:
- monitoring.staging.video.jamkazam.com
paths:
- /prometheus(/|$)(.*)
- /prometheus
tls:
- secretName: monitoring
hosts:
- monitoring.staging.video.jamkazam.com
prometheusSpec:
routePrefix: /
retention: 60d
retentionSize: 20GB
routePrefix: /prometheus
externalUrl: https://monitoring.staging.video.jamkazam.com/prometheus
storageSpec:
volumeClaimTemplate:
@ -40,28 +45,83 @@ kube-prometheus-stack:
storageClassName: linode-block-storage-retain
resources:
requests:
storage: 10Gi
storage: 30Gi
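# Important: exclude the chart's default node-exporter ServiceMonitor from selection.
# node-exporter is scraped by the manual 'node-exporter' job under additionalScrapeConfigs,
# so also selecting the default ServiceMonitor would produce duplicate metrics.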
serviceMonitorSelector:
matchExpressions:
# Exclude the default node-exporter ServiceMonitor
- key: app.kubernetes.io/name
operator: NotIn
values:
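# Expected app.kubernetes.io/name value on that ServiceMonitor (verify with: kubectl -n monitoring get servicemonitors --show-labels)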
- prometheus-node-exporter
serviceMonitorNamespaceSelector:
matchExpressions:
- key: kubernetes.io/metadata.name
operator: In
values:
- monitoring # Its own namespace
- webrtc-be # Your app's namespace
# Add the manual scrape configuration
additionalScrapeConfigs:
- job_name: 'node-exporter'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
# 1. Filter: Precisely target the node-exporter service in the monitoring namespace.
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name]
separator: '/'
# Assuming the service name is 'monitoring-prometheus-node-exporter'
regex: 'monitoring/monitoring-prometheus-node-exporter'
action: keep
# 2. Filter: Ensure we are targeting the standard port (usually 9100)
- source_labels: [__address__]
regex: '.*:9100$'
action: keep
# 3. Set the instance label to the Kubernetes node name instead of the default scrape address (host:port)
- source_labels: [__meta_kubernetes_endpoint_node_name]
target_label: instance
action: replace
- source_labels: [__address__]
target_label: ip_address
action: replace
# 4. Copy every Kubernetes pod label onto the target (labelmap over __meta_kubernetes_pod_label_*)
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
# 5. Add the namespace, pod, and node labels that dashboards commonly expect
- source_labels: [__meta_kubernetes_namespace]
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
target_label: pod
- source_labels: [__meta_kubernetes_endpoint_node_name]
target_label: node
alertmanager:
ingress:
enabled: true
pathType: Prefix
annotations:
kubernetes.io/ingress.class: nginx
nginx.ingress.kubernetes.io/rewrite-target: /$2
#nginx.ingress.kubernetes.io/rewrite-target: /$2
cert-manager.io/cluster-issuer: letsencrypt-nginx-production
# nginx.ingress.kubernetes.io/auth-type: basic
# nginx.ingress.kubernetes.io/auth-secret: basic-auth
# nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
nginx.ingress.kubernetes.io/auth-type: basic
nginx.ingress.kubernetes.io/auth-secret: monitoring-basic-auth
nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
hosts:
- monitoring.staging.video.jamkazam.com
paths:
- /alertmanager(/|$)(.*)
- /alertmanager
tls:
- secretName: monitoring
hosts:
- monitoring.staging.video.jamkazam.com
alertmanagerSpec:
routePrefix: /
routePrefix: /alertmanager
externalUrl: https://monitoring.staging.video.jamkazam.com/alertmanager
storage:
volumeClaimTemplate:
@ -69,25 +129,27 @@ kube-prometheus-stack:
storageClassName: linode-block-storage-retain
resources:
requests:
storage: 10Gi
storage: 30Gi
grafana:
persistence:
enabled: true
storageClassName: linode-block-storage-retain
size: 10Gi
size: 30Gi
ingress:
enabled: true
pathType: Prefix
annotations:
kubernetes.io/ingress.class: nginx
nginx.ingress.kubernetes.io/rewrite-target: /$2
#nginx.ingress.kubernetes.io/rewrite-target: /$2
cert-manager.io/cluster-issuer: letsencrypt-nginx-production
nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
#nginx.ingress.kubernetes.io/auth-type: basic
# nginx.ingress.kubernetes.io/auth-secret: basic-auth
#nginx.ingress.kubernetes.io/auth-secret: monitoring-basic-auth
#nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
hosts:
- monitoring.staging.video.jamkazam.com
path: /grafana(/|$)(.*)
path: /grafana
tls:
- secretName: monitoring
hosts:
@ -97,6 +159,7 @@ kube-prometheus-stack:
domain: monitoring.staging.video.jamkazam.com
root_url: "%(protocol)s://%(domain)s/grafana/"
enable_gzip: "true"
serve_from_sub_path: true
# Disable control plane metrics
kubeEtcd: