try to fix this broken prometheus in staging

This commit is contained in:
Seth Call 2025-11-19 20:48:35 -06:00
parent 1e4b8d0d65
commit 001f621769
1 changed files with 96 additions and 33 deletions

View File

@ -1,38 +1,43 @@
# Helm chart values for Prometheus Operator with HTTPS and basic auth # Helm chart values for Prometheus Operator with HTTPS and basic auth
# Explicitly enable RBAC resource creation
rbac:
create: true
kube-prometheus-stack: kube-prometheus-stack:
nodeExporter:
enabled: true # Disable the default ServiceMonitor configuration paths to prevent duplicates
prometheus-node-exporter:
serviceMonitor: serviceMonitor:
enabled: true enabled: false
relabelings: nodeExporter:
- sourceLabels: [__meta_kubernetes_pod_node_name] serviceMonitor:
targetLabel: instance enabled: false
action: replace
# Optional Rule 2: If the original IP address needs to be retained
# as a separate label (e.g., 'ip_address'), this rule can be added:
- sourceLabels: [__address__]
targetLabel: ip_address
action: replace
prometheus: prometheus:
ingress: ingress:
enabled: true enabled: true
pathType: Prefix
annotations: annotations:
kubernetes.io/ingress.class: nginx kubernetes.io/ingress.class: nginx
nginx.ingress.kubernetes.io/rewrite-target: /$2 #nginx.ingress.kubernetes.io/rewrite-target: /$2
cert-manager.io/cluster-issuer: letsencrypt-nginx-production cert-manager.io/cluster-issuer: letsencrypt-nginx-production
# nginx.ingress.kubernetes.io/auth-type: basic nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
# nginx.ingress.kubernetes.io/auth-secret: basic-auth nginx.ingress.kubernetes.io/auth-type: basic
# nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required' nginx.ingress.kubernetes.io/auth-secret: monitoring-basic-auth
nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
hosts: hosts:
- monitoring.staging.video.jamkazam.com - monitoring.staging.video.jamkazam.com
paths: paths:
- /prometheus(/|$)(.*) - /prometheus
tls: tls:
- secretName: monitoring - secretName: monitoring
hosts: hosts:
- monitoring.staging.video.jamkazam.com - monitoring.staging.video.jamkazam.com
prometheusSpec: prometheusSpec:
routePrefix: / retention: 60d
retentionSize: 20GB
routePrefix: /prometheus
externalUrl: https://monitoring.staging.video.jamkazam.com/prometheus externalUrl: https://monitoring.staging.video.jamkazam.com/prometheus
storageSpec: storageSpec:
volumeClaimTemplate: volumeClaimTemplate:
@ -40,28 +45,83 @@ kube-prometheus-stack:
storageClassName: linode-block-storage-retain storageClassName: linode-block-storage-retain
resources: resources:
requests: requests:
storage: 10Gi storage: 30Gi
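# 2. Important: make Prometheus ignore the chart's default node-exporter ServiceMonitor.
# node-exporter is scraped by the manual 'node-exporter' job in additionalScrapeConfigs below;
# if the default ServiceMonitor were also selected, every node metric would be collected twice.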
serviceMonitorSelector:
matchExpressions:
# Exclude the default node-exporter ServiceMonitor
- key: app.kubernetes.io/name
operator: NotIn
values:
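# Value of the app.kubernetes.io/name label carried by the default node-exporter ServiceMonitor
# (TODO confirm, e.g. `kubectl get servicemonitor -n monitoring --show-labels`)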
- prometheus-node-exporter
serviceMonitorNamespaceSelector:
matchExpressions:
- key: kubernetes.io/metadata.name
operator: In
values:
- monitoring # Its own namespace
- webrtc-be # Your app's namespace
# Add the manual scrape configuration
additionalScrapeConfigs:
- job_name: 'node-exporter'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
# 1. Filter: Precisely target the node-exporter service in the monitoring namespace.
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name]
separator: '/'
# Assuming the service name is 'monitoring-prometheus-node-exporter'
regex: 'monitoring/monitoring-prometheus-node-exporter'
action: keep
# 2. Filter: Ensure we are targeting the standard port (usually 9100)
- source_labels: [__address__]
regex: '.*:9100$'
action: keep
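# 3. The fix: set the instance label to the name of the node hosting the endpoint,
# and keep the original scrape address in a separate ip_address label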
- source_labels: [__meta_kubernetes_endpoint_node_name]
target_label: instance
action: replace
- source_labels: [__address__]
target_label: ip_address
action: replace
# 4. Copy every Kubernetes pod label onto the target as a Prometheus label (for dashboard compatibility)
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
# Add the namespace, pod and node labels so they are present for dashboard compatibility
- source_labels: [__meta_kubernetes_namespace]
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
target_label: pod
- source_labels: [__meta_kubernetes_endpoint_node_name]
target_label: node
alertmanager: alertmanager:
ingress: ingress:
enabled: true enabled: true
pathType: Prefix
annotations: annotations:
kubernetes.io/ingress.class: nginx kubernetes.io/ingress.class: nginx
nginx.ingress.kubernetes.io/rewrite-target: /$2 #nginx.ingress.kubernetes.io/rewrite-target: /$2
cert-manager.io/cluster-issuer: letsencrypt-nginx-production cert-manager.io/cluster-issuer: letsencrypt-nginx-production
# nginx.ingress.kubernetes.io/auth-type: basic nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
# nginx.ingress.kubernetes.io/auth-secret: basic-auth nginx.ingress.kubernetes.io/auth-type: basic
# nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required' nginx.ingress.kubernetes.io/auth-secret: monitoring-basic-auth
nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
hosts: hosts:
- monitoring.staging.video.jamkazam.com - monitoring.staging.video.jamkazam.com
paths: paths:
- /alertmanager(/|$)(.*) - /alertmanager
tls: tls:
- secretName: monitoring - secretName: monitoring
hosts: hosts:
- monitoring.staging.video.jamkazam.com - monitoring.staging.video.jamkazam.com
alertmanagerSpec: alertmanagerSpec:
routePrefix: / routePrefix: /alertmanager
externalUrl: https://monitoring.staging.video.jamkazam.com/alertmanager externalUrl: https://monitoring.staging.video.jamkazam.com/alertmanager
storage: storage:
volumeClaimTemplate: volumeClaimTemplate:
@ -69,25 +129,27 @@ kube-prometheus-stack:
storageClassName: linode-block-storage-retain storageClassName: linode-block-storage-retain
resources: resources:
requests: requests:
storage: 10Gi storage: 30Gi
grafana: grafana:
persistence: persistence:
enabled: true enabled: true
storageClassName: linode-block-storage-retain storageClassName: linode-block-storage-retain
size: 10Gi size: 30Gi
ingress: ingress:
enabled: true enabled: true
pathType: Prefix
annotations: annotations:
kubernetes.io/ingress.class: nginx kubernetes.io/ingress.class: nginx
nginx.ingress.kubernetes.io/rewrite-target: /$2 #nginx.ingress.kubernetes.io/rewrite-target: /$2
cert-manager.io/cluster-issuer: letsencrypt-nginx-production cert-manager.io/cluster-issuer: letsencrypt-nginx-production
# nginx.ingress.kubernetes.io/auth-type: basic nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
# nginx.ingress.kubernetes.io/auth-secret: basic-auth #nginx.ingress.kubernetes.io/auth-type: basic
# nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required' #nginx.ingress.kubernetes.io/auth-secret: monitoring-basic-auth
#nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
hosts: hosts:
- monitoring.staging.video.jamkazam.com - monitoring.staging.video.jamkazam.com
path: /grafana(/|$)(.*) path: /grafana
tls: tls:
- secretName: monitoring - secretName: monitoring
hosts: hosts:
@ -97,6 +159,7 @@ kube-prometheus-stack:
domain: monitoring.staging.video.jamkazam.com domain: monitoring.staging.video.jamkazam.com
root_url: "%(protocol)s://%(domain)s/grafana/" root_url: "%(protocol)s://%(domain)s/grafana/"
enable_gzip: "true" enable_gzip: "true"
serve_from_sub_path: true
# Disable control plane metrics # Disable control plane metrics
kubeEtcd: kubeEtcd:
@ -107,4 +170,4 @@ kube-prometheus-stack:
kubeScheduler: kubeScheduler:
enabled: false enabled: false