---
# Helm chart values for Prometheus Operator (kube-prometheus-stack) with HTTPS
# and basic auth in front of Prometheus and Alertmanager.
#
# NOTE(review): the original file's indentation was flattened. This layout
# assumes an umbrella chart with kube-prometheus-stack as a dependency (all
# subchart values nested under the `kube-prometheus-stack:` key, as the first
# stanza of the original did) — confirm against Chart.yaml dependencies.
#
# NOTE(review): in the original, `prometheus:`, `alertmanager:` and `grafana:`
# each appeared TWICE at the same level (once carrying only nodeSelector
# settings, once carrying ingress/spec settings). Duplicate mapping keys are
# invalid YAML; most parsers silently keep only the last occurrence, which
# would drop the `workload: infra` nodeSelectors. Each pair is merged below.

# Explicitly enable RBAC resource creation
rbac:
  create: true

kube-prometheus-stack:
  # CRDs are managed outside this release
  crds:
    enabled: false

  # Disable the default ServiceMonitor configuration paths to prevent
  # duplicates (node-exporter is scraped by the manual job under
  # additionalScrapeConfigs instead)
  prometheus-node-exporter:
    serviceMonitor:
      enabled: false
  nodeExporter:
    serviceMonitor:
      enabled: false

  prometheus:
    ingress:
      enabled: true
      pathType: Prefix
      annotations:
        # NOTE(review): kubernetes.io/ingress.class is deprecated in favor of
        # spec.ingressClassName (chart value: ingress.ingressClassName)
        kubernetes.io/ingress.class: nginx
        # nginx.ingress.kubernetes.io/rewrite-target: /$2
        cert-manager.io/cluster-issuer: letsencrypt-nginx-production
        nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
        nginx.ingress.kubernetes.io/auth-type: basic
        nginx.ingress.kubernetes.io/auth-secret: monitoring-basic-auth
        nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
      hosts:
        - monitoring.video.jamkazam.com
      paths:
        - /prometheus
      tls:
        - secretName: monitoring
          hosts:
            - monitoring.video.jamkazam.com
    prometheusSpec:
      # Run Prometheus on infra nodes only
      nodeSelector:
        workload: infra
      retention: 60d
      retentionSize: 20GB
      # Serve under /prometheus so the host is shared with Alertmanager and
      # Grafana
      routePrefix: /prometheus
      externalUrl: https://monitoring.video.jamkazam.com/prometheus
      storageSpec:
        volumeClaimTemplate:
          spec:
            storageClassName: linode-block-storage-retain
            resources:
              requests:
                storage: 30Gi
      # !!! CRUCIAL: Ensure the default ServiceMonitor is ignored !!!
      # This prevents duplicate metrics by telling Prometheus to ignore the
      # default node-exporter ServiceMonitor.
      serviceMonitorSelector:
        matchExpressions:
          # Exclude the default node-exporter ServiceMonitor
          - key: app.kubernetes.io/name
            operator: NotIn
            values:
              # Use the label identified above
              - prometheus-node-exporter
      serviceMonitorNamespaceSelector:
        matchExpressions:
          - key: kubernetes.io/metadata.name
            operator: In
            values:
              - monitoring  # Its own namespace
              - webrtc-be   # Your app's namespace
      # Enable discovery of PrometheusRules in these namespaces
      ruleNamespaceSelector: {}  # Match all namespaces (avoids dependency on namespace labels)
      ruleSelector:
        matchExpressions: []  # Match all rules in selected namespaces
      # Add the manual scrape configuration
      additionalScrapeConfigs:
        - job_name: 'node-exporter'
          kubernetes_sd_configs:
            - role: endpoints
          relabel_configs:
            # 1. Filter: precisely target the node-exporter service in the
            #    monitoring namespace
            - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name]
              separator: '/'
              # Assuming the service name is 'monitoring-prometheus-node-exporter'
              regex: 'monitoring/monitoring-prometheus-node-exporter'
              action: keep
            # 2. Filter: ensure we are targeting the standard port (usually 9100)
            - source_labels: [__address__]
              regex: '.*:9100$'
              action: keep
            # 3. THE FIX: set the instance label to the node name
            - source_labels: [__meta_kubernetes_endpoint_node_name]
              target_label: instance
              action: replace
            # Preserve the scrape address as its own label
            - source_labels: [__address__]
              target_label: ip_address
              action: replace
            # 4. Replicate standard labels for dashboard compatibility
            - action: labelmap
              regex: __meta_kubernetes_pod_label_(.+)
            # Ensure standard labels are present for dashboard compatibility
            - source_labels: [__meta_kubernetes_namespace]
              target_label: namespace
            - source_labels: [__meta_kubernetes_pod_name]
              target_label: pod
            - source_labels: [__meta_kubernetes_endpoint_node_name]
              target_label: node

  alertmanager:
    ingress:
      enabled: true
      pathType: Prefix
      annotations:
        # NOTE(review): kubernetes.io/ingress.class is deprecated in favor of
        # spec.ingressClassName (chart value: ingress.ingressClassName)
        kubernetes.io/ingress.class: nginx
        # nginx.ingress.kubernetes.io/rewrite-target: /$2
        cert-manager.io/cluster-issuer: letsencrypt-nginx-production
        nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
        nginx.ingress.kubernetes.io/auth-type: basic
        nginx.ingress.kubernetes.io/auth-secret: monitoring-basic-auth
        nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
      hosts:
        - monitoring.video.jamkazam.com
      paths:
        - /alertmanager
      tls:
        - secretName: monitoring
          hosts:
            - monitoring.video.jamkazam.com
    alertmanagerSpec:
      # Run Alertmanager on infra nodes only
      nodeSelector:
        workload: infra
      routePrefix: /alertmanager
      externalUrl: https://monitoring.video.jamkazam.com/alertmanager
      storage:
        volumeClaimTemplate:
          spec:
            storageClassName: linode-block-storage-retain
            resources:
              requests:
                storage: 30Gi
    config:
      global:
        resolve_timeout: 5m
        smtp_smarthost: 'email-smtp.us-east-1.amazonaws.com:587'
        smtp_from: 'support@jamkazam.com'
        # NOTE(review): SECURITY — plaintext SMTP credentials (an AWS access
        # key) are committed here. Move them to a Kubernetes Secret (e.g. via
        # alertmanager's smtp_auth_password_file or an external-secrets
        # operator) and ROTATE the exposed key.
        smtp_auth_username: 'AKIA2SXEHOQFM326T4WJ'
        smtp_auth_password: 'BM6zKJUOWSc4XF+1dXZZlqAkbybGX+KbY+YciI7PIcsn'
        smtp_require_tls: true
      route:
        group_by: ['job']
        group_wait: 30s
        group_interval: 5m
        repeat_interval: 12h
        # Default receiver: drop anything not matched below
        receiver: 'null'
        routes:
          - match:
              alertname: NodeHighCPU
            receiver: 'slack-notifications'
          - match:
              alertname: WebrtcBeCrashed
            receiver: 'email-and-slack-notifications'
          - match:
              alertname: WebrtcBeDown
            receiver: 'email-and-slack-notifications'
          - match:
              alertname: WebrtcBeError
            receiver: 'email-and-slack-notifications'
          - match:
              alertname: PodOOMKilled
            receiver: 'slack-notifications-oom'
          - match:
              alertname: PodCrashLoopBackOff
            receiver: 'slack-notifications'
      receivers:
        - name: 'null'
        - name: 'email-alerts'
          email_configs:
            - to: 'alerts@jamkazam.com'
              send_resolved: true
        - name: 'slack-notifications'
          slack_configs:
            # NOTE(review): SECURITY — Slack webhook URLs are secrets; this
            # one is committed in plaintext and should be rotated and moved
            # out of VCS.
            - api_url: 'https://hooks.slack.com/services/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'
              channel: '#video-cluster-prd-alerts'
              send_resolved: true
              title: '[PRODUCTION] [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] Monitoring Event Notification'
              text: >-
                {{ range .Alerts }}
                *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
                *Description:* {{ .Annotations.description }}
                *Details:*
                {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
                {{ end }}
                {{ end }}
        - name: 'slack-notifications-oom'
          slack_configs:
            - api_url: 'https://hooks.slack.com/services/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'
              channel: '#video-cluster-prd-alerts'
              send_resolved: false
              title: '[PRODUCTION] [OOM KILLED] Monitoring Event Notification'
              text: >-
                {{ range .Alerts }}
                *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
                *Description:* {{ .Annotations.description }}
                *Details:*
                {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
                {{ end }}
                {{ end }}
        - name: 'email-and-slack-notifications'
          email_configs:
            - to: 'alerts@jamkazam.com'
              send_resolved: true
              headers:
                Subject: '[PRODUCTION] {{ .Status | toUpper }} - {{ range .Alerts }}{{ .Annotations.summary }} {{ end }}'
              # NOTE(review): the HTML anchor markup in this template was
              # stripped when this file was mangled (only the bare link text
              # "View in Alertmanager" / "View Logs in Loki" survived). The
              # <a href=...> tags below are a reconstruction — confirm the
              # intended link targets.
              html: >-
                {{ template "email.default.html" . }}
                <a href="{{ .ExternalURL }}">View in Alertmanager</a>
                {{ range .Alerts }}{{ if .Annotations.loki_link }}<a href="{{ .Annotations.loki_link }}">View Logs in Loki</a>{{ end }}{{ end }}
          slack_configs:
            - api_url: 'https://hooks.slack.com/services/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'
              channel: '#video-cluster-prd-alerts'
              send_resolved: true
              title: '[PRODUCTION] [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] Monitoring Event Notification'
              text: >-
                {{ range .Alerts }}
                *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
                *Description:* {{ .Annotations.description }}
                *Details:*
                {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
                {{ end }}
                {{ if .Annotations.loki_link }} *Logs:* <{{ .Annotations.loki_link }}|View in Loki> {{ end }}
                {{ end }}
                *Source:* <{{ .ExternalURL }}|Alertmanager>

  grafana:
    # Run Grafana on infra nodes only
    nodeSelector:
      workload: infra
    persistence:
      enabled: true
      storageClassName: linode-block-storage-retain
      size: 30Gi
    ingress:
      enabled: true
      pathType: Prefix
      annotations:
        # NOTE(review): kubernetes.io/ingress.class is deprecated in favor of
        # spec.ingressClassName (chart value: ingress.ingressClassName)
        kubernetes.io/ingress.class: nginx
        # nginx.ingress.kubernetes.io/rewrite-target: /$2
        cert-manager.io/cluster-issuer: letsencrypt-nginx-production
        nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
        # Basic auth commented out in the original (Grafana has its own login)
        # nginx.ingress.kubernetes.io/auth-type: basic
        # nginx.ingress.kubernetes.io/auth-secret: monitoring-basic-auth
        # nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
      hosts:
        - monitoring.video.jamkazam.com
      # Grafana subchart uses `path` (singular), unlike the operator ingresses
      path: /grafana
      tls:
        - secretName: monitoring
          hosts:
            - monitoring.video.jamkazam.com
    grafana.ini:
      server:
        domain: monitoring.video.jamkazam.com
        root_url: "%(protocol)s://%(domain)s/grafana/"
        enable_gzip: "true"
        serve_from_sub_path: true
    sidecar:
      dashboards:
        enabled: true
        label: grafana_dashboard
        searchNamespace: ALL
    additionalDataSources:
      - name: Loki
        type: loki
        uid: loki
        url: http://loki.loki.svc:3100
        access: proxy

  # Disable control plane metrics
  kubeEtcd:
    enabled: false
  kubeControllerManager:
    enabled: false
  kubeScheduler:
    enabled: false

  kubelet:
    serviceMonitor:
      trackTimestampsStaleness: false