Attempt to deploy Loki and Promtail
This commit is contained in:
parent
caf2078b64
commit
507ddbab2e
|
|
@ -0,0 +1,24 @@
|
||||||
|
# Kubernetes Configuration
|
||||||
|
|
||||||
|
This directory contains Kubernetes manifests and configuration for the video infrastructure.
|
||||||
|
|
||||||
|
## Managing CRDs
|
||||||
|
|
||||||
|
The file `all-crds.yaml` contains all Custom Resource Definitions (CRDs) required by the monitoring stack (Prometheus Operator).
|
||||||
|
|
||||||
|
### When to update CRDs
|
||||||
|
|
||||||
|
You should regenerate `all-crds.yaml` by running `scripts/update-crds.sh` when:
|
||||||
|
|
||||||
|
1. **Upgrading the `kube-prometheus-stack` Helm chart**: If you bump the chart version in `k8s/monitoring/Chart.yaml` and update the dependencies, you must also update the CRDs to match the new version.
|
||||||
|
2. **Missing CRD fields**: If you encounter errors like `field not declared in schema` during ArgoCD syncs, it likely means the installed CRDs are outdated.
|
||||||
|
|
||||||
|
### How to update
|
||||||
|
|
||||||
|
Run the update script from the repository root:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/update-crds.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
This script extracts the CRDs from the local `kube-prometheus-stack` chart package and concatenates them into `k8s/all-crds.yaml`.
|
||||||
|
|
@ -0,0 +1,28 @@
|
||||||
|
# Argo CD Application that deploys the Helm chart at k8s/loki into the
# `loki` namespace of the in-cluster API server.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: loki
spec:
  project: default
  destination:
    namespace: loki
    server: 'https://kubernetes.default.svc'
  source:
    repoURL: 'git@bitbucket.org:jamkazam/video-iac.git'
    path: k8s/loki
    # Quoted so the rendered value is always a YAML string: an unquoted
    # branch/tag such as `1.10` or `true` would be typed as a number/boolean,
    # and an empty value would render as null.
    targetRevision: {{ .Values.gitBranch | quote }}
    helm:
      valueFiles:
        - values.yaml
  syncPolicy:
    # Sync automatically and delete resources that were removed from git.
    automated:
      prune: true
    syncOptions:
      - CreateNamespace=true
      - ServerSideApply=true
    # Retry a failed sync up to 5 times; the wait starts at 5s, is multiplied
    # by 2 after each attempt, and is capped at 3m.
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
|
||||||
|
|
@ -0,0 +1,28 @@
|
||||||
|
# Argo CD Application that deploys the Helm chart at k8s/promtail into the
# `loki` namespace of the in-cluster API server.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: promtail
spec:
  project: default
  destination:
    namespace: loki
    server: 'https://kubernetes.default.svc'
  source:
    repoURL: 'git@bitbucket.org:jamkazam/video-iac.git'
    path: k8s/promtail
    # Quoted so the rendered value is always a YAML string: an unquoted
    # branch/tag such as `1.10` or `true` would be typed as a number/boolean,
    # and an empty value would render as null.
    targetRevision: {{ .Values.gitBranch | quote }}
    helm:
      valueFiles:
        - values.yaml
  syncPolicy:
    # Sync automatically and delete resources that were removed from git.
    automated:
      prune: true
    syncOptions:
      - CreateNamespace=true
      - ServerSideApply=true
    # Retry a failed sync up to 5 times; the wait starts at 5s, is multiplied
    # by 2 after each attempt, and is capped at 3m.
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
# Helm chart metadata for the `loki` chart.
apiVersion: v2
type: application
name: loki
description: A Helm chart for Loki
# Version of this chart itself.
version: 0.1.0
# Kept quoted so the value stays a string instead of being typed as a float.
appVersion: '1.0'
|
||||||
Binary file not shown.
|
|
@ -0,0 +1,51 @@
|
||||||
|
# Values passed to the `loki` subchart.
# NOTE(review): confirm that every key below is nested at the level the
# vendored subchart expects; the chart version is not visible from this file.
loki:
  # Loki configuration, supplied as a literal string.
  config: |
    auth_enabled: false
    server:
      http_listen_port: 3100
    ingester:
      lifecycler:
        address: 127.0.0.1
        ring:
          kvstore:
            store: inmemory
          replication_factor: 1
    schema_config:
      configs:
        - from: 2020-10-24
          store: boltdb-shipper
          object_store: filesystem
          schema: v11
          index:
            prefix: index_
            period: 24h
    storage_config:
      boltdb_shipper:
        active_index_directory: /data/loki/index
        shared_store: filesystem
      filesystem:
        directory: /data/loki/chunks
    chunk_store_config:
      max_look_back_period: 672h
    table_manager:
      retention_deletes_enabled: true
      retention_period: 672h

  # One single-binary replica backed by a 20Gi persistent volume.
  singleBinary:
    replicas: 1
    persistence:
      enabled: true
      size: 20Gi
      storageClass: 'linode-block-storage-retain'

  # The read, write and backend targets are scaled to zero.
  read:
    replicas: 0

  write:
    replicas: 0

  backend:
    replicas: 0

  # The gateway is disabled.
  # NOTE(review): presumably clients then talk to the Loki service directly
  # on port 3100 (the http_listen_port above) - confirm.
  gateway:
    enabled: false
|
||||||
|
|
@ -131,6 +131,30 @@ kube-prometheus-stack:
|
||||||
requests:
|
requests:
|
||||||
storage: 30Gi
|
storage: 30Gi
|
||||||
|
|
||||||
|
# Alertmanager configuration: sends e-mail through an SMTP relay for the
# WebrtcBeCrashed alert and routes every other alert to the no-op `null`
# receiver.
# NOTE(review): this mapping lives inside the `kube-prometheus-stack` values;
# presumably under `alertmanager:` - confirm the nesting when applying.
config:
  global:
    resolve_timeout: 5m
    smtp_smarthost: 'email-smtp.us-east-1.amazonaws.com:587'
    smtp_from: 'support@jamkazam.com'
    # NOTE(review): this looks like an IAM user name; confirm it is the
    # actual SMTP user name issued for the relay.
    smtp_auth_username: 'ses-smtp-user.20251206-174105'
    # The password is read from a file instead of being committed to git.
    # Required follow-up:
    #   1. Rotate the SMTP credential that was previously committed here.
    #   2. Create a Secret named `alertmanager-smtp` with the key
    #      `smtp_auth_password` in the Alertmanager namespace.
    #   3. List that Secret under `alertmanagerSpec.secrets` so it is mounted
    #      at /etc/alertmanager/secrets/alertmanager-smtp/.
    smtp_auth_password_file: '/etc/alertmanager/secrets/alertmanager-smtp/smtp_auth_password'
    smtp_require_tls: true
  route:
    group_by: ['job']
    group_wait: 30s
    group_interval: 5m
    repeat_interval: 12h
    # Default receiver: a receiver with no integrations, so nothing is sent.
    receiver: 'null'
    routes:
      - match:
          alertname: WebrtcBeCrashed
        receiver: 'email-alerts'
  receivers:
    # Must stay quoted: a bare null would be parsed as YAML null.
    - name: 'null'
    - name: 'email-alerts'
      email_configs:
        - to: 'alerts@jamkazam.com'
          send_resolved: true
|
||||||
grafana:
|
grafana:
|
||||||
persistence:
|
persistence:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
|
|
||||||
|
|
@ -131,6 +131,30 @@ kube-prometheus-stack:
|
||||||
requests:
|
requests:
|
||||||
storage: 30Gi
|
storage: 30Gi
|
||||||
|
|
||||||
|
# Alertmanager configuration: sends e-mail through an SMTP relay for the
# WebrtcBeCrashed alert and routes every other alert to the no-op `null`
# receiver.
# NOTE(review): this mapping lives inside the `kube-prometheus-stack` values;
# presumably under `alertmanager:` - confirm the nesting when applying.
config:
  global:
    resolve_timeout: 5m
    smtp_smarthost: 'email-smtp.us-east-1.amazonaws.com:587'
    smtp_from: 'support@jamkazam.com'
    # NOTE(review): this looks like an IAM user name; confirm it is the
    # actual SMTP user name issued for the relay.
    smtp_auth_username: 'ses-smtp-user.20251206-174105'
    # The password is read from a file instead of being committed to git.
    # Required follow-up:
    #   1. Rotate the SMTP credential that was previously committed here.
    #   2. Create a Secret named `alertmanager-smtp` with the key
    #      `smtp_auth_password` in the Alertmanager namespace.
    #   3. List that Secret under `alertmanagerSpec.secrets` so it is mounted
    #      at /etc/alertmanager/secrets/alertmanager-smtp/.
    smtp_auth_password_file: '/etc/alertmanager/secrets/alertmanager-smtp/smtp_auth_password'
    smtp_require_tls: true
  route:
    group_by: ['job']
    group_wait: 30s
    group_interval: 5m
    repeat_interval: 12h
    # Default receiver: a receiver with no integrations, so nothing is sent.
    receiver: 'null'
    routes:
      - match:
          alertname: WebrtcBeCrashed
        receiver: 'email-alerts'
  receivers:
    # Must stay quoted: a bare null would be parsed as YAML null.
    - name: 'null'
    - name: 'email-alerts'
      email_configs:
        - to: 'alerts@jamkazam.com'
          send_resolved: true
|
||||||
grafana:
|
grafana:
|
||||||
persistence:
|
persistence:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
# Helm chart metadata for the `promtail` chart.
apiVersion: v2
type: application
name: promtail
description: A Helm chart for Promtail
# Version of this chart itself.
version: 0.1.0
# Kept quoted so the value stays a string instead of being typed as a float.
appVersion: '1.0'
|
||||||
Binary file not shown.
|
|
@ -0,0 +1,4 @@
|
||||||
|
# Values passed to the `promtail` subchart.
promtail:
  config:
    # Loki push endpoint that Promtail ships logs to.
    # NOTE(review): assumes a Service named `loki` in the `loki` namespace
    # listening on port 3100 - confirm against the deployed Loki release.
    clients:
      - url: 'http://loki.loki.svc:3100/loki/api/v1/push'
|
||||||
|
|
@ -0,0 +1,29 @@
|
||||||
|
# Alerting rules for the webrtc-be container in the webrtc-be namespace.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: webrtc-be-log-alerts
  labels:
    app: webrtc-be
spec:
  groups:
    - name: webrtc-be.alerts
      rules:
        # Fires when any log line containing "error" was seen in the last 5m.
        # NOTE(review): the `|=` line filter applied to a log stream selector
        # is LogQL, not PromQL. Confirm this rule is evaluated by a Loki
        # ruler; Prometheus itself would presumably reject the expression.
        - alert: WebrtcBeError
          expr: 'sum(count_over_time({container="webrtc-be", namespace="webrtc-be"} |= "error" [5m])) > 0'
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: 'Errors found in webrtc-be logs'
            description: 'The webrtc-be container is logging errors. Please check the logs.'
            # Link to Grafana Explore for the matching log stream; the base
            # URL is filled in by Helm from `grafana.externalUrl`.
            loki_link: >-
              {{ .Values.grafana.externalUrl }}/explore?orgId=1&left=["now-1h","now","Loki",{"expr":"{container=\"webrtc-be\", namespace=\"webrtc-be\"}"}]

        # Fires when the container restart counter increased within the last
        # 5m and the condition has held for 1m.
        - alert: WebrtcBeCrashed
          expr: 'increase(kube_pod_container_status_restarts_total{container="webrtc-be", namespace="webrtc-be"}[5m]) > 0'
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: 'webrtc-be crashed'
            description: 'The webrtc-be pod has crashed. Please check the logs.'
|
||||||
Loading…
Reference in New Issue