Attempt to deploy Loki and Promtail
This commit is contained in:
parent
caf2078b64
commit
507ddbab2e
|
|
@ -0,0 +1,24 @@
|
|||
# Kubernetes Configuration
|
||||
|
||||
This directory contains Kubernetes manifests and configuration for the video infrastructure.
|
||||
|
||||
## Managing CRDs
|
||||
|
||||
The file `all-crds.yaml` contains all Custom Resource Definitions (CRDs) required by the monitoring stack (Prometheus Operator).
|
||||
|
||||
### When to update CRDs
|
||||
|
||||
You should regenerate `all-crds.yaml` by running `scripts/update-crds.sh` when:
|
||||
|
||||
1. **Upgrading the `kube-prometheus-stack` Helm chart**: If you bump the chart version in `k8s/monitoring/Chart.yaml` and update the dependencies, you must also update the CRDs to match the new version.
|
||||
2. **Missing CRD fields**: If you encounter errors like `field not declared in schema` during ArgoCD syncs, it likely means the installed CRDs are outdated.
|
||||
|
||||
### How to update
|
||||
|
||||
Run the update script from the repository root:
|
||||
|
||||
```bash
|
||||
./scripts/update-crds.sh
|
||||
```
|
||||
|
||||
This script extracts the CRDs from the local `kube-prometheus-stack` chart package and concatenates them into `k8s/all-crds.yaml`.
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
# Argo CD Application that deploys the Helm chart at k8s/loki into the
# "loki" namespace of the cluster Argo CD itself runs in.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: loki
spec:
  destination:
    namespace: loki
    server: 'https://kubernetes.default.svc'
  source:
    helm:
      valueFiles:
        - values.yaml
    path: k8s/loki
    repoURL: 'git@bitbucket.org:jamkazam/video-iac.git'
    # Quoted so that a branch or tag name that looks like a number, boolean
    # or null (for example 1.0 or true) still renders as a YAML string.
    targetRevision: {{ .Values.gitBranch | quote }}
  project: default
  syncPolicy:
    syncOptions:
      # Let Argo CD create the destination namespace if it does not exist.
      - CreateNamespace=true
      - ServerSideApply=true
    automated:
      # Delete cluster resources that are no longer defined in git.
      prune: true
    # Retry a failed sync up to 5 times, starting at 5s between attempts,
    # doubling each time, and never waiting longer than 3m.
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
# Argo CD Application that deploys the Helm chart at k8s/promtail into the
# "loki" namespace of the cluster Argo CD itself runs in.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: promtail
spec:
  destination:
    namespace: loki
    server: 'https://kubernetes.default.svc'
  source:
    helm:
      valueFiles:
        - values.yaml
    path: k8s/promtail
    repoURL: 'git@bitbucket.org:jamkazam/video-iac.git'
    # Quoted so that a branch or tag name that looks like a number, boolean
    # or null (for example 1.0 or true) still renders as a YAML string.
    targetRevision: {{ .Values.gitBranch | quote }}
  project: default
  syncPolicy:
    syncOptions:
      # Let Argo CD create the destination namespace if it does not exist.
      - CreateNamespace=true
      - ServerSideApply=true
    automated:
      # Delete cluster resources that are no longer defined in git.
      prune: true
    # Retry a failed sync up to 5 times, starting at 5s between attempts,
    # doubling each time, and never waiting longer than 3m.
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
# Helm chart metadata for the Loki deployment (Chart API v2).
apiVersion: v2
name: loki
type: application
description: 'A Helm chart for Loki'
# Version of this chart.
version: 0.1.0
# Kept quoted so the value stays the string 1.0 rather than a float.
appVersion: '1.0'
|
||||
Binary file not shown.
|
|
@ -0,0 +1,51 @@
|
|||
# Loki server configuration, supplied to the chart as one literal string.
# It configures: no multi-tenancy auth, HTTP on port 3100, an in-memory ring
# with replication factor 1, a boltdb-shipper index over filesystem chunk
# storage, and a 672h (28 day) look-back and retention window.
# NOTE(review): the index and chunk directories are under /data/loki;
# confirm the chart mounts the persistent volume at that path.
# NOTE(review): shared_store, max_look_back_period and table_manager look
# like Loki 2.x-era settings; confirm the Loki version shipped by the
# chart still accepts them.
loki:
  config: |
    auth_enabled: false
    server:
      http_listen_port: 3100
    ingester:
      lifecycler:
        address: 127.0.0.1
        ring:
          kvstore:
            store: inmemory
          replication_factor: 1
    schema_config:
      configs:
        - from: 2020-10-24
          store: boltdb-shipper
          object_store: filesystem
          schema: v11
          index:
            prefix: index_
            period: 24h
    storage_config:
      boltdb_shipper:
        active_index_directory: /data/loki/index
        shared_store: filesystem
      filesystem:
        directory: /data/loki/chunks
    chunk_store_config:
      max_look_back_period: 672h
    table_manager:
      retention_deletes_enabled: true
      retention_period: 672h
|
||||
|
||||
# Run one single-binary replica backed by a 20Gi persistent volume.
singleBinary:
  replicas: 1
  persistence:
    enabled: true
    storageClass: 'linode-block-storage-retain'
    size: 20Gi

# The separate read, write and backend components are scaled to zero.
read:
  replicas: 0

write:
  replicas: 0

backend:
  replicas: 0

# The gateway is switched off.
gateway:
  enabled: false
|
||||
|
|
@ -131,6 +131,30 @@ kube-prometheus-stack:
|
|||
requests:
|
||||
storage: 30Gi
|
||||
|
||||
# Alertmanager configuration: WebrtcBeCrashed alerts are e-mailed through
# the SMTP relay below; every other alert goes to the empty 'null' receiver.
config:
  global:
    resolve_timeout: 5m
    smtp_smarthost: 'email-smtp.us-east-1.amazonaws.com:587'
    smtp_from: 'support@jamkazam.com'
    # NOTE(review): Amazon SES SMTP usernames are normally an access key ID
    # rather than an IAM user name; confirm this value authenticates.
    smtp_auth_username: 'ses-smtp-user.20251206-174105'
    # The SMTP password must not be committed to git, so Alertmanager reads
    # it from a file instead. The file is expected to come from a Kubernetes
    # Secret named alertmanager-smtp (key: password) that is listed under
    # alertmanagerSpec.secrets; the operator mounts such secrets at
    # /etc/alertmanager/secrets/<secret-name>/.
    # The password that was previously committed here should be rotated.
    smtp_auth_password_file: /etc/alertmanager/secrets/alertmanager-smtp/password
    smtp_require_tls: true
  route:
    group_by: ['job']
    group_wait: 30s
    group_interval: 5m
    repeat_interval: 12h
    # Default receiver: alerts not matched by a child route are dropped.
    receiver: 'null'
    routes:
      - match:
          alertname: WebrtcBeCrashed
        receiver: 'email-alerts'
  receivers:
    # Quoted so the name is the string "null", not a YAML null.
    - name: 'null'
    - name: 'email-alerts'
      email_configs:
        - to: 'alerts@jamkazam.com'
          send_resolved: true
|
||||
grafana:
|
||||
persistence:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@ -131,6 +131,30 @@ kube-prometheus-stack:
|
|||
requests:
|
||||
storage: 30Gi
|
||||
|
||||
# Alertmanager configuration: WebrtcBeCrashed alerts are e-mailed through
# the SMTP relay below; every other alert goes to the empty 'null' receiver.
config:
  global:
    resolve_timeout: 5m
    smtp_smarthost: 'email-smtp.us-east-1.amazonaws.com:587'
    smtp_from: 'support@jamkazam.com'
    # NOTE(review): Amazon SES SMTP usernames are normally an access key ID
    # rather than an IAM user name; confirm this value authenticates.
    smtp_auth_username: 'ses-smtp-user.20251206-174105'
    # The SMTP password must not be committed to git, so Alertmanager reads
    # it from a file instead. The file is expected to come from a Kubernetes
    # Secret named alertmanager-smtp (key: password) that is listed under
    # alertmanagerSpec.secrets; the operator mounts such secrets at
    # /etc/alertmanager/secrets/<secret-name>/.
    # The password that was previously committed here should be rotated.
    smtp_auth_password_file: /etc/alertmanager/secrets/alertmanager-smtp/password
    smtp_require_tls: true
  route:
    group_by: ['job']
    group_wait: 30s
    group_interval: 5m
    repeat_interval: 12h
    # Default receiver: alerts not matched by a child route are dropped.
    receiver: 'null'
    routes:
      - match:
          alertname: WebrtcBeCrashed
        receiver: 'email-alerts'
  receivers:
    # Quoted so the name is the string "null", not a YAML null.
    - name: 'null'
    - name: 'email-alerts'
      email_configs:
        - to: 'alerts@jamkazam.com'
          send_resolved: true
|
||||
grafana:
|
||||
persistence:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@ -0,0 +1,6 @@
|
|||
# Helm chart metadata for the Promtail deployment (Chart API v2).
apiVersion: v2
name: promtail
type: application
description: 'A Helm chart for Promtail'
# Version of this chart.
version: 0.1.0
# Kept quoted so the value stays the string 1.0 rather than a float.
appVersion: '1.0'
|
||||
Binary file not shown.
|
|
@ -0,0 +1,4 @@
|
|||
# Promtail client configuration: push collected logs to the Loki push API
# at the "loki" service in the "loki" namespace, port 3100.
promtail:
  config:
    clients:
      - url: 'http://loki.loki.svc:3100/loki/api/v1/push'
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
# Alert rules for the webrtc-be container in the webrtc-be namespace.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    app: webrtc-be
  name: webrtc-be-log-alerts
spec:
  groups:
    - name: webrtc-be.alerts
      rules:
        # Fires when at least one webrtc-be log line containing "error" was
        # counted over the last 5 minutes and that has held for 1 minute.
        # NOTE(review): this expression is LogQL, not PromQL. Confirm that
        # a component evaluates this rule against Loki; Prometheus cannot
        # parse a log-stream selector with a line filter.
        - alert: WebrtcBeError
          expr: 'sum(count_over_time({container="webrtc-be", namespace="webrtc-be"} |= "error" [5m])) > 0'
          for: 1m
          annotations:
            summary: 'Errors found in webrtc-be logs'
            description: 'The webrtc-be container is logging errors. Please check the logs.'
            # Grafana Explore link built from the chart's Grafana URL value.
            # NOTE(review): the query part is not URL-encoded; confirm the
            # link survives the e-mail client.
            loki_link: >-
              {{ .Values.grafana.externalUrl }}/explore?orgId=1&left=["now-1h","now","Loki",{"expr":"{container=\"webrtc-be\", namespace=\"webrtc-be\"}"}]
          labels:
            severity: critical

        # Fires when the container restart counter rose during the last
        # 5 minutes and that has held for 1 minute.
        - alert: WebrtcBeCrashed
          expr: 'increase(kube_pod_container_status_restarts_total{container="webrtc-be", namespace="webrtc-be"}[5m]) > 0'
          for: 1m
          annotations:
            summary: 'webrtc-be crashed'
            description: 'The webrtc-be pod has crashed. Please check the logs.'
          labels:
            severity: critical
|
||||
Loading…
Reference in New Issue