# k8s-api-microsvc-app-monitoring / monitoring / backend-rules-config.yaml
# PrometheusRule custom resource (Prometheus Operator API group
# monitoring.coreos.com/v1) named "backend-rules" in the "monitoring" namespace.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: backend-rules
  namespace: monitoring
  labels:
    app: api-app-backend
    # NOTE(review): presumably the label a Prometheus resource's ruleSelector
    # matches on to load this rule file - confirm against the Prometheus CR.
    release: monitoring
spec:
  groups:
  # Rule group "backend.rules"; the entries under `rules` are alerting rules.
  - name: backend.rules
    rules:
    # Warns when a host's CPU utilisation stays above 50% for 2 minutes.
    # Utilisation = 100 - (per-instance average idle rate over 2m, as a percentage).
    - alert: HostHighCpuLoad
      # PromQL aggregations require a parenthesised operand:
      # `avg by(instance) (rate(...))`, not `avg by(instance) rate(...)`.
      expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 50
      for: 2m
      labels:
        severity: warning
        # NOTE(review): static label, presumably used for Alertmanager routing - confirm.
        namespace: monitoring
      annotations:
        summary: "Host CPU load high"
        description: "CPU load on host is over 50% \n Value = {{$value}} \n Instance = {{$labels.instance}}"
    # Critical alert when a container has restarted more than 5 times recently.
    - alert: KubernetesPodCrashLooping
      # kube_pod_container_status_restarts_total is a cumulative counter, so
      # comparing its raw value to a threshold would keep the alert firing for the
      # rest of the pod's lifetime once it had ever restarted more than 5 times.
      # increase() restricts the check to restarts inside the window.
      # NOTE(review): the 1h window is a chosen value (long enough that a pod at
      # the maximum restart back-off still exceeds 5 restarts) - tune as needed.
      expr: increase(kube_pod_container_status_restarts_total[1h]) > 5
      # Fires immediately; the lookback window above already smooths the signal.
      for: 0m
      labels:
        severity: critical
        # NOTE(review): static label, presumably used for Alertmanager routing - confirm.
        namespace: monitoring
      annotations:
        summary: "Kubernetes pod crash looping"
        description: "Pod {{ $labels.pod }} is crash looping \n Value = {{ $value }}"
    # Critical alert raised once a target's `up` series has been 0 for one minute.
    - alert: InstanceDown
      expr: up == 0
      for: 1m
      annotations:
        summary: 'Instance {{ $labels.instance }} down'
        # Folded scalar: the two lines join with a single space, no trailing newline.
        description: >-
          {{ $labels.instance }} of job {{ $labels.job }} has been down
          for more than 1 minute.
      labels:
        namespace: monitoring
        severity: critical