---
# PrometheusRule defining the "wamblee-org" alert group: four alerts on the
# etcd backup/image files reported by the "pantherbackupexporter" job and one
# alert on free space of the filesystem mounted at /var/lib/etcd.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: wamblee-cluster-monitoring
  namespace: monitoring
spec:
  groups:
    - name: wamblee-org
      # NOTE(review): left empty as in the original; presumably only relevant
      # when the rule is evaluated by Thanos Ruler - confirm.
      partial_response_strategy: ""
      rules:
        # Fires when the highest file_time_seconds sample seen in the last
        # 60 minutes is more than 3600 seconds behind the evaluation time, or
        # when the series had no samples at all in that window. The second
        # branch is needed because the comparison alone yields an empty result
        # (and therefore no alert) when the metric is missing.
        # NOTE(review): assumes file_time_seconds is a Unix timestamp - confirm
        # against the exporter.
        - alert: etcdBackupAbsentOrTooOld
          annotations:
            description: 'etcd backup is too old or not present'
            summary: etcd cluster backup is too old or not present
          expr: |-
            (
              time()
              - max(max_over_time(
                  file_time_seconds{job="pantherbackupexporter",type="backup"}[60m]
                ))
              > 3600
            )
            or
            absent_over_time(
              file_time_seconds{job="pantherbackupexporter",type="backup"}[60m]
            )
          for: 5m
          labels:
            severity: critical

        # Fires when the largest file_size sample of the last 60 minutes is
        # below 10000000, or when the series had no samples in that window
        # (the "not present" case promised by the annotations).
        # NOTE(review): threshold unit is presumably bytes - confirm.
        - alert: etcdBackupFileTooSmall
          annotations:
            description: 'etcd backup is too small or not present'
            summary: etcd cluster backup is too small or not present
          expr: |-
            (
              max(max_over_time(
                file_size{job="pantherbackupexporter",type="backup"}[60m]
              ))
              < 10000000
            )
            or
            absent_over_time(
              file_size{job="pantherbackupexporter",type="backup"}[60m]
            )
          for: 5m
          labels:
            severity: critical

        # Same age check as etcdBackupAbsentOrTooOld, applied to the series
        # labelled type="image"; also fires when that series is missing.
        - alert: etcdImageFileTooOld
          annotations:
            description: 'etcd image file is too old or not present'
            summary: etcd image file is too old or not present
          expr: |-
            (
              time()
              - max(max_over_time(
                  file_time_seconds{job="pantherbackupexporter",type="image"}[60m]
                ))
              > 3600
            )
            or
            absent_over_time(
              file_time_seconds{job="pantherbackupexporter",type="image"}[60m]
            )
          for: 5m
          labels:
            severity: critical

        # Same size check as etcdBackupFileTooSmall, applied to the series
        # labelled type="image"; also fires when that series is missing.
        # NOTE(review): the threshold here is 10, far below the 10000000 used
        # for the backup file - confirm this is intentional.
        - alert: etcdImageFileTooSmall
          annotations:
            description: 'etcd image file is too small or not present'
            summary: etcd cluster image file is too small or not present
          expr: |-
            (
              max(max_over_time(
                file_size{job="pantherbackupexporter",type="image"}[60m]
              ))
              < 10
            )
            or
            absent_over_time(
              file_size{job="pantherbackupexporter",type="image"}[60m]
            )
          for: 5m
          labels:
            severity: critical

        # Fires when the ratio of free bytes to total bytes on the filesystem
        # mounted at /var/lib/etcd stays below 0.30 for 5 minutes.
        - alert: etcdDiskAlmostFull
          annotations:
            description: 'etcd high disk usage'
            summary: etcd high disk usage
          expr: |-
            node_filesystem_free_bytes{mountpoint="/var/lib/etcd"}
            / node_filesystem_size_bytes{mountpoint="/var/lib/etcd"}
            < 0.30
          for: 5m
          labels:
            severity: critical