---
# PrometheusRule (prometheus-operator CRD) defining critical alerts for:
#   * the etcd backup file and the etcd image file reported by the
#     "controllerbackupexporter" job (age and size checks), and
#   * free space on the filesystem mounted at /var/lib/etcd.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: wamblee-cluster-monitoring
  namespace: monitoring
spec:
  groups:
    - name: wamblee-org
      partial_response_strategy: ""
      rules:
        # Fires when the newest file_time_seconds value seen in the last
        # 60 minutes is more than 3600 seconds behind the current time, or
        # when no sample exists in that window at all. Without the
        # absent_over_time() branch the comparison yields an empty result
        # for a missing series, so the "not present" case could never fire.
        # NOTE(review): assumes file_time_seconds is a Unix timestamp in
        # seconds -- confirm against the exporter.
        - alert: etcdBackupAbsentOrTooOld
          annotations:
            description: 'etcd backup is too old or not present'
            summary: etcd cluster backup is too old or not present
          expr: |-
            (
              time() -
              max(max_over_time(
                file_time_seconds{job="controllerbackupexporter",type="backup"}[60m]
              ))
              > 3600
            )
            or
            absent_over_time(
              file_time_seconds{job="controllerbackupexporter",type="backup"}[60m]
            )
          for: 5m
          labels:
            severity: critical

        # Fires when the largest file_size value seen in the last 60 minutes
        # is below 10000000, or when no sample exists in that window.
        # NOTE(review): file_size is presumably in bytes -- confirm against
        # the exporter.
        - alert: etcdBackupFileTooSmall
          annotations:
            description: 'etcd backup is too small or not present'
            summary: etcd cluster backup is too small or not present
          expr: |-
            (
              max(max_over_time(
                file_size{job="controllerbackupexporter",type="backup"}[60m]
              ))
              <
              10000000
            )
            or
            absent_over_time(
              file_size{job="controllerbackupexporter",type="backup"}[60m]
            )
          for: 5m
          labels:
            severity: critical

        # Same age check as etcdBackupAbsentOrTooOld, applied to the series
        # labelled type="image".
        - alert: etcdImageFileTooOld
          annotations:
            description: 'etcd image file is too old or not present'
            summary: etcd image file is too old or not present
          expr: |-
            (
              time() -
              max(max_over_time(
                file_time_seconds{job="controllerbackupexporter",type="image"}[60m]
              ))
              > 3600
            )
            or
            absent_over_time(
              file_time_seconds{job="controllerbackupexporter",type="image"}[60m]
            )
          for: 5m
          labels:
            severity: critical

        # Same size check as etcdBackupFileTooSmall, applied to the series
        # labelled type="image".
        # NOTE(review): the threshold here is 10, versus 10000000 for the
        # backup file -- confirm this is intentional and not a typo.
        - alert: etcdImageFileTooSmall
          annotations:
            description: 'etcd image file is too small or not present'
            summary: etcd cluster image file is too small or not present
          expr: |-
            (
              max(max_over_time(
                file_size{job="controllerbackupexporter",type="image"}[60m]
              ))
              <
              10
            )
            or
            absent_over_time(
              file_size{job="controllerbackupexporter",type="image"}[60m]
            )
          for: 5m
          labels:
            severity: critical

        # Fires when the free/size ratio of the filesystem mounted at
        # /var/lib/etcd stays below 0.30 (less than 30% free) for 5 minutes.
        - alert: etcdDiskAlmostFull
          annotations:
            description: 'etcd high disk usage'
            summary: etcd high disk usage
          expr: |-
            node_filesystem_free_bytes{mountpoint="/var/lib/etcd"}/
            node_filesystem_size_bytes{mountpoint="/var/lib/etcd"} < 0.30
          for: 5m
          labels:
            severity: critical