etcd monitoring code examples.
This commit is contained in:
parent
092c830f52
commit
f86bb5f55a
6
etcd-inmemory-monitoring/Dockerfile
Normal file
6
etcd-inmemory-monitoring/Dockerfile
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
|
||||||
|
FROM python:3.8
|
||||||
|
RUN pip install prometheus_client
|
||||||
|
|
||||||
|
COPY exporter.py /
|
||||||
|
|
3
etcd-inmemory-monitoring/README
Normal file
3
etcd-inmemory-monitoring/README
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
https://github.com/prometheus/client_python
|
||||||
|
pip install prometheus-client
|
||||||
|
|
64
etcd-inmemory-monitoring/alertingrules.yaml
Normal file
64
etcd-inmemory-monitoring/alertingrules.yaml
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: PrometheusRule
|
||||||
|
metadata:
|
||||||
|
name: wamblee-cluster-monitoring
|
||||||
|
namespace: monitoring
|
||||||
|
spec:
|
||||||
|
groups:
|
||||||
|
- name: wamblee-org
|
||||||
|
partial_response_strategy: ""
|
||||||
|
rules:
|
||||||
|
- alert: etcdBackupAbsentOrTooOld
|
||||||
|
annotations:
|
||||||
|
description: 'etcd backup is too old or not present'
|
||||||
|
summary: etcd cluster backup is too old or not present
|
||||||
|
expr: |-
|
||||||
|
time() -
|
||||||
|
max(max_over_time(file_time_seconds{job="pantherbackupexporter",type="backup"}[60m]))
|
||||||
|
> 3600
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: etcdBackupFileTooSmall
|
||||||
|
annotations:
|
||||||
|
description: 'etcd backup is too small or not present'
|
||||||
|
summary: etcd cluster backup is too small or not present
|
||||||
|
expr: |-
|
||||||
|
max(max_over_time(file_size{job="pantherbackupexporter",type="backup"}[60m]))
|
||||||
|
<
|
||||||
|
10000000
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: etcdImageFileTooOld
|
||||||
|
annotations:
|
||||||
|
description: 'etcd image file is too old or not present'
|
||||||
|
summary: etcd image file is too old or not present
|
||||||
|
expr: |-
|
||||||
|
time() -
|
||||||
|
max(max_over_time(file_time_seconds{job="pantherbackupexporter",type="image"}[60m]))
|
||||||
|
> 3600
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: etcdImageFileTooSmall
|
||||||
|
annotations:
|
||||||
|
description: 'etcd image file is too small or not present'
|
||||||
|
summary: etcd cluster image file is too small or not present
|
||||||
|
expr: |-
|
||||||
|
max(max_over_time(file_size{job="pantherbackupexporter",type="image"}[60m]))
|
||||||
|
<
|
||||||
|
10
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: etcdDiskAlmostFull
|
||||||
|
annotations:
|
||||||
|
description: 'etcd high disk usage'
|
||||||
|
summary: etcd high disk usage
|
||||||
|
expr: |-
|
||||||
|
node_filesystem_free_bytes{mountpoint="/var/lib/etcd"}/
|
||||||
|
node_filesystem_size_bytes{mountpoint="/var/lib/etcd"} < 0.30
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
47
etcd-inmemory-monitoring/deployment.yaml
Normal file
47
etcd-inmemory-monitoring/deployment.yaml
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: pantherbackupmonitoring
|
||||||
|
namespace: monitoring
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: pantherbackupmonitoring
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: pantherbackupmonitoring
|
||||||
|
prometheus-scrapable: "true"
|
||||||
|
spec:
|
||||||
|
terminationGracePeriodSeconds: 0
|
||||||
|
tolerations:
|
||||||
|
- effect: NoSchedule
|
||||||
|
key: node-role.kubernetes.io/control-plane
|
||||||
|
operator: Exists
|
||||||
|
- effect: NoSchedule
|
||||||
|
key: node-role.kubernetes.io/master
|
||||||
|
operator: Exists
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/hostname: panther
|
||||||
|
containers:
|
||||||
|
- name: exporter
|
||||||
|
image: docker.example.com/filemonitor:1.0
|
||||||
|
args:
|
||||||
|
- python3
|
||||||
|
- -u
|
||||||
|
- /exporter.py
|
||||||
|
- backup:/backup/etcd-snapshot-latest.db
|
||||||
|
- image:/backup/etcdimage
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
protocol: TCP
|
||||||
|
name: http
|
||||||
|
volumeMounts:
|
||||||
|
- name: pantherbackup
|
||||||
|
mountPath: /backup
|
||||||
|
readOnly: true
|
||||||
|
volumes:
|
||||||
|
- name: pantherbackup
|
||||||
|
hostPath:
|
||||||
|
path: /var/lib/wamblee/etcd
|
||||||
|
|
19
etcd-inmemory-monitoring/docker-compose.yaml
Normal file
19
etcd-inmemory-monitoring/docker-compose.yaml
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# Local test setup for the file exporter: builds the image from this directory
# and serves the metrics on http://localhost:8080/.
services:
  filemonitor:
    image: docker.example.com/filemonitor:1.0
    build:
      context: .
    command:
      - python3
      - /exporter.py
      # some paths for testing
      # Each argument must have the form <label>:<filepath>; exporter.py splits
      # on the first ':' and fails on arguments that do not contain one.
      - backup:/data/x.txt
      - image:/data/y.txt
    ports:
      - "8080:8080"
    volumes:
      # for testing.
      - /home/user/downloads:/data
|
||||||
|
|
||||||
|
|
||||||
|
|
81
etcd-inmemory-monitoring/exporter.py
Normal file
81
etcd-inmemory-monitoring/exporter.py
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
import time
|
||||||
|
|
||||||
|
import prometheus_client
|
||||||
|
from prometheus_client import start_http_server, Gauge, Counter
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import stat
|
||||||
|
from http.server import HTTPServer
|
||||||
|
|
||||||
|
prometheus_client.REGISTRY.unregister(prometheus_client.GC_COLLECTOR)
|
||||||
|
prometheus_client.REGISTRY.unregister(prometheus_client.PLATFORM_COLLECTOR)
|
||||||
|
prometheus_client.REGISTRY.unregister(prometheus_client.PROCESS_COLLECTOR)
|
||||||
|
|
||||||
|
# Gauges exposing the modification time and size of each monitored file.
|
||||||
|
FILE_TIME = Gauge("file_time_seconds", "File last modification time", labelnames=['path', 'type'])
|
||||||
|
FILE_SIZE = Gauge("file_size", "File size in bytes", labelnames=['path', 'type'])
|
||||||
|
|
||||||
|
|
||||||
|
class Handler(prometheus_client.MetricsHandler):
    """Metrics handler that re-stats the monitored files on every GET.

    The gauges are refreshed inside the request itself, so each response
    reflects the state of the files at the moment of the scrape.
    """

    def do_GET(self) -> None:
        """Refresh the gauges for every entry in FILES, then serve the request.

        Both gauges of a file are first reset to 0 and are only overwritten
        when the path (symlinks followed) is a regular file. A path that is
        missing, unreadable or not a regular file is therefore exported as 0.
        """
        for path, label in FILES.items():
            mtime_gauge = FILE_TIME.labels(path, label)
            size_gauge = FILE_SIZE.labels(path, label)

            # Reset first so a file that disappeared does not keep reporting
            # the values from an earlier request.
            mtime_gauge.set(0)
            size_gauge.set(0)

            try:
                # os.stat follows symlinks.
                stats = os.stat(path)
            except (OSError, ValueError):
                # OSError: path missing or not accessible. ValueError: path
                # rejected by os.stat itself (e.g. embedded NUL byte).
                # Keep the zeros and continue with the remaining files.
                continue

            if stat.S_ISREG(stats.st_mode):
                mtime_gauge.set(stats.st_mtime)
                size_gauge.set(stats.st_size)

        return super().do_GET()
|
||||||
|
|
||||||
|
|
||||||
|
# map of filename to type
|
||||||
|
FILES = {}
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: parse the command line, fill FILES with the
    # path -> label mapping that Handler reads, and serve metrics forever.
    DEFAULT_PORT = 8080

    # The description is kept flush-left because RawTextHelpFormatter prints
    # it verbatim, including any leading whitespace.
    parser = argparse.ArgumentParser(prog=sys.argv[0],
                                     description=f"""Statistics on a (backup) file,

Usage: {sys.argv[0]} [-p|--port <port>] <label1>:<filepath1> .... <labeln>:<filepathn>

Listens on port {DEFAULT_PORT} by default. It exposes statistics
on the monitored files to prometheus. Current metrics are

{FILE_TIME._name}{{path="/path/to/file"}}: file modification time in seconds since 1970
{FILE_SIZE._name}{{path="/path/to/file"}}: file size in bytes

If a path does not exist or is not a regular file then the value 0 is returned.
The exporter follows symlinks.

The syntax of each file is of the form <label>:<file> where <label> is the value of
the type label in the prometheus export.

""",
                                     epilog="Have a lot of fun!",
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("files", nargs="*", help="Files to monitor")
    parser.add_argument("-p", "--port", type=int, default=DEFAULT_PORT, help="Port to listen on")
    args = parser.parse_args()

    for filespec in args.files:
        # Split on the first ':' only, so the file path itself may contain colons.
        label, separator, fname = filespec.partition(":")
        if not separator:
            # Report a usage error instead of dying with a ValueError traceback.
            parser.error(f"invalid file specification {filespec!r}: expected <label>:<filepath>")
        FILES[fname] = label

    PORT = args.port

    print(f"Monitoring files {FILES}")
    # Start up the server to expose the metrics.
    print(f"Listening on port {PORT}")

    HTTPServer(('0.0.0.0', PORT), Handler).serve_forever()
|
13
etcd-inmemory-monitoring/kustomization.yaml
Normal file
13
etcd-inmemory-monitoring/kustomization.yaml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
namespace: monitoring
|
||||||
|
|
||||||
|
|
||||||
|
generatorOptions:
|
||||||
|
disableNameSuffixHash: true
|
||||||
|
|
||||||
|
resources:
|
||||||
|
- deployment.yaml
|
||||||
|
- service.yaml
|
||||||
|
- servicemonitor.yaml
|
||||||
|
- alertingrules.yaml
|
||||||
|
|
17
etcd-inmemory-monitoring/service.yaml
Normal file
17
etcd-inmemory-monitoring/service.yaml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: pantherbackupexporter
|
||||||
|
namespace: monitoring
|
||||||
|
labels:
|
||||||
|
app: pantherbackupmonitoring
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app: pantherbackupmonitoring
|
||||||
|
ports:
|
||||||
|
- port: 8080
|
||||||
|
targetPort: 8080
|
||||||
|
name: http
|
||||||
|
|
||||||
|
|
17
etcd-inmemory-monitoring/servicemonitor.yaml
Normal file
17
etcd-inmemory-monitoring/servicemonitor.yaml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: pantherbackupmonitoring
|
||||||
|
namespace: monitoring
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
path: /metrics
|
||||||
|
port: http
|
||||||
|
scheme: http
|
||||||
|
scrapeTimeout: 30s
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: pantherbackupmonitoring
|
||||||
|
targetLabels:
|
||||||
|
- app
|
Loading…
Reference in New Issue
Block a user