etcd monitoring code examples.
This commit is contained in:
parent
092c830f52
commit
f86bb5f55a
6
etcd-inmemory-monitoring/Dockerfile
Normal file
6
etcd-inmemory-monitoring/Dockerfile
Normal file
@ -0,0 +1,6 @@
|
||||
|
||||
# Image for the file-metrics exporter (exporter.py).
FROM python:3.8

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir prometheus_client

# No ENTRYPOINT is set on purpose: deployment.yaml (`args`) and
# docker-compose.yaml (`command`) pass the full "python3 ... /exporter.py"
# command line themselves.
COPY exporter.py /
|
||||
|
3
etcd-inmemory-monitoring/README
Normal file
3
etcd-inmemory-monitoring/README
Normal file
@ -0,0 +1,3 @@
|
||||
Prometheus Python client library: https://github.com/prometheus/client_python

Install it with: pip install prometheus-client
|
||||
|
64
etcd-inmemory-monitoring/alertingrules.yaml
Normal file
64
etcd-inmemory-monitoring/alertingrules.yaml
Normal file
@ -0,0 +1,64 @@
|
||||
# Alerting rules for the etcd backup and image files reported by the
# pantherbackupexporter job, plus a free-space rule for /var/lib/etcd.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: wamblee-cluster-monitoring
  namespace: monitoring
spec:
  groups:
    - name: wamblee-org
      partial_response_strategy: ""
      rules:
        # Newest backup mtime seen during the last hour is over an hour old.
        - alert: etcdBackupAbsentOrTooOld
          expr: |-
            time() -
            max(max_over_time(file_time_seconds{job="pantherbackupexporter",type="backup"}[60m]))
            > 3600
          for: 5m
          labels:
            severity: critical
          annotations:
            description: "etcd backup is too old or not present"
            summary: "etcd cluster backup is too old or not present"

        # Largest backup size seen during the last hour is below 10000000.
        - alert: etcdBackupFileTooSmall
          expr: |-
            max(max_over_time(file_size{job="pantherbackupexporter",type="backup"}[60m]))
            <
            10000000
          for: 5m
          labels:
            severity: critical
          annotations:
            description: "etcd backup is too small or not present"
            summary: "etcd cluster backup is too small or not present"

        # Newest image-file mtime seen during the last hour is over an hour old.
        - alert: etcdImageFileTooOld
          expr: |-
            time() -
            max(max_over_time(file_time_seconds{job="pantherbackupexporter",type="image"}[60m]))
            > 3600
          for: 5m
          labels:
            severity: critical
          annotations:
            description: "etcd image file is too old or not present"
            summary: "etcd image file is too old or not present"

        # Largest image-file size seen during the last hour is below 10.
        - alert: etcdImageFileTooSmall
          expr: |-
            max(max_over_time(file_size{job="pantherbackupexporter",type="image"}[60m]))
            <
            10
          for: 5m
          labels:
            severity: critical
          annotations:
            description: "etcd image file is too small or not present"
            summary: "etcd cluster image file is too small or not present"

        # Less than 30% of the /var/lib/etcd filesystem is free.
        - alert: etcdDiskAlmostFull
          expr: |-
            node_filesystem_free_bytes{mountpoint="/var/lib/etcd"}/
            node_filesystem_size_bytes{mountpoint="/var/lib/etcd"} < 0.30
          for: 5m
          labels:
            severity: critical
          annotations:
            description: "etcd high disk usage"
            summary: "etcd high disk usage"
|
47
etcd-inmemory-monitoring/deployment.yaml
Normal file
47
etcd-inmemory-monitoring/deployment.yaml
Normal file
@ -0,0 +1,47 @@
|
||||
# Runs the file-metrics exporter on node "panther". The etcd backup directory
# of the host is mounted read-only at /backup and the two monitored files are
# passed to exporter.py as <label>:<path> arguments.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: pantherbackupmonitoring
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: pantherbackupmonitoring
  template:
    metadata:
      labels:
        app: pantherbackupmonitoring
        prometheus-scrapable: "true"
    spec:
      terminationGracePeriodSeconds: 0
      # Allow scheduling on the control-plane node that holds the backups.
      tolerations:
        - effect: NoSchedule
          key: node-role.kubernetes.io/control-plane
          operator: Exists
        - effect: NoSchedule
          key: node-role.kubernetes.io/master
          operator: Exists
      nodeSelector:
        kubernetes.io/hostname: panther
      containers:
        - name: exporter
          image: docker.example.com/filemonitor:1.0
          args:
            - python3
            - -u
            - /exporter.py
            - backup:/backup/etcd-snapshot-latest.db
            - image:/backup/etcdimage
          ports:
            - containerPort: 8080
              protocol: TCP
              name: http
          # Bound the footprint of the exporter on the control-plane node.
          # NOTE(review): values are a conservative guess for a small Python
          # process -- adjust after observing real usage.
          resources:
            requests:
              cpu: 10m
              memory: 32Mi
            limits:
              memory: 128Mi
          # The exporter only stats files and serves HTTP, so it needs neither
          # a writable root filesystem nor privilege escalation.
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
          volumeMounts:
            - name: pantherbackup
              mountPath: /backup
              readOnly: true
      volumes:
        - name: pantherbackup
          hostPath:
            path: /var/lib/wamblee/etcd
|
||||
|
19
etcd-inmemory-monitoring/docker-compose.yaml
Normal file
19
etcd-inmemory-monitoring/docker-compose.yaml
Normal file
@ -0,0 +1,19 @@
|
||||
# Local test setup for the exporter image.
services:
  filemonitor:
    image: docker.example.com/filemonitor:1.0
    build:
      context: .
    command:
      - python3
      # unbuffered, so the startup messages show up in the container log
      - -u
      - /exporter.py
      # some paths for testing; exporter.py requires <label>:<path> arguments
      - x:/data/x.txt
      - y:/data/y.txt
    ports:
      - "8080:8080"
    volumes:
      # for testing.
      - /home/user/downloads:/data
|
||||
|
||||
|
||||
|
81
etcd-inmemory-monitoring/exporter.py
Normal file
81
etcd-inmemory-monitoring/exporter.py
Normal file
@ -0,0 +1,81 @@
|
||||
import time
|
||||
|
||||
import prometheus_client
|
||||
from prometheus_client import start_http_server, Gauge, Counter
|
||||
import sys
|
||||
import argparse
|
||||
import os
|
||||
import stat
|
||||
from http.server import HTTPServer
|
||||
|
||||
# Unregister the client library's GC, platform and process collectors so the
# default registry no longer reports them.
for _default_collector in (
    prometheus_client.GC_COLLECTOR,
    prometheus_client.PLATFORM_COLLECTOR,
    prometheus_client.PROCESS_COLLECTOR,
):
    prometheus_client.REGISTRY.unregister(_default_collector)
del _default_collector

# Per-file gauges, labelled with the monitored path and its type label.
FILE_TIME = Gauge(
    "file_time_seconds",
    "File last modification time",
    labelnames=['path', 'type'],
)
FILE_SIZE = Gauge(
    "file_size",
    "File size in bytes",
    labelnames=['path', 'type'],
)
|
||||
|
||||
|
||||
class Handler(prometheus_client.MetricsHandler):
    """Metrics handler that re-stats the monitored files on every GET.

    The gauges are refreshed from the module-level ``FILES`` mapping
    (path -> type label) right before the parent class handles the request.
    """

    def do_GET(self) -> None:
        """Refresh the file gauges, then delegate to the parent handler."""
        for path, label in FILES.items():
            # 0 is reported for a path that is missing or not a regular file.
            mtime, size = 0, 0
            try:
                # os.stat follows symlinks
                stats = os.stat(path)
                if stat.S_ISREG(stats.st_mode):
                    mtime, size = stats.st_mtime, stats.st_size
            except (OSError, ValueError):
                # OSError: file missing or inaccessible. ValueError: path the
                # OS cannot represent (e.g. embedded NUL). Both keep the 0
                # values; anything else (KeyboardInterrupt, programming
                # errors) is no longer silently swallowed.
                pass
            # Set each gauge once, so a scrape never sees a transient 0.
            FILE_TIME.labels(path, label).set(mtime)
            FILE_SIZE.labels(path, label).set(size)

        return super().do_GET()
|
||||
|
||||
|
||||
# map of filename to type
FILES = {}


def _parse_filespec(filespec: str):
    """Split a ``<label>:<filepath>`` argument into ``(label, filepath)``.

    The split is made at the first colon, so the path itself may contain
    colons.

    Raises:
        argparse.ArgumentTypeError: if *filespec* contains no colon. argparse
            reports this as a usage error instead of a traceback.
    """
    label, separator, fname = filespec.partition(":")
    if not separator:
        raise argparse.ArgumentTypeError(
            f"invalid file specification {filespec!r}: "
            "expected <label>:<filepath>"
        )
    return label, fname


if __name__ == '__main__':
    DEFAULT_PORT = 8080
    parser = argparse.ArgumentParser(
        prog=sys.argv[0],
        description=f"""Statistics on a (backup) file,

Usage: {sys.argv[0]} [-p|--port <port>] <label1>:<filepath1> .... <labeln>:<filepathn>

Listens on port {DEFAULT_PORT} by default. It exposes statistics
on the monitored files to prometheus. Current metrics are

  {FILE_TIME._name}{{path="/path/to/file",type="<label>"}}: file modification time in seconds since 1970
  {FILE_SIZE._name}{{path="/path/to/file",type="<label>"}}: file size in bytes

If a path does not exist or is not a regular file then the value 0 is returned.
The exporter follows symlinks.

The syntax of each file argument is of the form <label>:<file> where <label> is
the value of the type label in the prometheus export.

""",
        epilog="Have a lot of fun!",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    # type= validates each filespec while parsing, so a malformed one ends in
    # a normal argparse usage error.
    parser.add_argument("files", nargs="*", type=_parse_filespec,
                        help="Files to monitor")
    parser.add_argument("-p", "--port", type=int, default=DEFAULT_PORT,
                        help="Port to listen on")
    args = parser.parse_args()

    for label, fname in args.files:
        FILES[fname] = label

    PORT = args.port

    print(f"Monitoring files {FILES}")
    # Start up the server to expose the metrics.
    print(f"Listening on port {PORT}")

    HTTPServer(('0.0.0.0', PORT), Handler).serve_forever()
|
13
etcd-inmemory-monitoring/kustomization.yaml
Normal file
13
etcd-inmemory-monitoring/kustomization.yaml
Normal file
@ -0,0 +1,13 @@
|
||||
|
||||
# Kustomize entry point for the exporter manifests; everything is placed in
# the monitoring namespace.
namespace: monitoring

resources:
  - deployment.yaml
  - service.yaml
  - servicemonitor.yaml
  - alertingrules.yaml

generatorOptions:
  disableNameSuffixHash: true
|
||||
|
17
etcd-inmemory-monitoring/service.yaml
Normal file
17
etcd-inmemory-monitoring/service.yaml
Normal file
@ -0,0 +1,17 @@
|
||||
---
# Service exposing port 8080 of the pods labelled app=pantherbackupmonitoring.
# The port is named "http".
apiVersion: v1
kind: Service
metadata:
  name: pantherbackupexporter
  namespace: monitoring
  labels:
    app: pantherbackupmonitoring
spec:
  selector:
    app: pantherbackupmonitoring
  ports:
    - name: http
      port: 8080
      targetPort: 8080
|
||||
|
||||
|
17
etcd-inmemory-monitoring/servicemonitor.yaml
Normal file
17
etcd-inmemory-monitoring/servicemonitor.yaml
Normal file
@ -0,0 +1,17 @@
|
||||
# Scrapes /metrics on the "http" port of every Service labelled
# app=pantherbackupmonitoring.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: pantherbackupmonitoring
  namespace: monitoring
spec:
  endpoints:
    - honorLabels: true
      path: /metrics
      port: http
      scheme: http
      # Pinned explicitly: Prometheus rejects a scrape timeout that is larger
      # than the scrape interval, so the 30s timeout below must not depend on
      # the global interval.
      interval: 30s
      scrapeTimeout: 30s
  selector:
    matchLabels:
      app: pantherbackupmonitoring
  # Copy the Service's "app" label onto the scraped series.
  targetLabels:
    - app
|
Loading…
Reference in New Issue
Block a user