etcd monitoring code examples.
This commit is contained in:
		
							parent
							
								
									092c830f52
								
							
						
					
					
						commit
						f86bb5f55a
					
				
							
								
								
									
										6
									
								
								etcd-inmemory-monitoring/Dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								etcd-inmemory-monitoring/Dockerfile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,6 @@ | ||||
| 
 | ||||
# Image for the file-statistics Prometheus exporter (exporter.py).
FROM python:3.8

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir prometheus_client

# No CMD/ENTRYPOINT is set here: the command line (python3 /exporter.py ...)
# is supplied by whoever runs the container.
COPY exporter.py /
| 
 | ||||
							
								
								
									
										3
									
								
								etcd-inmemory-monitoring/README
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								etcd-inmemory-monitoring/README
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,3 @@ | ||||
| https://github.com/prometheus/client_python | ||||
| pip install prometheus-client | ||||
| 
 | ||||
							
								
								
									
										64
									
								
								etcd-inmemory-monitoring/alertingrules.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								etcd-inmemory-monitoring/alertingrules.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,64 @@ | ||||
# Alerting rules for the etcd backup/image files exported by the file
# exporter (job "pantherbackupexporter") and for etcd disk usage.
#
# The exporter reports 0 for a missing or non-regular file, which the
# thresholds below catch. The additional absent_over_time(...) clause covers
# the case where no sample at all was scraped during the 60m window (for
# example the exporter is down); without it the expression would return an
# empty result and the alert would silently resolve.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: wamblee-cluster-monitoring
  namespace: monitoring
spec:
  groups:
  - name: wamblee-org
    partial_response_strategy: ""
    rules:
    # Newest modification time seen in the last hour is more than 3600s old.
    - alert: etcdBackupAbsentOrTooOld
      annotations:
        description: 'etcd backup is too old or not present'
        summary: etcd cluster backup is too old or not present
      expr: |-
        (
          time() -
          max(max_over_time(file_time_seconds{job="pantherbackupexporter",type="backup"}[60m]))
          > 3600
        )
        or
        absent_over_time(file_time_seconds{job="pantherbackupexporter",type="backup"}[60m])
      for: 5m
      labels:
        severity: critical
    # Largest backup size seen in the last hour is below 10000000 bytes.
    - alert: etcdBackupFileTooSmall
      annotations:
        description: 'etcd backup is too small or not present'
        summary: etcd cluster backup is too small or not present
      expr: |-
        (
          max(max_over_time(file_size{job="pantherbackupexporter",type="backup"}[60m]))
          <
          10000000
        )
        or
        absent_over_time(file_size{job="pantherbackupexporter",type="backup"}[60m])
      for: 5m
      labels:
        severity: critical
    # Same age check as for the backup, applied to the file with type="image".
    - alert: etcdImageFileTooOld
      annotations:
        description: 'etcd image file is too old or not present'
        summary: etcd image file is too old or not present
      expr: |-
        (
          time() -
          max(max_over_time(file_time_seconds{job="pantherbackupexporter",type="image"}[60m]))
          > 3600
        )
        or
        absent_over_time(file_time_seconds{job="pantherbackupexporter",type="image"}[60m])
      for: 5m
      labels:
        severity: critical
    # Largest image-file size seen in the last hour is below 10 bytes.
    - alert: etcdImageFileTooSmall
      annotations:
        description: 'etcd image file is too small or not present'
        summary: etcd cluster image file is too small or not present
      expr: |-
        (
          max(max_over_time(file_size{job="pantherbackupexporter",type="image"}[60m]))
          <
          10
        )
        or
        absent_over_time(file_size{job="pantherbackupexporter",type="image"}[60m])
      for: 5m
      labels:
        severity: critical
    # Less than 30% of the filesystem mounted at /var/lib/etcd is free.
    - alert: etcdDiskAlmostFull
      annotations:
        description: 'etcd high disk usage'
        summary: etcd high disk usage
      expr: |-
        node_filesystem_free_bytes{mountpoint="/var/lib/etcd"}/
        node_filesystem_size_bytes{mountpoint="/var/lib/etcd"} < 0.30
      for: 5m
      labels:
        severity: critical
							
								
								
									
										47
									
								
								etcd-inmemory-monitoring/deployment.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								etcd-inmemory-monitoring/deployment.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,47 @@ | ||||
# Deployment of the file exporter. It is pinned to the node "panther" and
# reads the etcd backup files from a hostPath on that node.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: pantherbackupmonitoring
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: pantherbackupmonitoring
  template:
    metadata:
      labels:
        app: pantherbackupmonitoring
        prometheus-scrapable: "true"
    spec:
      # Schedule onto the node named "panther"; the tolerations allow the pod
      # to run there even if it carries control-plane/master NoSchedule taints.
      nodeSelector:
        kubernetes.io/hostname: panther
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
      terminationGracePeriodSeconds: 0
      volumes:
        # Host directory holding the monitored files.
        - name: pantherbackup
          hostPath:
            path: /var/lib/wamblee/etcd
      containers:
        - name: exporter
          image: docker.example.com/filemonitor:1.0
          # Full command line; each file argument is <type label>:<path>.
          args:
            - python3
            - -u
            - /exporter.py
            - backup:/backup/etcd-snapshot-latest.db
            - image:/backup/etcdimage
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          volumeMounts:
            - name: pantherbackup
              mountPath: /backup
              readOnly: true
| 
 | ||||
							
								
								
									
										19
									
								
								etcd-inmemory-monitoring/docker-compose.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								etcd-inmemory-monitoring/docker-compose.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,19 @@ | ||||
# Local test setup for the file exporter.
services:
  filemonitor:
    image: docker.example.com/filemonitor:1.0
    build:
      context: .
    command:
      - python3
      # unbuffered output so the exporter's startup messages show up in the logs
      - -u
      - /exporter.py
      # Some paths for testing. Each argument must have the form
      # <label>:<path>; exporter.py fails at startup on an argument without
      # the "<label>:" prefix.
      - backup:/data/x.txt
      - image:/data/y.txt
    ports:
      - "8080:8080"
    volumes:
      # for testing.
      - /home/user/downloads:/data
|      | ||||
| 
 | ||||
|   | ||||
							
								
								
									
										81
									
								
								etcd-inmemory-monitoring/exporter.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								etcd-inmemory-monitoring/exporter.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,81 @@ | ||||
| import time | ||||
| 
 | ||||
| import prometheus_client | ||||
| from prometheus_client import start_http_server, Gauge, Counter | ||||
| import sys | ||||
| import argparse | ||||
| import os | ||||
| import stat | ||||
| from http.server import HTTPServer | ||||
| 
 | ||||
# Remove the GC, platform and process collectors from the default registry,
# leaving the gauges defined below.
for _default_collector in (
    prometheus_client.GC_COLLECTOR,
    prometheus_client.PLATFORM_COLLECTOR,
    prometheus_client.PROCESS_COLLECTOR,
):
    prometheus_client.REGISTRY.unregister(_default_collector)
del _default_collector

# Gauges for the modification time and the size of each monitored file,
# labelled with the file path and its user-supplied type.
FILE_TIME = Gauge(
    "file_time_seconds",
    "File last modification time",
    labelnames=['path', 'type'],
)
FILE_SIZE = Gauge(
    "file_size",
    "File size in bytes",
    labelnames=['path', 'type'],
)
| 
 | ||||
| 
 | ||||
class Handler(prometheus_client.MetricsHandler):
    """Metrics handler that re-stats every monitored file on each GET."""

    def do_GET(self) -> None:
        """Refresh the file gauges, then let MetricsHandler build the response.

        For every entry in FILES the modification time and size are published
        under the labels (path, type). A path that cannot be stat'ed, or that
        is not a regular file, is reported with the value 0 for both gauges.
        """
        for path, label in FILES.items():
            mtime, size = 0, 0
            try:
                # os.stat follows symlinks, so a link to a regular file counts.
                stats = os.stat(path)
                if stat.S_ISREG(stats.st_mode):
                    mtime, size = stats.st_mtime, stats.st_size
            except (OSError, ValueError):
                # Missing or unreadable file (OSError) or an invalid path
                # string (ValueError): deliberately keep the 0 values.
                # Anything else, including KeyboardInterrupt, propagates.
                pass
            FILE_TIME.labels(path, label).set(mtime)
            FILE_SIZE.labels(path, label).set(size)

        return super().do_GET()
| 
 | ||||
| 
 | ||||
| # map of filename to type  | ||||
| FILES = {} | ||||
| 
 | ||||
if __name__ == '__main__':
    # Script entry point: parse "<label>:<path>" arguments into FILES and
    # serve the metrics over HTTP until the process is stopped.
    DEFAULT_PORT = 8080
    parser = argparse.ArgumentParser(prog=sys.argv[0],
                                     description=f"""Statistics on a (backup) file,

                            Usage: {sys.argv[0]} [-p|--port <port>] <label1>:<filepath1> .... <labeln>:<filepathn>

                            Listens on port {DEFAULT_PORT} by default. It exposes statistics
                            on the monitored files to prometheus. Current metrics are

                              {FILE_TIME._name}{{path="/path/to/file",type="<label>"}}: file modification time in seconds since 1970
                              {FILE_SIZE._name}{{path="/path/to/file",type="<label>"}}: file size in bytes

                            If a path does not exist or is not a regular file then the value 0 is returned.
                            The exporter follows symlinks.

                            The syntax of each file is of the form <label>:<file> where <label> is the value of
                            the type label in the prometheus export.

                            """,
                                     epilog="Have a lot of fun!",
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("files", nargs="*", help="Files to monitor")
    parser.add_argument("-p", "--port", type=int, default=DEFAULT_PORT, help="Port to listen on")
    args = parser.parse_args()

    for filespec in args.files:
        # Split on the first ':' only, so the path itself may contain colons.
        label, separator, fname = filespec.partition(":")
        if not separator:
            # Report a usage error instead of crashing with a traceback.
            parser.error(f"invalid file specification '{filespec}': expected <label>:<filepath>")
        FILES[fname] = label

    PORT = args.port

    print(f"Monitoring files {FILES}")
    # Start up the server to expose the metrics.
    print(f"Listening on port {PORT}")

    HTTPServer(('0.0.0.0', PORT), Handler).serve_forever()
							
								
								
									
										13
									
								
								etcd-inmemory-monitoring/kustomization.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								etcd-inmemory-monitoring/kustomization.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,13 @@ | ||||
| 
 | ||||
# Kustomize entry point for the file exporter; all listed resources are
# placed in the "monitoring" namespace.
namespace: monitoring

generatorOptions:
  # Generated names are used as-is, without a content-hash suffix.
  disableNameSuffixHash: true

resources:
  - deployment.yaml
  - service.yaml
  - servicemonitor.yaml
  - alertingrules.yaml
| 
 | ||||
							
								
								
									
										17
									
								
								etcd-inmemory-monitoring/service.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								etcd-inmemory-monitoring/service.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,17 @@ | ||||
---
# Service in front of the exporter pods (selected by app=pantherbackupmonitoring).
# NOTE(review): the alerting rules select job="pantherbackupexporter", which
# matches this Service's name - presumably the job label is derived from it;
# confirm before renaming.
apiVersion: v1
kind: Service
metadata:
  name: pantherbackupexporter
  namespace: monitoring
  labels:
    app: pantherbackupmonitoring
spec:
  selector:
    app: pantherbackupmonitoring
  ports:
    - name: http
      port: 8080
      targetPort: 8080
| 
 | ||||
| 
 | ||||
							
								
								
									
										17
									
								
								etcd-inmemory-monitoring/servicemonitor.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								etcd-inmemory-monitoring/servicemonitor.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,17 @@ | ||||
# ServiceMonitor that scrapes /metrics on the "http" port of every Service
# labelled app=pantherbackupmonitoring.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: pantherbackupmonitoring
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: pantherbackupmonitoring
  # Copy the Service's "app" label onto the scraped targets.
  targetLabels:
    - app
  endpoints:
    - port: http
      scheme: http
      path: /metrics
      scrapeTimeout: 30s
      honorLabels: true
		Loading…
	
		Reference in New Issue
	
	Block a user