commit 6a75c512c4b840d5173cf34cc0e22bedb2cedcc5 Author: Erik Brakkee Date: Sun Feb 26 13:36:15 2023 +0100 first commit diff --git a/etcd-inmemory/CHANGELOG b/etcd-inmemory/CHANGELOG new file mode 100644 index 0000000..92e6cbe --- /dev/null +++ b/etcd-inmemory/CHANGELOG @@ -0,0 +1,8 @@ +* Wed Feb 15 2023 Erik Brakkee (erik@brakkee.org) +- Cleanup of the scripts. No longer sourcing the .bashrc + and using the docker wrapper script instead of the docker + alias. + +* Tue Feb 14 2023 Erik Brakkee (erik@brakkee.org) +- Scripts to be able to run etcd in memory. + diff --git a/etcd-inmemory/README.md b/etcd-inmemory/README.md new file mode 100644 index 0000000..8687ec3 --- /dev/null +++ b/etcd-inmemory/README.md @@ -0,0 +1,91 @@ +# Running etcd in memory + +This is an RPM for running etcd in memory for a kubeadm kubernetes install. It solves the issue that in a home setup, your disks will make +quite some noise because of etcd. See also the [post](https://brakkee.org/site/2023/02/14/silencing-kubernetes-at-home/) where this was described. + +This RPM has been tested on Centos Stream 8 with kubelet 1.24.10. It will +probably/hopefully work on other RHEL-like systems as well. + +# Disclaimer + +This is provided as an example and you can use it at your own risk. As an +administrator you are responsible at all times for the health of your +kubernetes cluster and all the data. Make sure that you have backups and can +restore from backup in case of problems, or try it out on a less critical cluster. + +# Requirements + +The cluster must be using containerd as the container runtime and etcd must be +running as a pod in the kubernetes cluster such as with a kubeadm cluster +setup. + +# Other container runtimes + +There is a docker script in the RPM that uses nerdctl to adapt to containerd. +It is possible to adapt this setup to another container runtime by replacing +the docker script with another implementation. Currently, only containerd is +supported. 
+ +# Building the RPM + +Build the RPM using maven and install it on your controller node using yum/dnf. +This will provide the following: +* backups of etcd in 15 minute intervals in the /var/lib/wamblee/etcd + directory. This will also preserve a number of older backups. +* prior to shutdown of the kubelet service, an additional backup is taken. +* prior to startup of the kubelet a restore is done. + +In a production setup you would add a distribution management section to +the pom.xml and configure the maven release plugin to deploy to a +repository (e.g. nexus). + +# Setup + +After installing the RPM, wait until the first backups appear. +In the next step, drain the controller node and stop the kubelet. + +``` +kubectl drain NODENAME --ignore-daemonsets +systemctl stop kubelet +``` + +Then stop all running containers on the controller node: +``` +/opt/wamblee/etcd/bin/docker ps | +awk 'NR > 1 { print $1}' | +xargs /opt/wamblee/etcd/bin/docker stop +``` + +After the above steps, all services in your cluster should still be running. + +Now back up the contents of the /var/lib/etcd directory +``` +cd /var/lib/etcd +tar cvfz ~/etcd.tar.gz . +rm -rf /var/lib/etcd/* +``` + +Now in /etc/fstab, create an entry to mount /var/lib/etcd in memory: +``` +tmpfs /var/lib/etcd tmpfs defaults,noatime,size=2g 0 0 +``` +Then remove all contents from the /var/lib/etcd directory and mount the ramdisk: +``` +rm -rf /var/lib/etcd/* +mount -a +``` + +Now you can start the kubelet again using `systemctl start kubelet`. After this, +you should see all the nodes as before: `kubectl get nodes`. + +After this, uncordon the controller node +``` +kubectl uncordon NODENAME +``` + +If anything goes wrong in the above steps, then drain the controller node +(if at all possible), stop the kubelet, and stop all containers, unmount +/var/lib/etcd and then +restore the etcd data from backup and start the kubelet again. 
+ + diff --git a/etcd-inmemory/files/etc/cron.d/wamblee-etcd b/etcd-inmemory/files/etc/cron.d/wamblee-etcd new file mode 100644 index 0000000..b462139 --- /dev/null +++ b/etcd-inmemory/files/etc/cron.d/wamblee-etcd @@ -0,0 +1,10 @@ +SHELL=/bin/bash +PATH=/sbin:/bin:/usr/sbin:/usr/bin +HOME=/root +MAILTO=root + +*/15 * * * * root /opt/wamblee/etcd/bin/etcd-cron > /var/log/wamblee-etcd-backup 2>&1 +30 0 * * * root /opt/wamblee/etcd/bin/etcdctl defrag --cluster > /var/log/wamblee-etcd-defrag 2>&1 + + + diff --git a/etcd-inmemory/files/opt/wamblee/etcd/bin/docker b/etcd-inmemory/files/opt/wamblee/etcd/bin/docker new file mode 100755 index 0000000..870af32 --- /dev/null +++ b/etcd-inmemory/files/opt/wamblee/etcd/bin/docker @@ -0,0 +1,3 @@ +#!/bin/bash + +exec nerdctl -n k8s.io "$@" diff --git a/etcd-inmemory/files/opt/wamblee/etcd/bin/etcd-backup b/etcd-inmemory/files/opt/wamblee/etcd/bin/etcd-backup new file mode 100755 index 0000000..84ca28d --- /dev/null +++ b/etcd-inmemory/files/opt/wamblee/etcd/bin/etcd-backup @@ -0,0 +1,33 @@ +#!/bin/bash + +PATH=/opt/wamblee/etcd/bin:$PATH + +if [[ $# -ne 1 ]] +then + echo "Usage: $0 " 1>&2 + exit 1 +fi + +BACKUP="$1" + + +IMAGE="$( /opt/wamblee/etcd/bin/docker ps | awk '/\/etcd$/ { print $2}' )" +if [[ -z "$IMAGE" ]] +then + echo "$0: could not create backup" 1>&2 + exit 1 +fi + +docker run --network host \ + -v /etc/kubernetes/pki/etcd:/etc/kubernetes/pki/etcd \ + -v /var/lib/wamblee/etcd:/var/lib/wamblee/etcd \ + --rm "$IMAGE" sh -c "etcdctl \ + --endpoints=https://127.0.0.1:2379 \ + --cacert=/etc/kubernetes/pki/etcd/ca.crt \ + --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt \ + --key=/etc/kubernetes/pki/etcd/healthcheck-client.key \ + snapshot save /var/lib/wamblee/etcd/$BACKUP" + +echo "IMAGE=$IMAGE" > /var/lib/wamblee/etcd/etcdimage + +echo "Backup done at /var/lib/wamblee/etcd/$BACKUP" diff --git a/etcd-inmemory/files/opt/wamblee/etcd/bin/etcd-cron b/etcd-inmemory/files/opt/wamblee/etcd/bin/etcd-cron new file mode 
100755 index 0000000..01dc6c3 --- /dev/null +++ b/etcd-inmemory/files/opt/wamblee/etcd/bin/etcd-cron @@ -0,0 +1,20 @@ +#!/bin/bash + +PATH=/opt/wamblee/etcd/bin:$PATH + +DATE="$( date +%Y-%m-%d_%H:%M:%S )" +DIR="$( date +%Y-%m-%d )" + +etcd-backup etcd-snapshot-latest.db.tmp +mv /var/lib/wamblee/etcd/etcd-snapshot-latest.db.tmp /var/lib/wamblee/etcd/etcd-snapshot-latest.db + +ln /var/lib/wamblee/etcd/etcd-snapshot-latest.db /var/lib/wamblee/etcd/etcd-backup-$DATE.db +mkdir -p /var/lib/wamblee/etcd/"$DIR" +if [[ ! -r /var/lib/wamblee/etcd/$DIR/etcd-backup.db ]] +then + ln /var/lib/wamblee/etcd/etcd-snapshot-latest.db /var/lib/wamblee/etcd/$DIR/etcd-backup.db +fi +ls -t /var/lib/wamblee/etcd/etcd-backup* | awk 'NR > 10' | xargs rm -f +find /var/lib/wamblee/etcd -mtime +31 | xargs rm -rf + + diff --git a/etcd-inmemory/files/opt/wamblee/etcd/bin/etcd-restore b/etcd-inmemory/files/opt/wamblee/etcd/bin/etcd-restore new file mode 100755 index 0000000..e913901 --- /dev/null +++ b/etcd-inmemory/files/opt/wamblee/etcd/bin/etcd-restore @@ -0,0 +1,42 @@ +#!/bin/bash + +PATH=/opt/wamblee/etcd/bin:$PATH + +if [[ $# -ne 1 ]] +then + ( + echo "Usage: $0 " + echo " must be a relative path to a backup below the /var/lib/wamblee/etcd directory" + ) 1>&2 + exit 1 +fi + +backup="$1" + +. 
/var/lib/wamblee/etcd/etcdimage +if [[ -z "$IMAGE" ]] +then + IMAGE="registry.k8s.io/etcd:3.5.6-0" + echo "ETCD image cannot be determined, using fall back $IMAGE" 1>&2 +fi + +echo "ETCD image: $IMAGE" + +set -e +rm -rf /var/lib/etcd.restored +mkdir -p /var/lib/etcd.restored +# using --network host to work around incompatibility of CNI versions +docker run --rm \ + --network host \ + -v '/var/lib/wamblee/etcd:/var/lib/wamblee/etcd' \ + -v '/var/lib/etcd.restored:/var/lib/etcd.restored' \ + --env ETCDCTL_API=3 \ + "$IMAGE" \ + /bin/sh -c "etcdctl snapshot restore /var/lib/wamblee/etcd/$backup --data-dir /var/lib/etcd.restored/data" + +mv /var/lib/etcd.restored/data/* /var/lib/etcd.restored +rmdir /var/lib/etcd.restored/data + +echo "" +echo "Restore is available at /var/lib/etcd.restored" + diff --git a/etcd-inmemory/files/opt/wamblee/etcd/bin/etcd-restore-to-tmpfs b/etcd-inmemory/files/opt/wamblee/etcd/bin/etcd-restore-to-tmpfs new file mode 100755 index 0000000..721d947 --- /dev/null +++ b/etcd-inmemory/files/opt/wamblee/etcd/bin/etcd-restore-to-tmpfs @@ -0,0 +1,38 @@ +#!/bin/bash + +PATH=/opt/wamblee/etcd/bin:$PATH + +echo "$0: verifying that etcd is not running" +if nc -z 127.0.0.1 2379 +then + echo "$0: etcd port 2379 is already open, skipping restore of data" 1>&2 + exit 0 +fi + +echo "$0: verifying that containerd is running" +if ! systemctl status containerd > /dev/null 2>&1 +then + echo "$0: containerd is not running, cannot perform restore" 1>&2 + exit 1 +fi + +echo "$0: verifying that /var/lib/etcd is empty" +size="$( du -s /var/lib/etcd | awk '{ print $1}' )" +if [[ "$size" -ne 0 ]] +then + echo "$0: /var/lib/etcd is not empty, assuming data left from previous etcd" 1>&2 + exit 0 +fi + +backupfile="$( cd /var/lib/wamblee/etcd; ls -rt *.db | tail -1 )" +echo "$0: Using backup file '$backupfile' for restore" + +etcd-restore "$backupfile" +if [[ $? 
-ne 0 ]] +then + echo "$0: restore of etcd failed" 1>&2 + exit 1 +fi +echo "$0: restore of etcd data finished" + +rsync -avz /var/lib/etcd.restored/ /var/lib/etcd/ diff --git a/etcd-inmemory/files/opt/wamblee/etcd/bin/etcdctl b/etcd-inmemory/files/opt/wamblee/etcd/bin/etcdctl new file mode 100755 index 0000000..822fc14 --- /dev/null +++ b/etcd-inmemory/files/opt/wamblee/etcd/bin/etcdctl @@ -0,0 +1,25 @@ +#!/bin/bash + +PATH=/opt/wamblee/etcd/bin:$PATH + +. /var/lib/wamblee/etcd/etcdimage +if [[ -z "$IMAGE" ]] +then + echo "ETCD image cannot be determined" 1>&2 + exit 1 +fi + +echo "ETCD image: $IMAGE" + +docker run --rm \ + --network host \ + -v /etc/kubernetes/pki/etcd:/etc/kubernetes/pki/etcd \ + -v /var/lib/wamblee/etcd:/var/lib/wamblee/etcd \ + $IMAGE \ + etcdctl \ + --endpoints=https://127.0.0.1:2379 \ + --cacert=/etc/kubernetes/pki/etcd/ca.crt \ + --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt \ + --key=/etc/kubernetes/pki/etcd/healthcheck-client.key "$@" + + diff --git a/etcd-inmemory/files/usr/lib/systemd/system/kubelet.service.d/wamblee-backup-etcd-post-stop.conf b/etcd-inmemory/files/usr/lib/systemd/system/kubelet.service.d/wamblee-backup-etcd-post-stop.conf new file mode 100644 index 0000000..6a26ded --- /dev/null +++ b/etcd-inmemory/files/usr/lib/systemd/system/kubelet.service.d/wamblee-backup-etcd-post-stop.conf @@ -0,0 +1,5 @@ + +[Service] +ExecStop=/opt/wamblee/etcd/bin/etcd-cron + + diff --git a/etcd-inmemory/files/usr/lib/systemd/system/kubelet.service.d/wamblee-restore-etcd-pre-start.conf b/etcd-inmemory/files/usr/lib/systemd/system/kubelet.service.d/wamblee-restore-etcd-pre-start.conf new file mode 100644 index 0000000..5845c8d --- /dev/null +++ b/etcd-inmemory/files/usr/lib/systemd/system/kubelet.service.d/wamblee-restore-etcd-pre-start.conf @@ -0,0 +1,7 @@ + +[Unit] +After=containerd.service + +[Service] +ExecStartPre=-/opt/wamblee/etcd/bin/etcd-restore-to-tmpfs + diff --git a/etcd-inmemory/pom.xml b/etcd-inmemory/pom.xml new file mode 
100644 index 0000000..53f3ad9 --- /dev/null +++ b/etcd-inmemory/pom.xml @@ -0,0 +1,106 @@ + + + + + + 4.0.0 + + rpm + org.brakkee.blog + etcd-inmemory + 1.0.1-SNAPSHOT + etcd-inmemory + running etcd in-memory + + org.brakkee + + + + + + org.codehaus.mojo + rpm-maven-plugin + 2.0.1 + true + + CHANGELOG + Apache License 2.0, 2010 + org.wamblee.server + Erik Brakkee + x86_64 + + + + + /usr/lib/systemd/system/kubelet.service.d + 444 + root + root + false + + + files/usr/lib/systemd/system/kubelet.service.d + + + + + + + /etc/cron.d + 644 + root + root + false + true + + + files/etc/cron.d + + + + + + + /opt/wamblee/etcd/bin + 555 + root + root + false + + + files/opt/wamblee/etcd/bin + + + + + + + /var/lib/wamblee/etcd + 755 + root + root + + + + nerdctl + + + etcd-inmemory + + + + + + + + +