Lesson 11.2: Backup and Restore Strategies
root@dev-control-plane:/etc/kubernetes/manifests# pwd
/etc/kubernetes/manifests
root@dev-control-plane:/etc/kubernetes/manifests# cat etcd.yaml
apiVersion: v1
kind: Pod
metadata:
  annotations:
    kubeadm.kubernetes.io/etcd.advertise-client-urls: https://172.18.0.3:2379
  creationTimestamp: null
  labels:
    component: etcd
    tier: control-plane
  name: etcd
  namespace: kube-system
spec:
  containers:
  - command:
    - etcd
    - --advertise-client-urls=https://172.18.0.3:2379
    - --cert-file=/etc/kubernetes/pki/etcd/server.crt
    - --client-cert-auth=true
    - --data-dir=/var/lib/etcd
    - --experimental-initial-corrupt-check=true
    - --experimental-watch-progress-notify-interval=5s
    - --initial-advertise-peer-urls=https://172.18.0.3:2380
    - --initial-cluster=dev-control-plane=https://172.18.0.3:2380
    - --key-file=/etc/kubernetes/pki/etcd/server.key
    - --listen-client-urls=https://127.0.0.1:2379,https://172.18.0.3:2379
    - --listen-metrics-urls=http://127.0.0.1:2381
    - --listen-peer-urls=https://172.18.0.3:2380
    - --name=dev-control-plane
    - --peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt
    - --peer-client-cert-auth=true
    - --peer-key-file=/etc/kubernetes/pki/etcd/peer.key
    - --peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
    - --snapshot-count=10000
    - --trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
    image: registry.k8s.io/etcd:3.5.16-0
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 127.0.0.1
        path: /livez
        port: 2381
        scheme: HTTP
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    name: etcd
    readinessProbe:
      failureThreshold: 3
      httpGet:
        host: 127.0.0.1
        path: /readyz
        port: 2381
        scheme: HTTP
      periodSeconds: 1
      timeoutSeconds: 15
    resources:
      requests:
        cpu: 100m
        memory: 100Mi
    startupProbe:
      failureThreshold: 24
      httpGet:
        host: 127.0.0.1
        path: /readyz
        port: 2381
        scheme: HTTP
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    volumeMounts:
    - mountPath: /var/lib/etcd
      name: etcd-data
    - mountPath: /etc/kubernetes/pki/etcd
      name: etcd-certs
  hostNetwork: true
  priority: 2000001000
  priorityClassName: system-node-critical
  securityContext:
    seccompProfile:
      type: RuntimeDefault
  volumes:
  - hostPath:
      path: /etc/kubernetes/pki/etcd
      type: DirectoryOrCreate
    name: etcd-certs
  - hostPath:
      path: /var/lib/etcd
      type: DirectoryOrCreate
    name: etcd-data
status: {}
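Everything etcdctl will need is already in this manifest: the server certificate, key, and CA under /etc/kubernetes/pki/etcd, and the data directory at /var/lib/etcd. A quick one-liner to pull out just those flags:

# Extract the TLS and data-dir flags we will reuse with etcdctl
grep -E -- '--(cert-file|key-file|trusted-ca-file|data-dir)=' \
  /etc/kubernetes/manifests/etcd.yaml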
Install the etcd client (etcdctl)
root@dev-control-plane:~# apt-get update && apt-get install etcd-client -y
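Note that the distro's etcd-client package can lag behind the server: the help output below reports API version 3.4 against a 3.5.16 server. Snapshots still work, but if you want a matching client, here is a sketch that pulls etcdctl from the etcd project's standard release URL, with the version pinned to the image in the manifest above:

# Fetch an etcdctl matching the server version (3.5.16 here)
ETCD_VER=v3.5.16
curl -L "https://github.com/etcd-io/etcd/releases/download/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz" \
  -o /tmp/etcd.tar.gz
tar xzf /tmp/etcd.tar.gz -C /tmp --strip-components=1
install -m 0755 /tmp/etcdctl /usr/local/bin/etcdctl
etcdctl version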
View options
root@dev-control-plane:~# ETCDCTL_API=3 etcdctl snapshot
NAME:
  snapshot - Manages etcd node snapshots

USAGE:
  etcdctl snapshot <subcommand> [flags]

API VERSION:
  3.4

COMMANDS:
  restore  Restores an etcd member snapshot to an etcd directory
  save     Stores an etcd node backend snapshot to a given file
  status   Gets backend snapshot status of a given file

OPTIONS:
  -h, --help[=false]  help for snapshot

GLOBAL OPTIONS:
      --cacert=""                         verify certificates of TLS-enabled secure servers using this CA bundle
      --cert=""                           identify secure client using this TLS certificate file
      --command-timeout=5s                timeout for short running command (excluding dial timeout)
      --debug[=false]                     enable client-side debug logging
      --dial-timeout=2s                   dial timeout for client connections
  -d, --discovery-srv=""                  domain name to query for SRV records describing cluster endpoints
      --discovery-srv-name=""             service name to query when using DNS discovery
      --endpoints=[127.0.0.1:2379]        gRPC endpoints
      --hex[=false]                       print byte strings as hex encoded strings
      --insecure-discovery[=true]         accept insecure SRV records describing cluster endpoints
      --insecure-skip-tls-verify[=false]  skip server certificate verification (CAUTION: this option should be enabled only for testing purposes)
      --insecure-transport[=true]         disable transport security for client connections
      --keepalive-time=2s                 keepalive time for client connections
      --keepalive-timeout=6s              keepalive timeout for client connections
      --key=""                            identify secure client using this TLS key file
      --password=""                       password for authentication (if this option is used, --user option shouldn't include password)
      --user=""                           username[:password] for authentication (prompt if password is not supplied)
  -w, --write-out="simple"                set the output format (fields, json, protobuf, simple, table)
Set ETCDCTL_API as an environment variable
root@dev-control-plane:~# export ETCDCTL_API=3
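etcdctl also reads an ETCDCTL_-prefixed environment variable for each global flag, so the TLS options can be exported once per shell instead of repeated on every command. The transcript below passes the flags explicitly, which is clearer for a lesson, but the two forms are equivalent:

# Each global flag has an ETCDCTL_* environment equivalent
export ETCDCTL_ENDPOINTS=https://127.0.0.1:2379
export ETCDCTL_CACERT=/etc/kubernetes/pki/etcd/ca.crt
export ETCDCTL_CERT=/etc/kubernetes/pki/etcd/server.crt
export ETCDCTL_KEY=/etc/kubernetes/pki/etcd/server.key
etcdctl member list   # quick sanity check against the live cluster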
Take a snapshot, passing the TLS options from the manifest
root@dev-control-plane:~# etcdctl --endpoints=https://127.0.0.1:2379 \
> --cacert=/etc/kubernetes/pki/etcd/ca.crt \
> --cert=/etc/kubernetes/pki/etcd/server.crt \
> --key=/etc/kubernetes/pki/etcd/server.key \
> snapshot save /opt/etcd-backup.db
{"level":"info","ts":1742096447.892546,"caller":"snapshot/v3_snapshot.go:119","msg":"created temporary db file","path":"/opt/etcd-backup.db.part"}
{"level":"info","ts":"2025-03-16T03:40:47.897Z","caller":"clientv3/maintenance.go:200","msg":"opened snapshot stream; downloading"}
{"level":"info","ts":1742096447.8971481,"caller":"snapshot/v3_snapshot.go:127","msg":"fetching snapshot","endpoint":"https://127.0.0.1:2379"}
{"level":"info","ts":"2025-03-16T03:40:47.922Z","caller":"clientv3/maintenance.go:208","msg":"completed snapshot read; closing"}
{"level":"info","ts":1742096447.9275408,"caller":"snapshot/v3_snapshot.go:142","msg":"fetched snapshot","endpoint":"https://127.0.0.1:2379","size":"5.1 MB","took":0.034611291}
{"level":"info","ts":1742096447.9276412,"caller":"snapshot/v3_snapshot.go:152","msg":"saved","path":"/opt/etcd-backup.db"}
Snapshot saved at /opt/etcd-backup.db
# Checking the size
root@dev-control-plane:~# du -sh /opt/etcd-backup.db
4.9M /opt/etcd-backup.db
root@dev-control-plane:~# etcdctl --write-out=table snapshot status /opt/etcd-backup.db
+----------+----------+------------+------------+
| HASH | REVISION | TOTAL KEYS | TOTAL SIZE |
+----------+----------+------------+------------+
| f28a8279 | 116596 | 1197 | 5.1 MB |
+----------+----------+------------+------------+
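In a real cluster you would schedule snapshots rather than take them by hand. A minimal cron-able sketch; the backup directory and the 7-day retention are illustrative choices, not part of the lesson:

#!/usr/bin/env bash
# Timestamped etcd snapshot with simple retention
set -euo pipefail
BACKUP_DIR=/opt/etcd-backups          # illustrative path
mkdir -p "$BACKUP_DIR"
SNAP="$BACKUP_DIR/etcd-$(date +%Y%m%d-%H%M%S).db"

ETCDCTL_API=3 etcdctl \
  --endpoints=https://127.0.0.1:2379 \
  --cacert=/etc/kubernetes/pki/etcd/ca.crt \
  --cert=/etc/kubernetes/pki/etcd/server.crt \
  --key=/etc/kubernetes/pki/etcd/server.key \
  snapshot save "$SNAP"

# Verify the snapshot is readable before trusting it
ETCDCTL_API=3 etcdctl --write-out=table snapshot status "$SNAP"

# Drop snapshots older than 7 days
find "$BACKUP_DIR" -name 'etcd-*.db' -mtime +7 -delete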
Delete some services to simulate data loss
root@dev-control-plane:~# kubectl delete svc hello-world
service "hello-world" deleted
root@dev-control-plane:~# kubectl delete svc hello-world2
service "hello-world2" deleted
root@dev-control-plane:~# kubectl get svc
NAME         TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)   AGE
kubernetes   ClusterIP   10.96.0.1    <none>        443/TCP   6d23h
Restoring. Note that snapshot restore is a purely local operation: it builds a new data directory from the snapshot file and does not contact the running etcd, so the TLS flags below are carried over from the save command but are not strictly required.
root@dev-control-plane:~# etcdctl --endpoints=https://127.0.0.1:2379 \
> --cacert=/etc/kubernetes/pki/etcd/ca.crt \
> --cert=/etc/kubernetes/pki/etcd/server.crt \
> --key=/etc/kubernetes/pki/etcd/server.key \
> snapshot restore /opt/etcd-backup.db \
> --data-dir=/var/lib/etcd-restore-from-backup
{"level":"info","ts":1742097064.4672978,"caller":"snapshot/v3_snapshot.go:296","msg":"restoring snapshot","path":"/opt/etcd-backup.db","wal-dir":"/var/lib/etcd-restore-from-backup/member/wal","data-dir":"/var/lib/etcd-restore-from-backup","snap-dir":"/var/lib/etcd-restore-from-backup/member/snap"}
{"level":"info","ts":1742097064.4904122,"caller":"mvcc/kvstore.go:388","msg":"restored last compact revision","meta-bucket-name":"meta","meta-bucket-name-key":"finishedCompactRev","restored-compact-revision":115805}
{"level":"info","ts":1742097064.4958644,"caller":"membership/cluster.go:392","msg":"added member","cluster-id":"cdf818194e3a8c32","local-member-id":"0","added-peer-id":"8e9e05c52164694d","added-peer-peer-urls":["http://localhost:2380"]}
{"level":"info","ts":1742097064.4998891,"caller":"snapshot/v3_snapshot.go:309","msg":"restored snapshot","path":"/opt/etcd-backup.db","wal-dir":"/var/lib/etcd-restore-from-backup/member/wal","data-dir":"/var/lib/etcd-restore-from-backup","snap-dir":"/var/lib/etcd-restore-from-backup/member/snap"}
root@dev-control-plane:/var/lib# cd etcd-restore-from-backup/
root@dev-control-plane:/var/lib/etcd-restore-from-backup# ls
member
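The restore produced a standard etcd member layout: member/snap holds the bbolt database and member/wal the write-ahead log. You can confirm the structure with:

# Show the restored directory layout, two levels deep
find /var/lib/etcd-restore-from-backup -maxdepth 2 -type d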
# Change the data dir, volume mount, and hostPath volume to the new restore directory
root@dev-control-plane:/etc/kubernetes/manifests# pwd
/etc/kubernetes/manifests
root@dev-control-plane:/etc/kubernetes/manifests# vim etcd.yaml
root@dev-control-plane:/etc/kubernetes/manifests# cat etcd.yaml
apiVersion: v1
kind: Pod
metadata:
  annotations:
    kubeadm.kubernetes.io/etcd.advertise-client-urls: https://172.18.0.3:2379
  creationTimestamp: null
  labels:
    component: etcd
    tier: control-plane
  name: etcd
  namespace: kube-system
spec:
  containers:
  - command:
    - etcd
    - --advertise-client-urls=https://172.18.0.3:2379
    - --cert-file=/etc/kubernetes/pki/etcd/server.crt
    - --client-cert-auth=true
    - --data-dir=/var/lib/etcd-restore-from-backup # Changed
    - --experimental-initial-corrupt-check=true
    - --experimental-watch-progress-notify-interval=5s
    - --initial-advertise-peer-urls=https://172.18.0.3:2380
    - --initial-cluster=dev-control-plane=https://172.18.0.3:2380
    - --key-file=/etc/kubernetes/pki/etcd/server.key
    - --listen-client-urls=https://127.0.0.1:2379,https://172.18.0.3:2379
    - --listen-metrics-urls=http://127.0.0.1:2381
    - --listen-peer-urls=https://172.18.0.3:2380
    - --name=dev-control-plane
    - --peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt
    - --peer-client-cert-auth=true
    - --peer-key-file=/etc/kubernetes/pki/etcd/peer.key
    - --peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
    - --snapshot-count=10000
    - --trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
    image: registry.k8s.io/etcd:3.5.16-0
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 127.0.0.1
        path: /livez
        port: 2381
        scheme: HTTP
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    name: etcd
    readinessProbe:
      failureThreshold: 3
      httpGet:
        host: 127.0.0.1
        path: /readyz
        port: 2381
        scheme: HTTP
      periodSeconds: 1
      timeoutSeconds: 15
    resources:
      requests:
        cpu: 100m
        memory: 100Mi
    startupProbe:
      failureThreshold: 24
      httpGet:
        host: 127.0.0.1
        path: /readyz
        port: 2381
        scheme: HTTP
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    volumeMounts:
    - mountPath: /var/lib/etcd-restore-from-backup # Changed
      name: etcd-data
    - mountPath: /etc/kubernetes/pki/etcd
      name: etcd-certs
  hostNetwork: true
  priority: 2000001000
  priorityClassName: system-node-critical
  securityContext:
    seccompProfile:
      type: RuntimeDefault
  volumes:
  - hostPath:
      path: /etc/kubernetes/pki/etcd
      type: DirectoryOrCreate
    name: etcd-certs
  - hostPath:
      path: /var/lib/etcd-restore-from-backup # Changed
      type: DirectoryOrCreate
    name: etcd-data
status: {}
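Before triggering the restart it is worth confirming that all three places were updated; the restore path should appear exactly three times (the --data-dir flag, the volumeMount, and the hostPath volume):

# Expect three hits: --data-dir, the volumeMount, and the hostPath
grep -n 'etcd-restore-from-backup' /etc/kubernetes/manifests/etcd.yaml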
# Restart: moving the manifests out of the kubelet's staticPodPath stops the static pods; moving them back recreates them with the new settings
root@dev-control-plane:/etc/kubernetes/manifests# ls
etcd.yaml kube-apiserver.yaml kube-controller-manager.yaml kube-scheduler.yaml
root@dev-control-plane:/etc/kubernetes/manifests# mv * /tmp/
root@dev-control-plane:/etc/kubernetes/manifests# ls /tmp
etcd.yaml kube-apiserver.yaml kube-controller-manager.yaml kube-scheduler.yaml
root@dev-control-plane:/etc/kubernetes/manifests# mv /tmp/*.yaml .
root@dev-control-plane:/etc/kubernetes/manifests# ls
etcd.yaml kube-apiserver.yaml kube-controller-manager.yaml kube-scheduler.yaml
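While the manifests sit outside the directory, the API server itself is down and kubectl may fail; crictl talks to the container runtime directly and keeps working. If crictl is installed (it usually is on kubeadm/containerd nodes):

# Verify the control-plane containers come back, bypassing the API server
crictl ps | grep -E 'etcd|kube-apiserver'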
# etcd is running again
root@dev-control-plane:/etc/kubernetes/manifests# kubectl get pods -n=kube-system | grep etcd
etcd-dev-control-plane 1/1 Running 0 17h
root@dev-control-plane:~# systemctl restart kubelet
root@dev-control-plane:~# systemctl daemon-reload
root@dev-control-plane:~# kubectl get pods -n=kube-system | grep etcd
etcd-dev-control-plane 1/1 Running 0 94s
# Check that the etcd-data path has changed
root@dev-control-plane:~# kubectl describe pod -n=kube-system etcd-dev-control-plane
Name:                 etcd-dev-control-plane
Namespace:            kube-system
Priority:             2000001000
Priority Class Name:  system-node-critical
Node:                 dev-control-plane/172.18.0.3
Start Time:           Sun, 16 Mar 2025 04:06:39 +0000
Labels:               component=etcd
                      tier=control-plane
Annotations:          kubeadm.kubernetes.io/etcd.advertise-client-urls: https://172.18.0.3:2379
                      kubernetes.io/config.hash: aa21150d0f2a2c19c766d77f37b9e316
                      kubernetes.io/config.mirror: aa21150d0f2a2c19c766d77f37b9e316
                      kubernetes.io/config.seen: 2025-03-16T04:04:09.095428447Z
                      kubernetes.io/config.source: file
Status:               Running
SeccompProfile:       RuntimeDefault
IP:                   172.18.0.3
IPs:
  IP:  172.18.0.3
Controlled By:  Node/dev-control-plane
Containers:
  etcd:
    Container ID:  containerd://4aa6882b32a3bc958e1ee8a83511a84849bcb4f1a7b7625c3eba0f6ffab9bf63
    Image:         registry.k8s.io/etcd:3.5.16-0
    Image ID:      sha256:7fc9d4aa817aa6a3e549f3cd49d1f7b496407be979fc36dd5f356d59ce8c3a82
    Port:          <none>
    Host Port:     <none>
    Command:
      etcd
      --advertise-client-urls=https://172.18.0.3:2379
      --cert-file=/etc/kubernetes/pki/etcd/server.crt
      --client-cert-auth=true
      --data-dir=/var/lib/etcd-restore-from-backup
      --experimental-initial-corrupt-check=true
      --experimental-watch-progress-notify-interval=5s
      --initial-advertise-peer-urls=https://172.18.0.3:2380
      --initial-cluster=dev-control-plane=https://172.18.0.3:2380
      --key-file=/etc/kubernetes/pki/etcd/server.key
      --listen-client-urls=https://127.0.0.1:2379,https://172.18.0.3:2379
      --listen-metrics-urls=http://127.0.0.1:2381
      --listen-peer-urls=https://172.18.0.3:2380
      --name=dev-control-plane
      --peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt
      --peer-client-cert-auth=true
      --peer-key-file=/etc/kubernetes/pki/etcd/peer.key
      --peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
      --snapshot-count=10000
      --trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
    State:          Running
      Started:      Sun, 16 Mar 2025 04:04:09 +0000
    Ready:          True
    Restart Count:  0
    Requests:
      cpu:     100m
      memory:  100Mi
    Liveness:   http-get http://127.0.0.1:2381/livez delay=10s timeout=15s period=10s #success=1 #failure=8
    Readiness:  http-get http://127.0.0.1:2381/readyz delay=0s timeout=15s period=1s #success=1 #failure=3
    Startup:    http-get http://127.0.0.1:2381/readyz delay=10s timeout=15s period=10s #success=1 #failure=24
    Environment:  <none>
    Mounts:
      /etc/kubernetes/pki/etcd from etcd-certs (rw)
      /var/lib/etcd-restore-from-backup from etcd-data (rw)
Conditions:
  Type                        Status
  PodReadyToStartContainers   True
  Initialized                 True
  Ready                       True
  ContainersReady             True
  PodScheduled                True
Volumes:
  etcd-certs:
    Type:          HostPath (bare host directory volume)
    Path:          /etc/kubernetes/pki/etcd
    HostPathType:  DirectoryOrCreate
  etcd-data:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/etcd-restore-from-backup
    HostPathType:  DirectoryOrCreate
QoS Class:       Burstable
Node-Selectors:  <none>
Tolerations:     :NoExecute op=Exists
Events:
  Type    Reason          Age                    From     Message
  ----    ------          ----                   ----     -------
  Normal  Killing         3m38s (x2 over 4m5s)   kubelet  Stopping container etcd
  Normal  Pulled          3m26s (x3 over 4m6s)   kubelet  Container image "registry.k8s.io/etcd:3.5.16-0" already present on machine
  Normal  Created         3m26s (x3 over 4m6s)   kubelet  Created container: etcd
  Normal  Started         3m26s (x3 over 4m6s)   kubelet  Started container etcd
  Normal  SandboxChanged  3m26s (x2 over 3m45s)  kubelet  Pod sandbox changed, it will be killed and re-created.
Check that the previously deleted resources are back
root@dev-control-plane:/etc/kubernetes/manifests# kubectl get svc
NAME           TYPE        CLUSTER-IP    EXTERNAL-IP   PORT(S)        AGE
hello-world    ClusterIP   10.96.49.80   <none>        80/TCP         21h
hello-world2   NodePort    10.96.10.83   <none>        80:32059/TCP   20h
kubernetes     ClusterIP   10.96.0.1     <none>        443/TCP        6d23h
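As a final sanity check, endpoint status confirms the member is serving from the restored data. The reported revision should be at or just above the snapshot's revision (116596 in the status table earlier), since Kubernetes resumes writing as soon as the API server is back:

# Member health and current revision after the restore,
# using the same TLS flags as the snapshot commands
ETCDCTL_API=3 etcdctl \
  --endpoints=https://127.0.0.1:2379 \
  --cacert=/etc/kubernetes/pki/etcd/ca.crt \
  --cert=/etc/kubernetes/pki/etcd/server.crt \
  --key=/etc/kubernetes/pki/etcd/server.key \
  endpoint status --write-out=table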