tlaternet-server/configuration/services/backups.nix

229 lines
7.3 KiB
Nix
Raw Normal View History

2023-09-22 05:20:36 +01:00
{
config,
pkgs,
lib,
...
}: let
inherit (lib) types optional singleton;
mkShutdownScript = service:
pkgs.writeShellScript "backup-${service}-shutdown" ''
if systemctl is-active --quiet '${service}'; then
touch '/tmp/${service}-was-active'
systemctl stop '${service}'
fi
'';
mkRestartScript = service:
pkgs.writeShellScript "backup-${service}-restart" ''
if [ -f '/tmp/${service}-was-active' ]; then
rm '/tmp/${service}-was-active'
systemctl start '${service}'
fi
'';
writeScript = name: packages: text:
lib.getExe (pkgs.writeShellApplication {
inherit name text;
runtimeInputs = packages;
});
in {
options = {
services.backups = lib.mkOption {
description = lib.mdDoc ''
Configure restic backups with a specific tag.
'';
type = types.attrsOf (types.submodule ({
config,
name,
...
}: {
options = {
user = lib.mkOption {
type = types.str;
description = ''
The user as which to run the backup.
'';
};
paths = lib.mkOption {
type = types.listOf types.str;
description = ''
The paths to back up.
'';
};
tag = lib.mkOption {
type = types.str;
description = ''
The restic tag to mark the backup with.
'';
default = name;
};
preparation = {
packages = lib.mkOption {
type = types.listOf types.package;
default = [];
description = ''
The list of packages to make available in the
preparation script.
'';
};
text = lib.mkOption {
type = types.nullOr types.str;
default = null;
description = ''
The preparation script to run before the backup.
This should include things like database dumps and
enabling maintenance modes. If a service needs to be
shut down for backups, use `pauseServices` instead.
'';
};
};
cleanup = {
packages = lib.mkOption {
type = types.listOf types.package;
default = [];
description = ''
The list of packages to make available in the
cleanup script.
'';
};
text = lib.mkOption {
type = types.nullOr types.str;
default = null;
description = ''
The cleanup script to run after the backup.
This should do things like cleaning up database dumps
and disabling maintenance modes.
'';
};
};
pauseServices = lib.mkOption {
type = types.listOf types.str;
default = [];
description = ''
The systemd services that need to be shut down before
the backup can run. Services will be restarted after the
backup is complete.
This is intended to be used for services that do not
support hot backups.
'';
};
};
}));
};
};
config = lib.mkIf (config.services.backups != {}) {
systemd.services =
{
restic-prune = {
# Doesn't hurt to finish the ongoing prune
restartIfChanged = false;
environment = {
RESTIC_PASSWORD_FILE = config.sops.secrets."restic/local-backups".path;
RESTIC_REPOSITORY = "/var/lib/backups/";
};
path = with pkgs; [
restic
];
script = ''
# TODO(tlater): In an append-only setup, we should be
# careful with this; an attacker could delete backups by
# simply appending ad infinitum:
# https://restic.readthedocs.io/en/stable/060_forget.html#security-considerations-in-append-only-mode
restic forget --keep-last 3 --prune
restic check
'';
serviceConfig = {
DynamicUser = true;
Group = "backup";
CacheDirectory = "restic-prune";
CacheDirectoryMode = "0700";
ReadWritePaths = "/var/lib/backups/";
# Ensure we don't leave behind any files with the
# temporary UID of this service.
ExecStopPost = "+${pkgs.coreutils}/bin/chown -R root:backup /var/lib/backups/";
};
};
}
// lib.mapAttrs' (name: backup:
lib.nameValuePair "backup-${name}" {
# Don't want to restart mid-backup
restartIfChanged = false;
environment = {
RESTIC_CACHE_DIR = "%C/backup-${name}";
RESTIC_PASSWORD_FILE = config.sops.secrets."restic/local-backups".path;
# TODO(tlater): If I ever add more than one repo, service
# shutdown/restarting will potentially break if multiple
# backups for the same service overlap. A more clever
# sentinel file with reference counts would probably solve
# this.
RESTIC_REPOSITORY = "/var/lib/backups/";
};
serviceConfig = {
User = backup.user;
Group = "backup";
RuntimeDirectory = "backup-${name}";
CacheDirectory = "backup-${name}";
CacheDirectoryMode = "0700";
PrivateTmp = true;
ExecStart = [
(lib.concatStringsSep " " (["${pkgs.restic}/bin/restic" "backup" "--tag" name] ++ backup.paths))
];
ExecStartPre =
map (service: "+${mkShutdownScript service}") backup.pauseServices
++ singleton (writeScript "backup-${name}-repo-init" [pkgs.restic pkgs.coreutils] ''
restic snapshots || (restic init && chmod -R g+rwx "$RESTIC_REPOSITORY"/*)
'')
++ optional (backup.preparation.text != null)
(writeScript "backup-${name}-prepare" backup.preparation.packages backup.preparation.text);
# TODO(tlater): Add repo pruning/checking
ExecStopPost =
map (service: "+${mkRestartScript service}") backup.pauseServices
++ optional (backup.cleanup.text != null)
(writeScript "backup-${name}-cleanup" backup.cleanup.packages backup.cleanup.text);
};
})
config.services.backups;
systemd.timers =
{
restic-prune = {
wantedBy = ["timers.target"];
timerConfig.OnCalendar = "Thursday 03:00:00 UTC";
# Don't make this persistent, in case the server was offline
# for a while. This job cannot run at the same time as any
# of the backup jobs.
};
}
// lib.mapAttrs' (name: backup:
lib.nameValuePair "backup-${name}" {
wantedBy = ["timers.target"];
timerConfig = {
OnCalendar = "Wednesday 02:30:00 UTC";
RandomizedDelaySec = "1h";
FixedRandomDelay = true;
Persistent = true;
};
})
config.services.backups;
users.groups.backup = {};
systemd.tmpfiles.rules = [
"d /var/lib/backups/ 0770 root backup"
];
};
}