tlaternet-server/configuration/services/backups.nix

252 lines
7.6 KiB
Nix
Raw Permalink Normal View History

{
config,
pkgs,
lib,
...
}: let
inherit (lib) types optional singleton;
mkShutdownScript = service:
pkgs.writeShellScript "backup-${service}-shutdown" ''
if systemctl is-active --quiet '${service}'; then
touch '/tmp/${service}-was-active'
systemctl stop '${service}'
fi
'';
mkRestartScript = service:
pkgs.writeShellScript "backup-${service}-restart" ''
if [ -f '/tmp/${service}-was-active' ]; then
rm '/tmp/${service}-was-active'
systemctl start '${service}'
fi
'';
writeScript = name: packages: text:
lib.getExe (pkgs.writeShellApplication {
inherit name text;
runtimeInputs = packages;
});
2024-03-18 04:05:54 +00:00
# *NOT* a TOML file, for some reason quotes are interpreted
# *literally
rcloneConfig = pkgs.writeText "rclone.conf" ''
[storagebox]
type = sftp
user = u395933
host = u395933.your-storagebox.de
port = 23
key_file = ${config.sops.secrets."restic/storagebox-ssh-key".path}
shell_type = unix
'';
resticEnv = {
RESTIC_PASSWORD_FILE = config.sops.secrets."restic/storagebox-backups".path;
RESTIC_REPOSITORY = "rclone:storagebox:backups";
RCLONE_CONFIG = rcloneConfig;
};
in {
options = {
services.backups = lib.mkOption {
description = lib.mdDoc ''
Configure restic backups with a specific tag.
'';
type = types.attrsOf (types.submodule ({
config,
name,
...
}: {
options = {
user = lib.mkOption {
type = types.str;
description = ''
The user as which to run the backup.
'';
};
paths = lib.mkOption {
type = types.listOf types.str;
description = ''
The paths to back up.
'';
};
tag = lib.mkOption {
type = types.str;
description = ''
The restic tag to mark the backup with.
'';
default = name;
};
preparation = {
packages = lib.mkOption {
type = types.listOf types.package;
default = [];
description = ''
The list of packages to make available in the
preparation script.
'';
};
text = lib.mkOption {
type = types.nullOr types.str;
default = null;
description = ''
The preparation script to run before the backup.
This should include things like database dumps and
enabling maintenance modes. If a service needs to be
shut down for backups, use `pauseServices` instead.
'';
};
};
cleanup = {
packages = lib.mkOption {
type = types.listOf types.package;
default = [];
description = ''
The list of packages to make available in the
cleanup script.
'';
};
text = lib.mkOption {
type = types.nullOr types.str;
default = null;
description = ''
The cleanup script to run after the backup.
This should do things like cleaning up database dumps
and disabling maintenance modes.
'';
};
};
pauseServices = lib.mkOption {
type = types.listOf types.str;
default = [];
description = ''
The systemd services that need to be shut down before
the backup can run. Services will be restarted after the
backup is complete.
This is intended to be used for services that do not
support hot backups.
'';
};
};
}));
};
};
config = lib.mkIf (config.services.backups != {}) {
systemd.services =
{
restic-prune = {
# Doesn't hurt to finish the ongoing prune
restartIfChanged = false;
2024-03-18 04:05:54 +00:00
environment = resticEnv;
path = with pkgs; [
2024-03-18 04:05:54 +00:00
openssh
rclone
restic
];
script = ''
# TODO(tlater): In an append-only setup, we should be
# careful with this; an attacker could delete backups by
# simply appending ad infinitum:
# https://restic.readthedocs.io/en/stable/060_forget.html#security-considerations-in-append-only-mode
restic forget --keep-last 3 --prune
restic check
'';
serviceConfig = {
DynamicUser = true;
Group = "backup";
CacheDirectory = "restic-prune";
CacheDirectoryMode = "0700";
};
};
}
// lib.mapAttrs' (name: backup:
lib.nameValuePair "backup-${name}" {
# Don't want to restart mid-backup
restartIfChanged = false;
2024-03-18 04:05:54 +00:00
environment =
resticEnv
// {
RESTIC_CACHE_DIR = "%C/backup-${name}";
};
2024-03-18 04:05:54 +00:00
path = with pkgs; [
coreutils
openssh
rclone
restic
];
# TODO(tlater): If I ever add more than one repo, service
# shutdown/restarting will potentially break if multiple
# backups for the same service overlap. A more clever
# sentinel file with reference counts would probably solve
# this.
serviceConfig = {
User = backup.user;
Group = "backup";
RuntimeDirectory = "backup-${name}";
CacheDirectory = "backup-${name}";
CacheDirectoryMode = "0700";
PrivateTmp = true;
ExecStart = [
(lib.concatStringsSep " " (["${pkgs.restic}/bin/restic" "backup" "--tag" name] ++ backup.paths))
];
ExecStartPre =
map (service: "+${mkShutdownScript service}") backup.pauseServices
2024-03-18 04:05:54 +00:00
++ singleton (writeScript "backup-${name}-repo-init" [] ''
restic snapshots || restic init
'')
++ optional (backup.preparation.text != null)
(writeScript "backup-${name}-prepare" backup.preparation.packages backup.preparation.text);
# TODO(tlater): Add repo pruning/checking
ExecStopPost =
map (service: "+${mkRestartScript service}") backup.pauseServices
++ optional (backup.cleanup.text != null)
(writeScript "backup-${name}-cleanup" backup.cleanup.packages backup.cleanup.text);
};
})
config.services.backups;
systemd.timers =
{
restic-prune = {
wantedBy = ["timers.target"];
timerConfig.OnCalendar = "Thursday 03:00:00 UTC";
# Don't make this persistent, in case the server was offline
# for a while. This job cannot run at the same time as any
# of the backup jobs.
};
}
// lib.mapAttrs' (name: backup:
lib.nameValuePair "backup-${name}" {
wantedBy = ["timers.target"];
timerConfig = {
OnCalendar = "Wednesday 02:30:00 UTC";
RandomizedDelaySec = "1h";
FixedRandomDelay = true;
Persistent = true;
};
})
config.services.backups;
2024-03-18 04:05:54 +00:00
users = {
# This user is only used to own the ssh key, because apparently
# the ssh client checks file permissions and is stuck in 1980.
users.backup = {
group = "backup";
isSystemUser = true;
};
groups.backup = {};
};
};
}