From 2f2292c376c81664e76c3d9f9beb5314f5ace37a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tristan=20Dani=C3=ABl=20Maat?=
Date: Fri, 22 Sep 2023 06:20:36 +0200
Subject: [PATCH] WIP: Add atomic backups with restic

---
 configuration/default.nix            |   1 +
 configuration/services/backups.nix   | 241 +++++++++++++++++++++++++++
 configuration/services/conduit.nix   |  10 ++
 configuration/services/gitea.nix     |  26 ++-
 configuration/services/nextcloud.nix |  29 ++++
 configuration/services/starbound.nix |   8 +
 configuration/sops.nix               |   6 +
 keys/production.yaml                 |   6 +-
 keys/staging.yaml                    |   6 +-
 9 files changed, 328 insertions(+), 5 deletions(-)
 create mode 100644 configuration/services/backups.nix

diff --git a/configuration/default.nix b/configuration/default.nix
index 1f7a276..5d491af 100644
--- a/configuration/default.nix
+++ b/configuration/default.nix
@@ -14,6 +14,7 @@
     "${modulesPath}/profiles/minimal.nix"
     (import ../modules)
 
+    ./services/backups.nix
     ./services/conduit.nix
     ./services/foundryvtt.nix
     ./services/gitea.nix
diff --git a/configuration/services/backups.nix b/configuration/services/backups.nix
new file mode 100644
index 0000000..3635a83
--- /dev/null
+++ b/configuration/services/backups.nix
@@ -0,0 +1,241 @@
+# Restic backups: generates one `backup-<name>` service and timer per
+# entry in `services.backups`, plus a weekly `restic-prune` job.
+{
+  config,
+  pkgs,
+  lib,
+  ...
+}: let
+  inherit (lib) types optional singleton;
+  # Stop `service` if it is active and leave a sentinel file behind so
+  # that the restart script only restarts what the backup stopped.
+  mkShutdownScript = service:
+    pkgs.writeShellScript "backup-${service}-shutdown" ''
+      if systemctl is-active --quiet '${service}'; then
+        touch '/tmp/${service}-was-active'
+        systemctl stop '${service}'
+      fi
+    '';
+  # Start `service` again if (and only if) the sentinel file exists.
+  mkRestartScript = service:
+    pkgs.writeShellScript "backup-${service}-restart" ''
+      if [ -f '/tmp/${service}-was-active' ]; then
+        rm '/tmp/${service}-was-active'
+        systemctl start '${service}'
+      fi
+    '';
+  # Wrap `text` in a shell application that has `packages` as its
+  # runtime inputs, and return the path to the resulting executable.
+  writeScript = name: packages: text:
+    lib.getExe (pkgs.writeShellApplication {
+      inherit name text;
+      runtimeInputs = packages;
+    });
+in {
+  options = {
+    services.backups = lib.mkOption {
+      description = lib.mdDoc ''
+        Configure restic backups with a specific tag.
+      '';
+      type = types.attrsOf (types.submodule ({
+        config,
+        name,
+        ...
+      }: {
+        options = {
+          user = lib.mkOption {
+            type = types.str;
+            description = ''
+              The user as which to run the backup.
+            '';
+          };
+          paths = lib.mkOption {
+            type = types.listOf types.str;
+            description = ''
+              The paths to back up.
+            '';
+          };
+          tag = lib.mkOption {
+            type = types.str;
+            description = ''
+              The restic tag to mark the backup with.
+            '';
+            default = name;
+          };
+          preparation = {
+            packages = lib.mkOption {
+              type = types.listOf types.package;
+              default = [];
+              description = ''
+                The list of packages to make available in the
+                preparation script.
+              '';
+            };
+            text = lib.mkOption {
+              type = types.nullOr types.str;
+              default = null;
+              description = ''
+                The preparation script to run before the backup.
+
+                This should include things like database dumps and
+                enabling maintenance modes. If a service needs to be
+                shut down for backups, use `pauseServices` instead.
+              '';
+            };
+          };
+          cleanup = {
+            packages = lib.mkOption {
+              type = types.listOf types.package;
+              default = [];
+              description = ''
+                The list of packages to make available in the
+                cleanup script.
+              '';
+            };
+            text = lib.mkOption {
+              type = types.nullOr types.str;
+              default = null;
+              description = ''
+                The cleanup script to run after the backup.
+
+                This should do things like cleaning up database dumps
+                and disabling maintenance modes.
+              '';
+            };
+          };
+          pauseServices = lib.mkOption {
+            type = types.listOf types.str;
+            default = [];
+            description = ''
+              The systemd services that need to be shut down before
+              the backup can run. Services will be restarted after the
+              backup is complete.
+
+              This is intended to be used for services that do not
+              support hot backups.
+            '';
+          };
+        };
+      }));
+    };
+  };
+
+  config = lib.mkIf (config.services.backups != {}) {
+    systemd.services =
+      {
+        restic-prune = {
+          # Doesn't hurt to finish the ongoing prune
+          restartIfChanged = false;
+
+          environment = {
+            RESTIC_PASSWORD_FILE = config.sops.secrets."restic/local-backups".path;
+            RESTIC_REPOSITORY = "/var/lib/backups/";
+            RESTIC_CACHE_DIR = "%C/restic-prune";
+          };
+
+          path = with pkgs; [
+            restic
+          ];
+
+          script = ''
+            # TODO(tlater): In an append-only setup, we should be
+            # careful with this; an attacker could delete backups by
+            # simply appending ad infinitum:
+            # https://restic.readthedocs.io/en/stable/060_forget.html#security-considerations-in-append-only-mode
+            restic forget --keep-last 3 --prune
+            restic check
+          '';
+
+          serviceConfig = {
+            DynamicUser = true;
+            Group = "backup";
+
+            CacheDirectory = "restic-prune";
+            CacheDirectoryMode = "0700";
+            ReadWritePaths = "/var/lib/backups/";
+
+            # Ensure we don't leave behind any files with the
+            # temporary UID of this service.
+            ExecStopPost = "+${pkgs.coreutils}/bin/chown -R root:backup /var/lib/backups/";
+          };
+        };
+      }
+      // lib.mapAttrs' (name: backup:
+        lib.nameValuePair "backup-${name}" {
+          # Don't want to restart mid-backup
+          restartIfChanged = false;
+
+          environment = {
+            RESTIC_CACHE_DIR = "%C/backup-${name}";
+            RESTIC_PASSWORD_FILE = config.sops.secrets."restic/local-backups".path;
+            # TODO(tlater): If I ever add more than one repo, service
+            # shutdown/restarting will potentially break if multiple
+            # backups for the same service overlap. A more clever
+            # sentinel file with reference counts would probably solve
+            # this.
+            RESTIC_REPOSITORY = "/var/lib/backups/";
+          };
+
+          serviceConfig = {
+            User = backup.user;
+            Group = "backup";
+            RuntimeDirectory = "backup-${name}";
+            CacheDirectory = "backup-${name}";
+            CacheDirectoryMode = "0700";
+            PrivateTmp = true;
+
+            # Tag snapshots with the configurable `tag` option; it
+            # defaults to the attribute name.
+            ExecStart = [
+              (lib.concatStringsSep " " (["${pkgs.restic}/bin/restic" "backup" "--tag" backup.tag] ++ backup.paths))
+            ];
+
+            # The `+` prefix makes systemd run the pause scripts with
+            # full privileges: neither `User` nor `PrivateTmp` apply,
+            # so their sentinel files live in the host's /tmp.
+            ExecStartPre =
+              map (service: "+${mkShutdownScript service}") backup.pauseServices
+              ++ singleton (writeScript "backup-${name}-repo-init" [pkgs.restic pkgs.coreutils] ''
+                restic snapshots || (restic init && chmod -R g+rwx "$RESTIC_REPOSITORY"/*)
+              '')
+              ++ optional (backup.preparation.text != null)
+              (writeScript "backup-${name}-prepare" backup.preparation.packages backup.preparation.text);
+
+            # TODO(tlater): Add repo pruning/checking
+            ExecStopPost =
+              map (service: "+${mkRestartScript service}") backup.pauseServices
+              ++ optional (backup.cleanup.text != null)
+              (writeScript "backup-${name}-cleanup" backup.cleanup.packages backup.cleanup.text);
+          };
+        })
+      config.services.backups;
+
+    systemd.timers =
+      {
+        restic-prune = {
+          wantedBy = ["timers.target"];
+          timerConfig.OnCalendar = "Thursday 03:00:00 UTC";
+          # Don't make this persistent, in case the server was offline
+          # for a while. This job cannot run at the same time as any
+          # of the backup jobs.
+        };
+      }
+      // lib.mapAttrs' (name: backup:
+        lib.nameValuePair "backup-${name}" {
+          wantedBy = ["timers.target"];
+          timerConfig = {
+            OnCalendar = "Wednesday 02:30:00 UTC";
+            RandomizedDelaySec = "1h";
+            FixedRandomDelay = true;
+            Persistent = true;
+          };
+        })
+      config.services.backups;
+
+    users.groups.backup = {};
+
+    systemd.tmpfiles.rules = [
+      "d /var/lib/backups/ 0770 root backup"
+    ];
+  };
+}
diff --git a/configuration/services/conduit.nix b/configuration/services/conduit.nix
index 2d341b7..3f8fd40 100644
--- a/configuration/services/conduit.nix
+++ b/configuration/services/conduit.nix
@@ -231,4 +231,14 @@ in {
       };
     };
   };
+
+  services.backups.conduit = {
+    user = "root";
+    paths = [
+      "/var/lib/private/matrix-conduit/"
+    ];
+    # Other services store their data in conduit, so no other services
+    # need to be shut down currently.
+    pauseServices = ["conduit.service"];
+  };
 }
diff --git a/configuration/services/gitea.nix b/configuration/services/gitea.nix
index c636078..27353f6 100644
--- a/configuration/services/gitea.nix
+++ b/configuration/services/gitea.nix
@@ -1,4 +1,8 @@
-{config, ...}: let
+{
+  pkgs,
+  config,
+  ...
+}: let
   domain = "gitea.${config.services.nginx.domain}";
 in {
   services.gitea = {
@@ -52,4 +56,24 @@ in {
       enabled = true
     '';
   };
+
+  services.backups.gitea = {
+    user = "gitea";
+    paths = [
+      "/var/lib/gitea/gitea-db.sql"
+      "/var/lib/gitea/repositories/"
+      "/var/lib/gitea/data/"
+      "/var/lib/gitea/custom/"
+      # Conf is backed up via nix
+    ];
+    preparation = {
+      packages = [config.services.postgresql.package];
+      text = "pg_dump ${config.services.gitea.database.name} --file=/var/lib/gitea/gitea-db.sql";
+    };
+    cleanup = {
+      packages = [pkgs.coreutils];
+      text = "rm -f /var/lib/gitea/gitea-db.sql"; # -f: the dump may be absent if preparation failed
+    };
+    pauseServices = ["gitea.service"];
+  };
 }
diff --git a/configuration/services/nextcloud.nix b/configuration/services/nextcloud.nix
index 7b87697..fbca607 100644
--- a/configuration/services/nextcloud.nix
+++ b/configuration/services/nextcloud.nix
@@ -74,4 +74,33 @@ in {
       bantime = 86400
     '';
   };
+
+  services.backups.nextcloud = {
+    user = "nextcloud";
+    paths = [
+      "/var/lib/nextcloud/nextcloud-db.sql"
+      "/var/lib/nextcloud/data/"
+      "/var/lib/nextcloud/config/config.php"
+    ];
+    preparation = {
+      packages = [
+        config.services.postgresql.package
+        config.services.nextcloud.occ
+      ];
+      text = ''
+        nextcloud-occ maintenance:mode --on
+        pg_dump ${config.services.nextcloud.config.dbname} --file=/var/lib/nextcloud/nextcloud-db.sql
+      '';
+    };
+    cleanup = {
+      packages = [
+        pkgs.coreutils
+        config.services.nextcloud.occ
+      ];
+      text = ''
+        rm -f /var/lib/nextcloud/nextcloud-db.sql
+        nextcloud-occ maintenance:mode --off
+      ''; # rm -f: a missing dump must not abort the script before maintenance mode is disabled
+    };
+  };
 }
diff --git a/configuration/services/starbound.nix b/configuration/services/starbound.nix
index 419f0e4..6da890e 100644
--- a/configuration/services/starbound.nix
+++ b/configuration/services/starbound.nix
@@ -110,4 +110,12 @@ in {
       # ProtectHome = "read-only"; # See further up
     };
   };
+
+  services.backups.starbound = {
+    user = "root";
+    paths = [
+      "/var/lib/private/starbound/storage/universe/"
+    ];
+    pauseServices = ["starbound.service"];
+  };
 }
diff --git 
a/configuration/sops.nix b/configuration/sops.nix index 4becfd4..269fa58 100644 --- a/configuration/sops.nix +++ b/configuration/sops.nix @@ -19,6 +19,12 @@ mode = "0440"; }; + "restic/local-backups" = { + owner = "root"; + group = "backup"; + mode = "0440"; + }; + "turn/env" = {}; "turn/secret" = { owner = "turnserver"; diff --git a/keys/production.yaml b/keys/production.yaml index 6ef9ef7..f8d259d 100644 --- a/keys/production.yaml +++ b/keys/production.yaml @@ -7,6 +7,8 @@ heisenbridge: hs-token: ENC[AES256_GCM,data:u52WpkQFd/J7JFoE/rfNluebyZQLOokvkVdL7+AEAvrhJhrkJli1ztkD79lbC+6tGUH4tT3T+nX9wvGKnrRUQg==,iv:as+9fVuvMg2IoE2WIKD9mHi+znhNcWRh5Zq+yr0xcDQ=,tag:mZ7fh7U0MfgI8hyq/28Bcg==,type:str] wireguard: server-key: ENC[AES256_GCM,data:mXb7ZznJHf5CgV8rI4uzPBATMRbmd7LimgtCkQM9kAjbIaGwUBqJZBN3fXs=,iv:3Po1Orinzov9rnEm9cLzgJY1PeD+5Jl9115MriABHh8=,tag:E/2CjDO1JCvJzxCnqKcNyw==,type:str] +restic: + local-backups: ENC[AES256_GCM,data:NLNVlR9G9bLSZOkMoPvkbBbAZlKkmiUbdWHOFDnaefuy9wNLH53ctOIyS0rSsQLaJCSBTpgPSWIIXUSuzoK/eA==,iv:DzuujmyJJP4GiE5z7KOOGUEzUgOwmtf/7UYhwkyLe9g=,tag:cElFhpVC7S6HYlB6UyN7PQ==,type:str] turn: env: ENC[AES256_GCM,data:kt5nhVo9pb/ZbPUEcqSYXxN9YMgQKnFb5VRfFFS/qoIaJ73uD2fuJKqcxAyVRrdLqnSAWSQBgTgunBzdP7xqLAK2qt8DYAQWHkIe9uxFbSXZpdmw,iv:9lq6SFwTFN4GGm6gPiJpUMasMdnHVF6XLGYrsyG3kjU=,tag:428Qf9DOiiHt/Wjb188b8g==,type:str] secret: ENC[AES256_GCM,data:si7ee6Xfhdgdyzbp6aQpF7pz3TmTBb7iQ82lRPVXNDg9JfHI+lbmgAsSnRLX5qMCA6P9R045sSMosqidL8QwRg==,iv:SrhpZKK8D45yxCEfDb9P3TwtA14+qEI+wcRqcN/a6pw=,tag:PiwV+mOL9xHJgJft6sc61g==,type:str] @@ -19,8 +21,8 @@ sops: azure_kv: [] hc_vault: [] age: [] - lastmodified: "2023-04-23T17:34:53Z" - mac: ENC[AES256_GCM,data:UaGB4uwmYGVbKud5KrvdKeYTnYrs8nnQsT590KIS/b/9JhpQo5JXFtHsm1AteEBg9ygmY6tYKDcK4AXwz/uR/m3CW5If03dBNG8F9Uy3dPL5KaebC/EsNVIaRavWTbSZgqhnBgYeM+HkeQPskSWuwviSNU0D7d1n98Q89Y0kQfA=,iv:kEsRh8hb1amd2qozyxwYHCHdX80c2mO5Mm7npKX3DKc=,tag:p5GPd0OZvowghT92pxxXeA==,type:str] + lastmodified: "2023-09-23T18:55:44Z" + mac: 
ENC[AES256_GCM,data:psqgXozY9L7nduZ11GF+mbIrZ4RUySqBixkWL5z0cYeoLA3URb/dr028LCmNgQS9l8aJVsjVkyLBJIU/8wmiUNqRy/VI5iqV5mu+sxXhUVwFL0dAAWP1lOKwwT5uGK89/ioqkphgzuWD37vGe2vYddKkJF0M+zlz12fqkMjaisU=,iv:UyRoJbfuGU3K/Mp5DQ1kY0Z+nKSSo46BGNAcxt+vAvc=,tag:HkP6+qxQ8J/xAYJXYoG/6g==,type:str] pgp: - created_at: "2022-10-12T00:46:51Z" enc: | diff --git a/keys/staging.yaml b/keys/staging.yaml index 49b5a6a..a6b0849 100644 --- a/keys/staging.yaml +++ b/keys/staging.yaml @@ -7,6 +7,8 @@ heisenbridge: hs-token: ENC[AES256_GCM,data:VBwvwomv0Xg=,iv:q6INtJ+rg+QiXj8uBdBzQYQZUBBXp+9odxDHwvu8Jxc=,tag:XKhm8nxygAkKaiVPJ2Fcdg==,type:str] wireguard: server-key: ENC[AES256_GCM,data:FvY897XdKoa/mckE8JQLCkklsnYD6Wz1wpsu5t3uhEnW3iarnDQxF9msuYU=,iv:jqGXfekM+Vs+J9b5nlZ5Skd1ZKHajoUo2Dc4tMYPm1w=,tag:EehikjI/FCU8wqtpvJRamQ==,type:str] +restic: + local-backups: ENC[AES256_GCM,data:3QjEv03t7wE=,iv:y/6Lv4eUbZZfGPwUONykz8VNL62cAJuWaJy9yk3aAmk=,tag:wMlGsepuG9JjwtUKGWSibw==,type:str] turn: env: ENC[AES256_GCM,data:xjIz/AY109lyiL5N01p5T3HcYco/rM5CJSRTtg==,iv:16bW6OpyOK/QL0QPGQp/Baa9xyT8E3ZsYkwqmjuofk0=,tag:J5re3uKxIykw3YunvQWBgg==,type:str] secret: ENC[AES256_GCM,data:eQ7dAocoZtg=,iv:fgzjTPv30WqTKlLy+yMn5MsKQgjhPnwlGFFwYEg3gWs=,tag:1ze33U1NBkgMX/9SiaBNQg==,type:str] @@ -19,8 +21,8 @@ sops: azure_kv: [] hc_vault: [] age: [] - lastmodified: "2023-04-23T17:35:16Z" - mac: ENC[AES256_GCM,data:4cW8k6o3jET8k+yJGyApjOyuSUQb+d+4wX/RTNnpbt+867sExQrZUrOMif/u8S4WmcKVSJgvrzuxK9hpDPYhJ1d/5YuHH1Dyj7QDRdhbZYHhkpPus0ZVTEpSknZzx2eWH1ch/fyJJknlrBlfb/tz50Dv+w9mhkL7qteaIq+Vmsc=,iv:YMfAuGwu1kAM0wGkq3kzVMnC72yo7ZT04BuEwoLRPIA=,tag:6I1VRzteRaLuxN+sfLA5Mw==,type:str] + lastmodified: "2023-09-22T21:07:02Z" + mac: ENC[AES256_GCM,data:gItC41S8MInLmikdH1okhPs+FVf8sCF/iQeJ5reigBunHkOngoc6nOFANyAcNZETszzhgTLXXtmVNEjW46v6K7D6nmoi/zwpedUxwzMwDC5I28VTMDHVMAThYSGtdo6kig8i2pi8rzEQd1DStxMv3TWML5y6DDTlFsd3lfudaHA=,iv:zXebvIVPR76GwUhpactwRgF/eEmx2OBkT18E8lkwzRA=,tag:6HyISACbFCGlpIIgkFeA/A==,type:str] pgp: - created_at: "2022-10-12T16:48:23Z" enc: |