WIP: Add monitoring services

This commit is contained in:
Tristan Daniël Maat 2022-01-20 22:47:36 +00:00
parent e9c9dbfa41
commit 7b9a77d087
Signed by: tlater
GPG key ID: 49670FD774E43268
2 changed files with 214 additions and 1 deletions

View file

@ -1,9 +1,12 @@
{ config, pkgs, lib, ... }:
{
let inherit (lib.attrsets) mapAttrs;
in {
imports = [
./services/gitea.nix
./services/minecraft.nix
./services/monitoring.nix
./services/nextcloud.nix
./services/webserver.nix
./ids.nix
@ -67,6 +70,13 @@
recommendedProxySettings = true;
clientMaxBodySize = "10G";
domain = "tlater.net";
# Access-log layout shared by all virtual hosts: the usual combined-style
# fields followed by upstream response time, request length and total
# request time.
#
# The referer variable is spelled `$http_referer` (single "r", like the
# HTTP header itself). nginx resolves `$http_<name>` to the request header
# `<name>`, so `$http_referrer` would read a "Referrer" header that
# clients do not send and the field would always be logged as empty.
commonHttpConfig = ''
  log_format custom '$remote_addr - $remote_user [$time_local] '
                    '"$request" $status $body_bytes_sent '
                    '"$http_referer" "$http_user_agent" '
                    '$upstream_response_time $request_length $request_time';
  access_log /var/log/nginx/access.log custom;
'';
virtualHosts = let
host = port: extra:
@ -83,9 +93,20 @@
"${domain}" = host 3002 { serverAliases = [ "www.${domain}" ]; };
"gitea.${domain}" = host 3000 { };
"nextcloud.${domain}" = host 3001 { };
"grafana.${domain}" = host 3003 { };
};
};
# Members of the nginxlog group are allowed to read the nginx log files.
users.groups.nginxlog = { gid = null; };

systemd.services.nginx.serviceConfig = {
  # Priority 99 takes precedence over a plain (priority 100) definition
  # of the same option made elsewhere.
  LogsDirectoryMode = lib.mkOverride 99 "0751";

  # Give the nginx service itself the extra group as well.
  SupplementaryGroups = [ "nginxlog" ];

  # The leading "+" asks systemd to run the command with full privileges;
  # it hands group ownership of both log files to nginxlog.
  ExecStartPost = [
    "+${pkgs.coreutils}/bin/chown nginx:nginxlog \${LOGS_DIRECTORY}/access.log \${LOGS_DIRECTORY}/error.log"
  ];
};
security.acme = {
email = "tm@tlater.net";
acceptTerms = true;

View file

@ -0,0 +1,192 @@
{ config, lib, pkgs, ... }:
let
  inherit (builtins) attrNames concatStringsSep;
  inherit (lib) stringAfter;
  inherit (lib.attrsets) filterAttrs mapAttrsToList;
  inherit (pkgs) openssl writeText;

  # Public host name under which Grafana is served.
  domain = "grafana.${config.services.nginx.domain}";

  # Locations of the generated private keys and certificates.
  keydir = "/run/tempsecrets.d";
  certdir = "/run/tempcerts.d";

  # Selects the enabled exporters. An exporter counts as TLS-enabled when
  # it has any extra flags set (that is how the TLS web config is passed
  # in this module); `wantTls` picks which of the two groups to return.
  enabledExporters = wantTls:
    filterAttrs
    (_: exporter: exporter.enable && (exporter.extraFlags != [ ]) == wantTls)
    config.services.prometheus.exporters;

  nonTlsExporters = enabledExporters false;
  tlsExporters = enabledExporters true;
in {
# Grafana instance listening on port 3003 under the grafana.<domain> host
# name.
services.grafana = {
  inherit domain;
  enable = true;
  port = 3003;

  security = {
    adminUser = "tlater";
    adminPasswordFile = "/run/secrets/grafana-admin-pass";
  };

  extraOptions = {
    # All services grafana is allowed to source from
    SECURITY_DATA_SOURCE_PROXY_WHITELIST = "localhost:4000";

    # We want this to always go through the nixos config
    SECURITY_DISABLE_INITIAL_ADMIN_CREATION = "true";

    # Our nginx host only forwards this through https, so we can use
    # cookie_secure
    SECURITY_COOKIE_SECURE = "true";

    # cookie_samesite is not a boolean: Grafana accepts "lax", "strict",
    # "none" or "disabled". The previous value "true" is not one of
    # these; "lax" is believed to be the mode Grafana falls back to for
    # an unrecognised value, so spell it out. Switch to "strict" for
    # stronger isolation if cross-site links into Grafana are not needed.
    SECURITY_COOKIE_SAMESITE = "lax";

    # Explicitly enable the X-XSS-Protection response header.
    SECURITY_X_XSS_PROTECTION = "true";
  };

  provision = {
    enable = true;
    datasources = [{
      name = "Prometheus";
      type = "prometheus";
      url = "https://localhost:4000";

      # Prometheus requires a client certificate, so ask Grafana to use
      # TLS client auth and to verify the server against a CA cert.
      jsonData = {
        tlsAuth = true;
        tlsAuthWithCACert = true;
      };

      # Currently, Grafana doesn't support specifying key/cert from
      # a file, which makes this very tricky to automate.
      #
      # We'd need to set secureJsonData, which would be
      # world-readable, and completely break authentication.
      #
      # See this discussion:
      # https://github.com/grafana/grafana/discussions/44296
      #
      # For now, hand-add key/cert every time the server restarts;
      # if this becomes more permanent, maybe write a script that
      # updates the key via the API?
      editable = true;
    }];
  };
};
# Prometheus server plus the exporters it scrapes. Everything listens on
# local ports 4000-4003; TLS-capable endpoints use mutual TLS with the
# certificates generated at activation time.
services.prometheus = let
  # Writes an exporter-toolkit web config that serves TLS using
  # `server`'s key/cert and only admits clients presenting `client`'s
  # certificate.
  #
  # See https://github.com/prometheus/exporter-toolkit/blob/master/docs/web-configuration.md#web-configuration
  makeTlsConfig = client: server:
    writeText "web.yml" ''
      tls_server_config:
        key_file: ${keydir}/${server}.pem
        cert_file: ${certdir}/${server}.pem
        client_auth_type: RequireAndVerifyClientCert
        client_ca_file: ${certdir}/${client}.pem
    '';

  # Static target list for an exporter; all of them are scraped via
  # the loopback address on their configured port.
  localTargets = exporter:
    [{ targets = [ "127.0.0.1:${toString exporter.port}" ]; }];

  # Scrape job for an exporter that serves mutual TLS: trust exactly
  # that exporter's certificate and present prometheus' own.
  mkTlsJob = name: exporter: {
    job_name = name;
    scheme = "https";
    tls_config = {
      ca_file = "${certdir}/${name}-exporter.pem";
      cert_file = "${certdir}/prometheus.pem";
      key_file = "${keydir}/prometheus.pem";
      # Certificates are issued for "localhost", while the target is
      # addressed by IP.
      server_name = "localhost";
    };
    static_configs = localTargets exporter;
  };

  # Scrape job for an exporter without TLS support.
  mkPlainJob = name: exporter: {
    job_name = name;
    scheme = "http";
    static_configs = localTargets exporter;
  };
in {
  enable = true;
  port = 4000;
  extraFlags =
    [ "--web.config.file=${makeTlsConfig "grafana" "prometheus"}" ];

  # From the documentation:
  #
  # > When credentials are stored in external files (password_file,
  # > bearer_token_file, etc), they will not be visible to promtool
  # > and it will report errors, despite a correct configuration.
  checkConfig = false;

  exporters = {
    node = {
      enable = true;
      enabledCollectors = [ "systemd" ];
      port = 4001;
      # NOTE(review): newer node_exporter releases appear to have renamed
      # this flag to `--web.config.file` - confirm against the packaged
      # version when upgrading.
      extraFlags =
        [ "--web.config=${makeTlsConfig "prometheus" "node-exporter"}" ];
    };

    nginxlog = {
      enable = true;
      group = "nginxlog";
      port = 4002;
      # Note: No way to enable TLS/auth here
      settings.namespaces = [{
        name = "nginx";
        # Must mirror the layout of nginx's `custom` log_format. The
        # referer field uses nginx's real variable name, `$http_referer`.
        # NOTE(review): the exporter presumably only needs the layout to
        # match, not the field names - confirm.
        format = concatStringsSep " " [
          "$remote_addr - $remote_user [$time_local]"
          ''"$request" $status $body_bytes_sent''
          ''"$http_referer" "$http_user_agent"''
          "$upstream_response_time $request_length $request_time"
        ];
        source.files = [ "/var/log/nginx/access.log" ];
      }];
    };

    nextcloud = {
      enable = true;
      port = 4003;
      url = "https://127.0.0.1:3001";
      # NOTE(review): no credentials are configured here - confirm
      # whether this exporter needs a username/password file to query
      # the Nextcloud instance.
    };
  };

  # TLS-capable exporters first, then the plain-http ones.
  scrapeConfigs = (mapAttrsToList mkTlsJob tlsExporters)
    ++ mapAttrsToList mkPlainJob nonTlsExporters;
};
system.activationScripts = {
  # This will seem a bit strange, and it probably *is*; the
  # keys/certs here are only used for the various prometheus/grafana
  # services to authenticate against each other.
  #
  # Since they aren't used to actually encrypt anything but
  # communication that happens once, it's not necessary to keep the
  # keys around. They're only used internally, and frequently
  # switching them doesn't cause any issues. In fact, a single-use
  # key protocol would probably be more secure.
  #
  # Sadly, neither of these services support anything more usable
  # than https, so we need to generate keys. We opt to regenerate
  # them at each system activation.
  #
  # CN=localhost is not really a risk here - this only matters if an
  # attacker can spoof a service on the correct port somehow, in
  # which case they either have root or full access to that server's
  # user anyway. Since we use TLS auth, no secrets would be leaked,
  # so in the worst case this exploit would enable an attacker to
  # DoS that specific data source... Which they could do by taking
  # over the service already anyway.
  setupMonitoringAuth = let
    opensslBin = "${openssl}/bin/openssl";

    # `openssl req -x509` issues certificates valid for only 30 days
    # unless told otherwise. The certificates are only replaced on
    # activation, so a machine that runs longer than that without a
    # rebuild or reboot would see all mutual-TLS connections fail.
    # Use a validity that comfortably outlives any realistic uptime.
    certValidityDays = 3650;

    # Every party that needs its own key pair; each name doubles as
    # the user and group that owns the key.
    services = [ "grafana" "prometheus" ]
      ++ (map (name: "${name}-exporter") (attrNames tlsExporters));

    # Shell snippet that creates a self-signed key/cert pair for one
    # service and restricts access to the private key.
    mkCredentials = service: ''
      ${opensslBin} req -batch -x509 -newkey ed25519 -nodes \
        -days ${toString certValidityDays} \
        -subj '/CN=localhost' \
        -addext "subjectAltName = DNS:localhost" \
        -keyout '${keydir}/${service}.pem' \
        -out '${certdir}/${service}.pem'
      chown ${service}:${service} '${keydir}/${service}.pem'
      chmod 0400 '${keydir}/${service}.pem'
      chmod 0444 '${certdir}/${service}.pem'
    '';
  in stringAfter [ "specialfs" "users" "groups" ] (''
    [ -e /run/current-system ] || echo setting up monitoring secrets...
    specialMount ramfs '${keydir}' nodev,nosuid,mode=0751 ramfs
    specialMount ramfs '${certdir}' nodev,nosuid,mode=0751 ramfs
  '' + concatStringsSep "\n" (map mkCredentials services));
};
}