From 94ae490284e58dcb966553e54ac688f8b3e62c5e Mon Sep 17 00:00:00 2001 From: Jens L Date: Thu, 20 Apr 2023 19:46:49 +0300 Subject: [PATCH] lifecycle: migrate internal healthcheck to use go (#5322) Signed-off-by: Jens Langhammer --- Dockerfile | 2 +- cmd/server/healthcheck.go | 77 +++++++++++++++++++++ cmd/server/main.go | 130 ++--------------------------------- cmd/server/server.go | 141 ++++++++++++++++++++++++++++++++++++++ go.mod | 3 + go.sum | 3 + lifecycle/ak | 28 +++----- 7 files changed, 239 insertions(+), 145 deletions(-) create mode 100644 cmd/server/healthcheck.go create mode 100644 cmd/server/server.go diff --git a/Dockerfile b/Dockerfile index 5ea8639ef..288e0fa24 100644 --- a/Dockerfile +++ b/Dockerfile @@ -83,7 +83,7 @@ RUN apt-get update && \ # Required for runtime apt-get install -y --no-install-recommends libxmlsec1-openssl libmaxminddb0 && \ # Required for bootstrap & healtcheck - apt-get install -y --no-install-recommends curl runit && \ + apt-get install -y --no-install-recommends runit && \ pip install --no-cache-dir -r /requirements.txt && \ apt-get remove --purge -y build-essential pkg-config libxmlsec1-dev && \ apt-get autoremove --purge -y && \ diff --git a/cmd/server/healthcheck.go b/cmd/server/healthcheck.go new file mode 100644 index 000000000..68618052e --- /dev/null +++ b/cmd/server/healthcheck.go @@ -0,0 +1,77 @@ +package main + +import ( + "fmt" + "net/http" + "os" + "path" + "strings" + "time" + + log "github.com/sirupsen/logrus" + "github.com/spf13/cobra" + "goauthentik.io/internal/config" + "goauthentik.io/internal/utils/web" +) + +var workerHeartbeat = path.Join(os.TempDir(), "authentik-worker") + +const workerThreshold = 30 + +var healthcheckCmd = &cobra.Command{ + Use: "healthcheck", + Run: func(cmd *cobra.Command, args []string) { + if len(args) < 1 { + os.Exit(1) + } + mode := args[0] + config.Get() + exitCode := 1 + log.WithField("mode", mode).Debug("checking health") + switch strings.ToLower(mode) { + case "server": + exitCode = checkServer() + case "worker": + exitCode = checkWorker() + default: + log.Warn("Invalid mode") + } + os.Exit(exitCode) + }, +} + +func init() { + rootCmd.AddCommand(healthcheckCmd) +} + +func checkServer() int { + h := &http.Client{ + Transport: web.NewUserAgentTransport("goauthentik.io/healthcheck", http.DefaultTransport), + } + url := fmt.Sprintf("http://%s/-/health/ready/", config.Get().Listen.HTTP) + res, err := h.Head(url) + if err != nil { + log.WithError(err).Warning("failed to send healthcheck request") + return 1 + } + if res.StatusCode >= 400 { + log.WithField("status", res.StatusCode).Warning("unhealthy status code") + return 1 + } + log.Debug("successfully checked health") + return 0 +} + +func checkWorker() int { + stat, err := os.Stat(workerHeartbeat) + if err != nil { + log.WithError(err).Warning("failed to check worker heartbeat file") + return 1 + } + delta := time.Since(stat.ModTime()).Seconds() + if delta > workerThreshold { + log.WithField("threshold", workerThreshold).WithField("delta", delta).Warning("Worker hasn't updated heartbeat in threshold") + return 1 + } + return 0 +} diff --git a/cmd/server/main.go b/cmd/server/main.go index 9a5e33389..37cb47758 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -1,132 +1,10 @@ package main -import ( - "fmt" - "net/http" - "net/url" - "time" - - "github.com/getsentry/sentry-go" - log "github.com/sirupsen/logrus" - "goauthentik.io/internal/common" - "goauthentik.io/internal/config" - "goauthentik.io/internal/constants" - "goauthentik.io/internal/debug" - "goauthentik.io/internal/gounicorn" - "goauthentik.io/internal/outpost/ak" - "goauthentik.io/internal/outpost/proxyv2" - sentryutils "goauthentik.io/internal/utils/sentry" - webutils "goauthentik.io/internal/utils/web" - "goauthentik.io/internal/web" - "goauthentik.io/internal/web/tenant_tls" -) - -var running = true +import "os" func main() { - log.SetLevel(log.DebugLevel) - log.SetFormatter(&log.JSONFormatter{ - FieldMap: log.FieldMap{ - log.FieldKeyMsg: "event", - log.FieldKeyTime: "timestamp", - }, - DisableHTMLEscape: true, - }) - debug.EnableDebugServer() - l := log.WithField("logger", "authentik.root") - - if config.Get().ErrorReporting.Enabled { - err := sentry.Init(sentry.ClientOptions{ - Dsn: config.Get().ErrorReporting.SentryDSN, - AttachStacktrace: true, - EnableTracing: true, - TracesSampler: sentryutils.SamplerFunc(config.Get().ErrorReporting.SampleRate), - Release: fmt.Sprintf("authentik@%s", constants.VERSION), - Environment: config.Get().ErrorReporting.Environment, - HTTPTransport: webutils.NewUserAgentTransport(constants.UserAgent(), http.DefaultTransport), - IgnoreErrors: []string{ - http.ErrAbortHandler.Error(), - }, - }) - if err != nil { - l.WithError(err).Warning("failed to init sentry") - } - } - - ex := common.Init() - defer common.Defer() - - u, _ := url.Parse("http://localhost:8000") - - g := gounicorn.New() - defer func() { - l.Info("shutting down gunicorn") - g.Kill() - }() - ws := web.NewWebServer(g) - g.HealthyCallback = func() { - if !config.Get().Outposts.DisableEmbeddedOutpost { - go attemptProxyStart(ws, u) - } - } - go web.RunMetricsServer() - go attemptStartBackend(g) - ws.Start() - <-ex - running = false - l.Info("shutting down webserver") - go ws.Shutdown() -} - -func attemptStartBackend(g *gounicorn.GoUnicorn) { - for { - if !running { - return - } - err := g.Start() - log.WithField("logger", "authentik.router").WithError(err).Warning("gunicorn process died, restarting") - } -} - -func attemptProxyStart(ws *web.WebServer, u *url.URL) { - maxTries := 100 - attempt := 0 - l := log.WithField("logger", "authentik.server") - for { - l.Debug("attempting to init outpost") - ac := ak.NewAPIController(*u, config.Get().SecretKey) - if ac == nil { - attempt += 1 - time.Sleep(1 * time.Second) - if attempt > maxTries { - break - } - continue - } - // Init tenant_tls here too since it requires an API Client, - // so we just re-use the same one as the outpost uses - tw := tenant_tls.NewWatcher(ac.Client) - go tw.Start() - ws.TenantTLS = tw - ac.AddRefreshHandler(func() { - tw.Check() - }) - - srv := proxyv2.NewProxyServer(ac) - ws.ProxyServer = srv - ac.Server = srv - l.Debug("attempting to start outpost") - err := ac.StartBackgroundTasks() - if err != nil { - l.WithError(err).Warning("outpost failed to start") - attempt += 1 - time.Sleep(15 * time.Second) - if attempt > maxTries { - break - } - continue - } else { - select {} - } + err := rootCmd.Execute() + if err != nil { + os.Exit(1) } } diff --git a/cmd/server/server.go b/cmd/server/server.go new file mode 100644 index 000000000..b734a99d4 --- /dev/null +++ b/cmd/server/server.go @@ -0,0 +1,141 @@ +package main + +import ( + "fmt" + "net/http" + "net/url" + "time" + + "github.com/getsentry/sentry-go" + log "github.com/sirupsen/logrus" + "github.com/spf13/cobra" + "goauthentik.io/internal/common" + "goauthentik.io/internal/config" + "goauthentik.io/internal/constants" + "goauthentik.io/internal/debug" + "goauthentik.io/internal/gounicorn" + "goauthentik.io/internal/outpost/ak" + "goauthentik.io/internal/outpost/proxyv2" + sentryutils "goauthentik.io/internal/utils/sentry" + webutils "goauthentik.io/internal/utils/web" + "goauthentik.io/internal/web" + "goauthentik.io/internal/web/tenant_tls" +) + +var running = true + +var rootCmd = &cobra.Command{ + Use: "authentik", + Short: "Start authentik instance", + Version: constants.FullVersion(), + PersistentPreRun: func(cmd *cobra.Command, args []string) { + log.SetLevel(log.DebugLevel) + log.SetFormatter(&log.JSONFormatter{ + FieldMap: log.FieldMap{ + log.FieldKeyMsg: "event", + log.FieldKeyTime: "timestamp", + }, + DisableHTMLEscape: true, + }) + }, + Run: func(cmd *cobra.Command, args []string) { + debug.EnableDebugServer() + l := log.WithField("logger", "authentik.root") + + if config.Get().ErrorReporting.Enabled { + err := sentry.Init(sentry.ClientOptions{ + Dsn: config.Get().ErrorReporting.SentryDSN, + AttachStacktrace: true, + EnableTracing: true, + TracesSampler: sentryutils.SamplerFunc(config.Get().ErrorReporting.SampleRate), + Release: fmt.Sprintf("authentik@%s", constants.VERSION), + Environment: config.Get().ErrorReporting.Environment, + HTTPTransport: webutils.NewUserAgentTransport(constants.UserAgent(), http.DefaultTransport), + IgnoreErrors: []string{ + http.ErrAbortHandler.Error(), + }, + }) + if err != nil { + l.WithError(err).Warning("failed to init sentry") + } + } + + ex := common.Init() + defer common.Defer() + + u, _ := url.Parse("http://localhost:8000") + + g := gounicorn.New() + defer func() { + l.Info("shutting down gunicorn") + g.Kill() + }() + ws := web.NewWebServer(g) + g.HealthyCallback = func() { + if !config.Get().Outposts.DisableEmbeddedOutpost { + go attemptProxyStart(ws, u) + } + } + go web.RunMetricsServer() + go attemptStartBackend(g) + ws.Start() + <-ex + running = false + l.Info("shutting down webserver") + go ws.Shutdown() + + }, +} + +func attemptStartBackend(g *gounicorn.GoUnicorn) { + for { + if !running { + return + } + err := g.Start() + log.WithField("logger", "authentik.router").WithError(err).Warning("gunicorn process died, restarting") + } +} + +func attemptProxyStart(ws *web.WebServer, u *url.URL) { + maxTries := 100 + attempt := 0 + l := log.WithField("logger", "authentik.server") + for { + l.Debug("attempting to init outpost") + ac := ak.NewAPIController(*u, config.Get().SecretKey) + if ac == nil { + attempt += 1 + time.Sleep(1 * time.Second) + if attempt > maxTries { + break + } + continue + } + // Init tenant_tls here too since it requires an API Client, + // so we just re-use the same one as the outpost uses + tw := tenant_tls.NewWatcher(ac.Client) + go tw.Start() + ws.TenantTLS = tw + ac.AddRefreshHandler(func() { + tw.Check() + }) + + srv := proxyv2.NewProxyServer(ac) + ws.ProxyServer = srv + ac.Server = srv + l.Debug("attempting to start outpost") + err := ac.StartBackgroundTasks() + if err != nil { + l.WithError(err).Warning("outpost failed to start") + attempt += 1 + time.Sleep(15 * time.Second) + if attempt > maxTries { + break + } + continue + } else { + select {} + } + } +} diff --git a/go.mod b/go.mod index fbe14d27f..cb64b7b41 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,7 @@ require ( github.com/pires/go-proxyproto v0.7.0 github.com/prometheus/client_golang v1.15.0 github.com/sirupsen/logrus v1.9.0 + github.com/spf13/cobra v0.0.3 github.com/stretchr/testify v1.8.2 goauthentik.io/api/v3 v3.2023041.3 golang.org/x/exp v0.0.0-20230210204819-062eb4c674ab @@ -55,6 +56,7 @@ require ( github.com/go-openapi/swag v0.22.3 // indirect github.com/go-openapi/validate v0.22.1 // indirect github.com/golang/protobuf v1.5.3 // indirect + github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect @@ -67,6 +69,7 @@ require ( github.com/prometheus/common v0.42.0 // indirect github.com/prometheus/procfs v0.9.0 // indirect github.com/rogpeppe/go-internal v1.10.0 // indirect + github.com/spf13/pflag v1.0.3 // indirect go.mongodb.org/mongo-driver v1.11.3 // indirect go.opentelemetry.io/otel v1.14.0 // indirect go.opentelemetry.io/otel/trace v1.14.0 // indirect diff --git a/go.sum b/go.sum index 8fa785c57..b8257edca 100644 --- a/go.sum +++ b/go.sum @@ -220,6 +220,7 @@ github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/ad github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jellydator/ttlcache/v3 v3.0.1 h1:cHgCSMS7TdQcoprXnWUptJZzyFsqs18Lt8VVhRuZYVU= github.com/jellydator/ttlcache/v3 v3.0.1/go.mod h1:WwTaEmcXQ3MTjOm4bsZoDFiCu/hMvNWLO1w67RXz6h4= @@ -294,7 +295,9 @@ github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMB github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/spf13/cobra v0.0.3 h1:ZlrZ4XsMRm04Fr5pSFxBgfND2EBVa1nLpiy1stUsX/8= github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= +github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= diff --git a/lifecycle/ak b/lifecycle/ak index 2dec9c95e..97b1f523c 100755 --- a/lifecycle/ak +++ b/lifecycle/ak @@ -1,6 +1,5 @@ #!/bin/bash -e MODE_FILE="${TMPDIR}/authentik-mode" -WORKER_HEARTBEAT="${TMPDIR}/authentik-worker" function log { printf '{"event": "%s", "level": "info", "logger": "bootstrap"}\n' "$@" > /dev/stderr @@ -38,6 +37,14 @@ function check_if_root { exec chpst -u authentik:$GROUP env HOME=/authentik $1 } +function run_authentik { + if [[ -x "$(command -v authentik)" ]]; then + exec authentik $@ + else + exec go run -v ./cmd/server/ $@ + fi +} + function set_mode { echo $1 > $MODE_FILE trap cleanup EXIT @@ -56,11 +63,7 @@ if [[ "$1" == "server" ]]; then if [[ ! -z "${AUTHENTIK_BOOTSTRAP_PASSWORD}" || ! -z "${AUTHENTIK_BOOTSTRAP_TOKEN}" ]]; then python -m manage bootstrap_tasks fi - if [[ -x "$(command -v authentik)" ]]; then - exec authentik - else - exec go run -v ./cmd/server/ - fi + run_authentik elif [[ "$1" == "worker" ]]; then wait_for_db set_mode "worker" @@ -77,18 +80,7 @@ elif [[ "$1" == "test-all" ]]; then chown authentik:authentik /unittest.xml check_if_root "python -m manage test authentik" elif [[ "$1" == "healthcheck" ]]; then - mode=$(cat $MODE_FILE) - if [[ $mode == "server" ]]; then - exec curl --user-agent "goauthentik.io lifecycle Healthcheck" -I http://localhost:9000/-/health/ready/ - elif [[ $mode == "worker" ]]; then - mtime=$(date -r $WORKER_HEARTBEAT +"%s") - time=$(date +"%s") - if [ "$(( $time - $mtime ))" -gt "30" ]; then - log "Worker hasn't updated heartbeat in 30 seconds" - exit 1 - fi - exit 0 - fi + run_authentik healthcheck $(cat $MODE_FILE) elif [[ "$1" == "dump_config" ]]; then exec python -m authentik.lib.config elif [[ "$1" == "debug" ]]; then