From b7b42e832ad711ebb5af5c03e1c8e8a471c6bde0 Mon Sep 17 00:00:00 2001
From: tobi <31960611+tsmethurst@users.noreply.github.com>
Date: Mon, 25 Mar 2024 18:05:14 +0100
Subject: [PATCH] [feature] Add healthcheck endpoints `/livez` and `/readyz`
(#2783)
* [feature] Add healthcheck endpoints `/livez` and `/readyz`
* use select that returns no data
---
cmd/gotosocial/action/server/server.go | 2 +
cmd/gotosocial/action/testrig/testrig.go | 2 +
docs/advanced/healthchecks.md | 48 +++++++++++++++
docs/api/swagger.yaml | 38 ++++++++++++
internal/api/health.go | 51 ++++++++++++++++
internal/api/health/health.go | 48 +++++++++++++++
internal/api/health/live.go | 54 +++++++++++++++++
internal/api/health/ready.go | 74 ++++++++++++++++++++++++
internal/db/basic.go | 4 +-
internal/db/bundb/basic.go | 10 +++-
mkdocs.yml | 1 +
11 files changed, 328 insertions(+), 4 deletions(-)
create mode 100644 docs/advanced/healthchecks.md
create mode 100644 internal/api/health.go
create mode 100644 internal/api/health/health.go
create mode 100644 internal/api/health/live.go
create mode 100644 internal/api/health/ready.go
diff --git a/cmd/gotosocial/action/server/server.go b/cmd/gotosocial/action/server/server.go
index 420264e97..1886cd885 100644
--- a/cmd/gotosocial/action/server/server.go
+++ b/cmd/gotosocial/action/server/server.go
@@ -309,6 +309,7 @@ var Start action.GTSAction = func(ctx context.Context) error {
authModule = api.NewAuth(dbService, processor, idp, routerSession, sessionName) // auth/oauth paths
clientModule = api.NewClient(dbService, processor) // api client endpoints
metricsModule = api.NewMetrics() // Metrics endpoints
+ healthModule = api.NewHealth(dbService.Ready) // Health check endpoints
fileserverModule = api.NewFileserver(processor) // fileserver endpoints
wellKnownModule = api.NewWellKnown(processor) // .well-known endpoints
nodeInfoModule = api.NewNodeInfo(processor) // nodeinfo endpoint
@@ -340,6 +341,7 @@ var Start action.GTSAction = func(ctx context.Context) error {
authModule.Route(router, clLimit, clThrottle, gzip)
clientModule.Route(router, clLimit, clThrottle, gzip)
metricsModule.Route(router, clLimit, clThrottle, gzip)
+ healthModule.Route(router, clLimit, clThrottle)
fileserverModule.Route(router, fsMainLimit, fsThrottle)
fileserverModule.RouteEmojis(router, instanceAccount.ID, fsEmojiLimit, fsThrottle)
wellKnownModule.Route(router, gzip, s2sLimit, s2sThrottle)
diff --git a/cmd/gotosocial/action/testrig/testrig.go b/cmd/gotosocial/action/testrig/testrig.go
index 3401734a0..0769b8878 100644
--- a/cmd/gotosocial/action/testrig/testrig.go
+++ b/cmd/gotosocial/action/testrig/testrig.go
@@ -224,6 +224,7 @@ var Start action.GTSAction = func(ctx context.Context) error {
authModule = api.NewAuth(state.DB, processor, idp, routerSession, sessionName) // auth/oauth paths
clientModule = api.NewClient(state.DB, processor) // api client endpoints
metricsModule = api.NewMetrics() // Metrics endpoints
+ healthModule = api.NewHealth(state.DB.Ready) // Health check endpoints
fileserverModule = api.NewFileserver(processor) // fileserver endpoints
wellKnownModule = api.NewWellKnown(processor) // .well-known endpoints
nodeInfoModule = api.NewNodeInfo(processor) // nodeinfo endpoint
@@ -235,6 +236,7 @@ var Start action.GTSAction = func(ctx context.Context) error {
authModule.Route(router)
clientModule.Route(router)
metricsModule.Route(router)
+ healthModule.Route(router)
fileserverModule.Route(router)
fileserverModule.RouteEmojis(router, instanceAccount.ID)
wellKnownModule.Route(router)
diff --git a/docs/advanced/healthchecks.md b/docs/advanced/healthchecks.md
new file mode 100644
index 000000000..30f90f7f2
--- /dev/null
+++ b/docs/advanced/healthchecks.md
@@ -0,0 +1,48 @@
+# Health Checks
+
+GoToSocial exposes two health check HTTP endpoints: `/readyz` and `/livez`.
+
+These can be used to check whether GoToSocial is reachable and able to make simple database queries.
+
+`/livez` will always return a 200 OK response with no body, in response to both GET and HEAD requests. This is useful to check if the GoToSocial service is alive.
+
+`/readyz` will return a 200 OK response with no body, in response to both GET and HEAD requests, if GoToSocial is able to run a very simple SELECT query against the configured database backend. If an error occurs while running the SELECT, the error will be logged, and 500 Internal Server Error will be returned, with no body.
+
+You can use the above endpoints to implement health checks in container runtimes / orchestration systems.
+
+For example, in a Docker setup, you could add the following to your docker-compose.yaml:
+
+```yaml
+healthcheck:
+ test: wget --no-verbose --tries=1 --spider http://localhost:8080/readyz || exit 1
+ interval: 120s
+ retries: 5
+ start_period: 30s
+ timeout: 10s
+```
+
+The above health check runs every two minutes, checking whether the service is available by doing a HEAD request to `/readyz`. Failures during the first 30 seconds after the container starts (the `start_period`) are not counted towards the retry limit. If the check fails five times in a row, the service will be reported as unhealthy. You can use this in whatever orchestration system you are using to force the container to restart.
+
+!!! warning
+ When doing database migrations on slow hardware, migration might take longer than the 10 minutes afforded by the above health check.
+
+ On such a system, you may want to increase the interval or number of retries of the health check to ensure that you don't stop GoToSocial in the middle of a migration (which is a very bad thing to do!).
+
+!!! tip
+ Though the health check endpoints don't reveal any sensitive info, and run only very simple queries, you may want to avoid exposing them to the outside world. You could do this in nginx, for example, by adding the following snippet to your `server` stanza:
+
+ ```nginx
+ location /livez {
+ return 404;
+ }
+ location /readyz {
+ return 404;
+ }
+ ```
+
+ This will cause nginx to intercept these requests *before* they are passed to GoToSocial, and just return 404 Not Found.
+
+References:
+
+- [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#healthcheck)
+- [Compose file reference](https://docs.docker.com/compose/compose-file/compose-file-v3/#healthcheck)
diff --git a/docs/api/swagger.yaml b/docs/api/swagger.yaml
index 51d8b6e78..e962c6724 100644
--- a/docs/api/swagger.yaml
+++ b/docs/api/swagger.yaml
@@ -7878,6 +7878,23 @@ paths:
summary: View instance information.
tags:
- instance
+ /livez:
+ get:
+ operationId: liveGet
+ responses:
+ "200":
+ description: OK
+ summary: Returns code 200 with no body if GoToSocial is "live", ie., able to respond to HTTP requests.
+ tags:
+ - health
+ head:
+ operationId: liveHead
+ responses:
+ "200":
+ description: OK
+ summary: Returns code 200 if GoToSocial is "live", ie., able to respond to HTTP requests.
+ tags:
+ - health
/nodeinfo/2.0:
get:
description: 'See: https://nodeinfo.diaspora.software/schema.html'
@@ -7892,6 +7909,27 @@ paths:
summary: Returns a compliant nodeinfo response to node info queries.
tags:
- nodeinfo
+ /readyz:
+ get:
+ description: If GtS is not ready, 500 Internal Error will be returned, and an error will be logged (but not returned to the caller, to avoid leaking internals).
+ operationId: readyGet
+ responses:
+ "200":
+ description: OK
+ "500":
+ description: Not ready. Check logs for error message.
+ summary: Returns code 200 with no body if GoToSocial is "ready", ie., able to connect to the database backend and do a simple SELECT.
+ tags:
+ - health
+ head:
+ description: If GtS is not ready, 500 Internal Error will be returned, and an error will be logged (but not returned to the caller, to avoid leaking internals).
+ operationId: readyHead
+ responses:
+ "200":
+ description: OK
+ "500":
+ description: Not ready. Check logs for error message.
+ summary: Returns code 200 with no body if GoToSocial is "ready", ie., able to connect to the database backend and do a simple SELECT.
+ tags:
+ - health
/users/{username}/collections/featured:
get:
description: |-
diff --git a/internal/api/health.go b/internal/api/health.go
new file mode 100644
index 000000000..e1dfd1924
--- /dev/null
+++ b/internal/api/health.go
@@ -0,0 +1,51 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package api
+
+import (
+ "context"
+
+ "github.com/gin-gonic/gin"
+ "github.com/superseriousbusiness/gotosocial/internal/api/health"
+ "github.com/superseriousbusiness/gotosocial/internal/middleware"
+ "github.com/superseriousbusiness/gotosocial/internal/router"
+)
+
+// Health groups the health check API module so that
+// its endpoints can be attached to a router via Route.
+type Health struct {
+ health *health.Module
+}
+
+// Route attaches the health check endpoints to r on the
+// top-level path prefix. The middlewares passed in m are
+// applied first, followed by middleware.CacheControl
+// configured with the "no-store" directive.
+func (h *Health) Route(r *router.Router, m ...gin.HandlerFunc) {
+	// An empty prefix puts the group at the top level.
+	group := r.AttachGroup("")
+	group.Use(m...)
+
+	// Health responses must never be cached.
+	noStore := middleware.CacheControl(middleware.CacheControlConfig{
+		Directives: []string{"no-store"},
+	})
+	group.Use(noStore)
+
+	h.health.Route(group.Handle)
+}
+
+// NewHealth returns a Health whose underlying
+// health check module is created with readyF.
+func NewHealth(readyF func(context.Context) error) *Health {
+	module := health.New(readyF)
+	return &Health{health: module}
+}
diff --git a/internal/api/health/health.go b/internal/api/health/health.go
new file mode 100644
index 000000000..cf81f550a
--- /dev/null
+++ b/internal/api/health/health.go
@@ -0,0 +1,48 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package health
+
+import (
+ "context"
+ "net/http"
+
+ "github.com/gin-gonic/gin"
+)
+
+const (
+ // LivePath is the path of the liveness endpoint.
+ LivePath = "/livez"
+ // ReadyPath is the path of the readiness endpoint.
+ ReadyPath = "/readyz"
+)
+
+// Module implements the handlers for the
+// liveness and readiness health check endpoints.
+type Module struct {
+ // readyF is called for each readiness request; a
+ // non-nil error means the service is not ready.
+ readyF func(context.Context) error
+}
+
+// New creates a health check Module that calls
+// readyF to decide whether the service is ready.
+func New(readyF func(context.Context) error) *Module {
+	m := new(Module)
+	m.readyF = readyF
+	return m
+}
+
+// Route registers the GET and HEAD handlers for both
+// LivePath and ReadyPath by calling attachHandler once
+// per method/path pair.
+func (m *Module) Route(attachHandler func(method string, path string, f ...gin.HandlerFunc) gin.IRoutes) {
+	routes := []struct {
+		method  string
+		path    string
+		handler gin.HandlerFunc
+	}{
+		{http.MethodGet, LivePath, m.LiveGETRequest},
+		{http.MethodHead, LivePath, m.LiveHEADRequest},
+		{http.MethodGet, ReadyPath, m.ReadyGETRequest},
+		{http.MethodHead, ReadyPath, m.ReadyHEADRequest},
+	}
+
+	for _, rt := range routes {
+		attachHandler(rt.method, rt.path, rt.handler)
+	}
+}
diff --git a/internal/api/health/live.go b/internal/api/health/live.go
new file mode 100644
index 000000000..d8841b913
--- /dev/null
+++ b/internal/api/health/live.go
@@ -0,0 +1,54 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package health
+
+import (
+ "net/http"
+
+ "github.com/gin-gonic/gin"
+)
+
+// LiveGETRequest swagger:operation GET /livez liveGet
+//
+// Returns code 200 with no body if GoToSocial is "live", ie., able to respond to HTTP requests.
+//
+// ---
+// tags:
+// - health
+//
+// responses:
+// '200':
+// description: OK
+func (m *Module) LiveGETRequest(c *gin.Context) {
+ // No checks are performed here: being able
+ // to answer the request at all is the signal.
+ c.Status(http.StatusOK)
+}
+
+// LiveHEADRequest swagger:operation HEAD /livez liveHead
+//
+// Returns code 200 if GoToSocial is "live", ie., able to respond to HTTP requests.
+//
+// ---
+// tags:
+// - health
+//
+// responses:
+// '200':
+// description: OK
+func (m *Module) LiveHEADRequest(c *gin.Context) {
+ // No checks are performed here: being able
+ // to answer the request at all is the signal.
+ c.Status(http.StatusOK)
+}
diff --git a/internal/api/health/ready.go b/internal/api/health/ready.go
new file mode 100644
index 000000000..70e3a324a
--- /dev/null
+++ b/internal/api/health/ready.go
@@ -0,0 +1,74 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package health
+
+import (
+ "net/http"
+
+ "github.com/superseriousbusiness/gotosocial/internal/gtserror"
+
+ "github.com/gin-gonic/gin"
+)
+
+// ready calls the module's readiness function with the
+// request context. It sets 200 OK when that function
+// returns nil, and 500 Internal Server Error otherwise.
+// No response body is written in either case.
+func (m *Module) ready(c *gin.Context) {
+	err := m.readyF(c.Request.Context())
+	if err == nil {
+		c.Status(http.StatusOK)
+		return
+	}
+
+	// Store the error on the gin context so
+	// it's logged by the logging middleware.
+	c.Error(gtserror.NewErrorInternalError(err)) //nolint:errcheck
+	c.Status(http.StatusInternalServerError)
+}
+
+// ReadyGETRequest swagger:operation GET /readyz readyGet
+//
+// Returns code 200 with no body if GoToSocial is "ready", ie., able to connect to the database backend and do a simple SELECT.
+//
+// If GtS is not ready, 500 Internal Error will be returned, and an error will be logged (but not returned to the caller, to avoid leaking internals).
+//
+// ---
+// tags:
+// - health
+//
+// responses:
+// '200':
+// description: OK
+// '500':
+// description: Not ready. Check logs for error message.
+func (m *Module) ReadyGETRequest(c *gin.Context) {
+ // GET and HEAD share the same readiness check.
+ m.ready(c)
+}
+
+// ReadyHEADRequest swagger:operation HEAD /readyz readyHead
+//
+// Returns code 200 with no body if GoToSocial is "ready", ie., able to connect to the database backend and do a simple SELECT.
+//
+// If GtS is not ready, 500 Internal Error will be returned, and an error will be logged (but not returned to the caller, to avoid leaking internals).
+//
+// ---
+// tags:
+// - health
+//
+// responses:
+// '200':
+// description: OK
+// '500':
+// description: Not ready. Check logs for error message.
+func (m *Module) ReadyHEADRequest(c *gin.Context) {
+ // GET and HEAD share the same readiness check,
+ // so HEAD can also result in a 500 status.
+ m.ready(c)
+}
diff --git a/internal/db/basic.go b/internal/db/basic.go
index 3a8e2af8d..4c27b7ea6 100644
--- a/internal/db/basic.go
+++ b/internal/db/basic.go
@@ -33,8 +33,8 @@ type Basic interface {
// If the database implementation doesn't need to be stopped, this can just return nil.
Close() error
- // IsHealthy should return nil if the database connection is healthy, or an error if not.
- IsHealthy(ctx context.Context) error
+ // Ready returns nil if the database connection is ready, or an error if not.
+ Ready(ctx context.Context) error
// GetByID gets one entry by its id. In a database like postgres, this might be the 'id' field of the entry,
// for other implementations (for example, in-memory) it might just be the key of a map.
diff --git a/internal/db/bundb/basic.go b/internal/db/bundb/basic.go
index 7b523f309..82212fc42 100644
--- a/internal/db/bundb/basic.go
+++ b/internal/db/bundb/basic.go
@@ -124,8 +124,14 @@ func (b *basicDB) DropTable(ctx context.Context, i interface{}) error {
return err
}
-func (b *basicDB) IsHealthy(ctx context.Context) error {
- return b.db.PingContext(ctx)
+// Ready runs a SELECT with LIMIT 0 against the "instances"
+// table and returns any error from executing that query.
+// A nil return means the query could be executed.
+func (b *basicDB) Ready(ctx context.Context) error {
+	query := b.db.NewRaw(
+		"SELECT NULL FROM ? LIMIT 0",
+		bun.Ident("instances"),
+	)
+
+	_, err := query.Exec(ctx)
+	return err
+}
func (b *basicDB) Close() error {
diff --git a/mkdocs.yml b/mkdocs.yml
index 38dc7d449..1b479304f 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -108,6 +108,7 @@ nav:
- "advanced/security/index.md"
- "advanced/security/sandboxing.md"
- "advanced/security/firewall.md"
+ - "advanced/healthchecks.md"
- "advanced/tracing.md"
- "advanced/metrics.md"