Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metrics, tuning in tests, db cleanups, fix concurrency issue #1895

Merged
merged 8 commits into from
Apr 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions hscontrol/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ func (h *Headscale) deleteExpireEphemeralNodes(milliSeconds int64) {
for range ticker.C {
var removed []types.NodeID
var changed []types.NodeID
if err := h.db.DB.Transaction(func(tx *gorm.DB) error {
if err := h.db.Write(func(tx *gorm.DB) error {
removed, changed = db.DeleteExpiredEphemeralNodes(tx, h.cfg.EphemeralNodeInactivityTimeout)

return nil
Expand Down Expand Up @@ -263,7 +263,7 @@ func (h *Headscale) expireExpiredMachines(intervalMs int64) {
var changed bool

for range ticker.C {
if err := h.db.DB.Transaction(func(tx *gorm.DB) error {
if err := h.db.Write(func(tx *gorm.DB) error {
lastCheck, update, changed = db.ExpireExpiredNodes(tx, lastCheck)

return nil
Expand Down Expand Up @@ -452,6 +452,7 @@ func (h *Headscale) ensureUnixSocketIsAbsent() error {

func (h *Headscale) createRouter(grpcMux *grpcRuntime.ServeMux) *mux.Router {
router := mux.NewRouter()
router.Use(prometheusMiddleware)
router.PathPrefix("/debug/pprof/").Handler(http.DefaultServeMux)

router.HandleFunc(ts2021UpgradePath, h.NoiseUpgradeHandler).Methods(http.MethodPost)
Expand Down Expand Up @@ -508,7 +509,7 @@ func (h *Headscale) Serve() error {

// Fetch an initial DERP Map before we start serving
h.DERPMap = derp.GetDERPMap(h.cfg.DERP)
h.mapper = mapper.NewMapper(h.db, h.cfg, h.DERPMap, h.nodeNotifier.ConnectedMap())
h.mapper = mapper.NewMapper(h.db, h.cfg, h.DERPMap, h.nodeNotifier)

if h.cfg.DERP.ServerEnabled {
// When embedded DERP is enabled we always need a STUN server
Expand Down
31 changes: 3 additions & 28 deletions hscontrol/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,6 @@ func (h *Headscale) handleAuthKey(
Err(err).
Msg("Cannot encode message")
http.Error(writer, "Internal server error", http.StatusInternalServerError)
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()

return
}
Expand All @@ -294,13 +292,6 @@ func (h *Headscale) handleAuthKey(
Str("node", registerRequest.Hostinfo.Hostname).
Msg("Failed authentication via AuthKey")

if pak != nil {
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()
} else {
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", "unknown").Inc()
}

return
}

Expand Down Expand Up @@ -404,24 +395,20 @@ func (h *Headscale) handleAuthKey(
Caller().
Err(err).
Msg("could not register node")
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)

return
}
}

err = h.db.DB.Transaction(func(tx *gorm.DB) error {
h.db.Write(func(tx *gorm.DB) error {
return db.UsePreAuthKey(tx, pak)
})
if err != nil {
log.Error().
Caller().
Err(err).
Msg("Failed to use pre-auth key")
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)

return
Expand All @@ -440,14 +427,10 @@ func (h *Headscale) handleAuthKey(
Str("node", registerRequest.Hostinfo.Hostname).
Err(err).
Msg("Cannot encode message")
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)

return
}
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "success", pak.User.Name).
Inc()
writer.Header().Set("Content-Type", "application/json; charset=utf-8")
writer.WriteHeader(http.StatusOK)
_, err = writer.Write(respBody)
Expand Down Expand Up @@ -563,7 +546,7 @@ func (h *Headscale) handleNodeLogOut(
}

if node.IsEphemeral() {
changedNodes, err := h.db.DeleteNode(&node, h.nodeNotifier.ConnectedMap())
changedNodes, err := h.db.DeleteNode(&node, h.nodeNotifier.LikelyConnectedMap())
if err != nil {
log.Error().
Err(err).
Expand Down Expand Up @@ -616,14 +599,10 @@ func (h *Headscale) handleNodeWithValidRegistration(
Caller().
Err(err).
Msg("Cannot encode message")
nodeRegistrations.WithLabelValues("update", "web", "error", node.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)

return
}
nodeRegistrations.WithLabelValues("update", "web", "success", node.User.Name).
Inc()

writer.Header().Set("Content-Type", "application/json; charset=utf-8")
writer.WriteHeader(http.StatusOK)
Expand Down Expand Up @@ -654,7 +633,7 @@ func (h *Headscale) handleNodeKeyRefresh(
Str("node", node.Hostname).
Msg("We have the OldNodeKey in the database. This is a key refresh")

err := h.db.DB.Transaction(func(tx *gorm.DB) error {
err := h.db.Write(func(tx *gorm.DB) error {
return db.NodeSetNodeKey(tx, &node, registerRequest.NodeKey)
})
if err != nil {
Expand Down Expand Up @@ -737,14 +716,10 @@ func (h *Headscale) handleNodeExpiredOrLoggedOut(
Caller().
Err(err).
Msg("Cannot encode message")
nodeRegistrations.WithLabelValues("reauth", "web", "error", node.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)

return
}
nodeRegistrations.WithLabelValues("reauth", "web", "success", node.User.Name).
Inc()

writer.Header().Set("Content-Type", "application/json; charset=utf-8")
writer.WriteHeader(http.StatusOK)
Expand Down
1 change: 0 additions & 1 deletion hscontrol/auth_noise.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ func (ns *noiseServer) NoiseRegistrationHandler(
Caller().
Err(err).
Msg("Cannot parse RegisterRequest")
nodeRegistrations.WithLabelValues("unknown", "web", "error", "unknown").Inc()
http.Error(writer, "Internal error", http.StatusInternalServerError)

return
Expand Down
9 changes: 5 additions & 4 deletions hscontrol/db/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
"github.com/patrickmn/go-cache"
"github.com/puzpuzpuz/xsync/v3"
"github.com/rs/zerolog/log"
"gorm.io/gorm"
"tailscale.com/tailcfg"
Expand Down Expand Up @@ -260,19 +261,19 @@ func NodeSetExpiry(tx *gorm.DB,
return tx.Model(&types.Node{}).Where("id = ?", nodeID).Update("expiry", expiry).Error
}

func (hsdb *HSDatabase) DeleteNode(node *types.Node, isConnected types.NodeConnectedMap) ([]types.NodeID, error) {
func (hsdb *HSDatabase) DeleteNode(node *types.Node, isLikelyConnected *xsync.MapOf[types.NodeID, bool]) ([]types.NodeID, error) {
return Write(hsdb.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return DeleteNode(tx, node, isConnected)
return DeleteNode(tx, node, isLikelyConnected)
})
}

// DeleteNode deletes a Node from the database.
// Caller is responsible for notifying all of change.
func DeleteNode(tx *gorm.DB,
node *types.Node,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
changed, err := deleteNodeRoutes(tx, node, isConnected)
changed, err := deleteNodeRoutes(tx, node, isLikelyConnected)
if err != nil {
return changed, err
}
Expand Down
3 changes: 2 additions & 1 deletion hscontrol/db/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/juanfont/headscale/hscontrol/policy"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
"github.com/puzpuzpuz/xsync/v3"
"gopkg.in/check.v1"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
Expand Down Expand Up @@ -120,7 +121,7 @@ func (s *Suite) TestHardDeleteNode(c *check.C) {
}
db.DB.Save(&node)

_, err = db.DeleteNode(&node, types.NodeConnectedMap{})
_, err = db.DeleteNode(&node, xsync.NewMapOf[types.NodeID, bool]())
c.Assert(err, check.IsNil)

_, err = db.getNode(user.Name, "testnode3")
Expand Down
4 changes: 2 additions & 2 deletions hscontrol/db/preauth_keys_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ func (*Suite) TestEphemeralKeyReusable(c *check.C) {
_, err = db.getNode("test7", "testest")
c.Assert(err, check.IsNil)

db.DB.Transaction(func(tx *gorm.DB) error {
db.Write(func(tx *gorm.DB) error {
DeleteExpiredEphemeralNodes(tx, time.Second*20)
return nil
})
Expand Down Expand Up @@ -181,7 +181,7 @@ func (*Suite) TestEphemeralKeyNotReusable(c *check.C) {
_, err = db.getNode("test7", "testest")
c.Assert(err, check.IsNil)

db.DB.Transaction(func(tx *gorm.DB) error {
db.Write(func(tx *gorm.DB) error {
DeleteExpiredEphemeralNodes(tx, time.Second*20)
return nil
})
Expand Down
37 changes: 20 additions & 17 deletions hscontrol/db/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"github.com/juanfont/headscale/hscontrol/policy"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/puzpuzpuz/xsync/v3"
"github.com/rs/zerolog/log"
"gorm.io/gorm"
"tailscale.com/util/set"
Expand Down Expand Up @@ -126,7 +127,7 @@ func EnableRoute(tx *gorm.DB, id uint64) (*types.StateUpdate, error) {

func DisableRoute(tx *gorm.DB,
id uint64,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
route, err := GetRoute(tx, id)
if err != nil {
Expand All @@ -147,7 +148,7 @@ func DisableRoute(tx *gorm.DB,
return nil, err
}

update, err = failoverRouteTx(tx, isConnected, route)
update, err = failoverRouteTx(tx, isLikelyConnected, route)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -182,17 +183,17 @@ func DisableRoute(tx *gorm.DB,

func (hsdb *HSDatabase) DeleteRoute(
id uint64,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
return Write(hsdb.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return DeleteRoute(tx, id, isConnected)
return DeleteRoute(tx, id, isLikelyConnected)
})
}

func DeleteRoute(
tx *gorm.DB,
id uint64,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
route, err := GetRoute(tx, id)
if err != nil {
Expand All @@ -207,7 +208,7 @@ func DeleteRoute(
// https://github.com/juanfont/headscale/issues/804#issuecomment-1399314002
var update []types.NodeID
if !route.IsExitRoute() {
update, err = failoverRouteTx(tx, isConnected, route)
update, err = failoverRouteTx(tx, isLikelyConnected, route)
if err != nil {
return nil, nil
}
Expand Down Expand Up @@ -252,7 +253,7 @@ func DeleteRoute(
return update, nil
}

func deleteNodeRoutes(tx *gorm.DB, node *types.Node, isConnected types.NodeConnectedMap) ([]types.NodeID, error) {
func deleteNodeRoutes(tx *gorm.DB, node *types.Node, isLikelyConnected *xsync.MapOf[types.NodeID, bool]) ([]types.NodeID, error) {
routes, err := GetNodeRoutes(tx, node)
if err != nil {
return nil, fmt.Errorf("getting node routes: %w", err)
Expand All @@ -266,7 +267,7 @@ func deleteNodeRoutes(tx *gorm.DB, node *types.Node, isConnected types.NodeConne

// TODO(kradalby): This is a bit too aggressive, we could probably
// figure out which routes needs to be failed over rather than all.
chn, err := failoverRouteTx(tx, isConnected, &routes[i])
chn, err := failoverRouteTx(tx, isLikelyConnected, &routes[i])
if err != nil {
return changed, fmt.Errorf("failing over route after delete: %w", err)
}
Expand Down Expand Up @@ -409,7 +410,7 @@ func SaveNodeRoutes(tx *gorm.DB, node *types.Node) (bool, error) {
// If needed, the failover will be attempted.
func FailoverNodeRoutesIfNeccessary(
tx *gorm.DB,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
node *types.Node,
) (*types.StateUpdate, error) {
nodeRoutes, err := GetNodeRoutes(tx, node)
Expand All @@ -430,12 +431,12 @@ nodeRouteLoop:
if route.IsPrimary {
// if we have a primary route, and the node is connected
// nothing needs to be done.
if conn, ok := isConnected[route.Node.ID]; conn && ok {
if val, ok := isLikelyConnected.Load(route.Node.ID); ok && val {
continue nodeRouteLoop
}

// if not, we need to failover the route
failover := failoverRoute(isConnected, &route, routes)
failover := failoverRoute(isLikelyConnected, &route, routes)
if failover != nil {
err := failover.save(tx)
if err != nil {
Expand Down Expand Up @@ -477,7 +478,7 @@ nodeRouteLoop:
// If the given route was not primary, it returns early.
func failoverRouteTx(
tx *gorm.DB,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
r *types.Route,
) ([]types.NodeID, error) {
if r == nil {
Expand All @@ -500,7 +501,7 @@ func failoverRouteTx(
return nil, fmt.Errorf("getting routes by prefix: %w", err)
}

fo := failoverRoute(isConnected, r, routes)
fo := failoverRoute(isLikelyConnected, r, routes)
if fo == nil {
return nil, nil
}
Expand Down Expand Up @@ -538,7 +539,7 @@ func (f *failover) save(tx *gorm.DB) error {
}

func failoverRoute(
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
routeToReplace *types.Route,
altRoutes types.Routes,

Expand Down Expand Up @@ -570,9 +571,11 @@ func failoverRoute(
continue
}

if isConnected != nil && isConnected[route.Node.ID] {
newPrimary = &altRoutes[idx]
break
if isLikelyConnected != nil {
if val, ok := isLikelyConnected.Load(route.Node.ID); ok && val {
newPrimary = &altRoutes[idx]
break
}
}
}

Expand Down
Loading
Loading