Skip to content

Commit

Permalink
Devstack cancellations (#3487)
Browse files Browse the repository at this point in the history
When exiting devstack, there are often errors logged that are
unnecessary, or in some cases delay when the process actuall exits.

This commit checks some obvious context cancellation errors and gives
the code a chance to exit without logging and continuing.

Should also fix #3484
  • Loading branch information
rossjones committed Feb 19, 2024
1 parent 5e8d721 commit 0565aea
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 9 deletions.
7 changes: 1 addition & 6 deletions pkg/devstack/devstack.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ func Setup(
if err != nil {
return nil, fmt.Errorf("failed to get ipfs swarm addresses: %w", err)
}

// Only use a single address as libp2p seems to have concurrency issues, like two nodes not able to finish
// connecting/joining topics, when using multiple addresses for a single host.
// All the IPFS nodes are running within the same process, so connecting over localhost will be fine.
Expand Down Expand Up @@ -373,12 +374,6 @@ func (stack *DevStack) PrintNodeInfo(ctx context.Context, fsRepo *repo.FsRepo, c
devStackIPFSSwarmAddress := ""
var devstackPeerAddrs []string

// TODO remove this it's wrong and never printed, nothing sets the env vars its printing
logString += `
-----------------------------------------
-----------------------------------------
`

requesterOnlyNodes := 0
computeOnlyNodes := 0
hybridNodes := 0
Expand Down
4 changes: 4 additions & 0 deletions pkg/ipfs/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,10 @@ func (cl Client) SwarmAddresses(ctx context.Context) ([]string, error) {
return nil, err
}

if len(multiAddresses) == 0 {
return nil, fmt.Errorf("no swarm addresses found")
}

// It's common for callers to this function to use the result to connect to another IPFS node.
// This sorts the addresses so IPv4 localhost is first, with the aim of using the localhost connection during tests
// and so avoid any unneeded network hops. Other callers to this either sort the list themselves or just output the
Expand Down
7 changes: 7 additions & 0 deletions pkg/ipfs/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ func NewNodeWithConfig(ctx context.Context, cm *system.CleanupManager, cfg types

// TODO if cfg.PrivateInternal is true do we actually want to connect to peers?
if err = connectToPeers(ctx, api, ipfsNode, cfg.GetSwarmAddresses()); err != nil {
if err == context.Canceled {
log.Ctx(ctx).Debug().Msg("gracefully shutting down ipfs node due to context cancellation")
return nil, nil
}
log.Ctx(ctx).Error().Msgf("ipfs node failed to connect to peers: %s", err)
}

Expand Down Expand Up @@ -343,6 +347,9 @@ func connectToPeers(ctx context.Context, api icore.CoreAPI, node *core.IpfsNode,
go func(peerInfo peer.AddrInfo) {
defer wg.Done()
if err := api.Swarm().Connect(ctx, peerInfo); err != nil {
if err == context.Canceled {
return // context was canceled, nothing to do
}
anyErr = err
log.Ctx(ctx).Debug().Err(err).Msgf("failed to connect to ipfs peer %s, skipping", peerInfo.ID)
}
Expand Down
10 changes: 7 additions & 3 deletions pkg/routing/node_info_publisher.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,22 +96,26 @@ func (n *NodeInfoPublisher) publishBackgroundTask(ctx context.Context, interval
ticker := time.NewTicker(interval)
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
func() {
ctx, span := system.NewSpan(ctx, system.GetTracer(), "pkg/routing.NodeInfoPublisher.publishBackgroundTask") //nolint:govet
defer span.End()

err := n.Publish(ctx)
if err != nil {
log.Ctx(ctx).Err(err).Msg("failed to publish node info")
if err == context.Canceled {
log.Ctx(ctx).Debug().Msg("gracefully shutting down ipfs node due to context cancellation")
} else {
log.Ctx(ctx).Err(err).Msg("failed to publish node info")
}
}
}()
case <-n.stopChannel:
log.Ctx(ctx).Debug().Msg("stopped publishing node info")
ticker.Stop()
return
case <-ctx.Done():
return
}
}
}
Expand Down

0 comments on commit 0565aea

Please sign in to comment.