Skip to content

Commit

Permalink
Improve ranking error output
Browse files Browse the repository at this point in the history
- Show the number of nodes that were available even if unsuitable,
  because users often misinterpret "0 available" as meaning there is a
  connection error
- Summarise nodes when they all print the same output rather than
  listing all of them
  • Loading branch information
simonwo committed Apr 8, 2024
1 parent 28b18bb commit c956c49
Showing 1 changed file with 15 additions and 7 deletions.
22 changes: 15 additions & 7 deletions pkg/orchestrator/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package orchestrator

import (
"fmt"
"strings"

"github.com/bacalhau-project/bacalhau/pkg/util/idgen"
"github.com/samber/lo"
)

// ErrSchedulerNotFound is returned when the scheduler is not found for a given evaluation type
Expand Down Expand Up @@ -33,16 +35,22 @@ func NewErrNotEnoughNodes(requestedNodes int, availableNodes []NodeRank) ErrNotE
}

func (e ErrNotEnoughNodes) Error() string {
nodeErrors := ""
available := 0
for _, rank := range e.AvailableNodes {
if rank.MeetsRequirement() {
available += 1
suitable := lo.CountBy(e.AvailableNodes, func(rank NodeRank) bool { return rank.MeetsRequirement() })
reasons := lo.GroupBy(e.AvailableNodes, func(rank NodeRank) string { return rank.Reason })

var message strings.Builder
fmt.Fprint(&message, "not enough nodes to run job. ")
fmt.Fprintf(&message, "requested: %d, available: %d, suitable: %d.", e.RequestedNodes, len(e.AvailableNodes), suitable)
for reason, nodes := range reasons {
fmt.Fprint(&message, "\n• ")
if len(nodes) > 1 {
fmt.Fprintf(&message, "%d of %d nodes", len(nodes), len(e.AvailableNodes))
} else {
nodeErrors += fmt.Sprintf("\n\tNode %s: %s", idgen.ShortNodeID(rank.NodeInfo.ID()), rank.Reason)
fmt.Fprintf(&message, "Node %s", idgen.ShortNodeID(nodes[0].NodeInfo.ID()))
}
fmt.Fprintf(&message, ": %s", reason)
}
return fmt.Sprintf("not enough nodes to run job. requested: %d, available: %d. %s", e.RequestedNodes, available, nodeErrors)
return message.String()
}

// ErrNoMatchingNodes is returned when no matching nodes in the network to run a job
Expand Down

0 comments on commit c956c49

Please sign in to comment.