Commit

race safe (#19)
* now race safe

* enable race in GitHub pipeline

* Microseconds isn't enough on GitHub pipeline

* invoke cancelFunc to avoid context leak
haitch committed Sep 6, 2022
1 parent c59d69d commit c35056b
Showing 6 changed files with 102 additions and 79 deletions.
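The last commit message, "invoke cancelFunc to avoid context leak", refers to a property of context.WithCancel: until the returned cancel function is invoked, the parent context keeps a reference to the derived child, so a task that finishes without canceling its context accumulates garbage. A minimal sketch of the pattern (illustrative only, not this repository's code):

package main

import (
	"context"
	"time"
)

func main() {
	parent := context.Background()

	// Leaky: the cancel function is discarded, so the derived context
	// stays registered with its parent; `go vet` flags this pattern
	// as a possible context leak.
	leaky, _ := context.WithCancel(parent)
	_ = leaky

	// Fixed: always release the derived context, even on success;
	// this is what the commit now does inside Task.finish.
	ctx, cancel := context.WithCancel(parent)
	defer cancel()

	select {
	case <-time.After(10 * time.Millisecond): // work finished
	case <-ctx.Done(): // canceled
	}
}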
2 changes: 1 addition & 1 deletion .github/workflows/go.yml
@@ -26,7 +26,7 @@ jobs:
       run: go build -v .
 
     - name: Test
-      run: go test -coverprofile=coverage.txt -covermode=atomic ./...
+      run: go test -race -coverprofile=coverage.txt -covermode=atomic ./...
 
     - name: Codecov
       uses: codecov/[email protected]
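For reference, the -race flag builds the test binary with Go's race detector, which reports unsynchronized concurrent memory access at runtime. A minimal illustration (not from this repository) of the kind of race the flag catches, next to the mutex-guarded form this commit applies to Task:

package main

import "sync"

type counter struct {
	mu sync.Mutex
	n  int
}

// racy: both goroutines write n without synchronization;
// `go run -race .` reports a data race here.
func racy(c *counter) {
	var wg sync.WaitGroup
	for i := 0; i < 2; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			c.n++
		}()
	}
	wg.Wait()
}

// safe: the same increment guarded by a mutex, mirroring how the
// commit guards Task.state behind t.mutex.
func safe(c *counter) {
	var wg sync.WaitGroup
	for i := 0; i < 2; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			c.mu.Lock()
			c.n++
			c.mu.Unlock()
		}()
	}
	wg.Wait()
}

func main() {
	racy(&counter{})
	safe(&counter{})
}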
4 changes: 2 additions & 2 deletions error_test.go
@@ -29,8 +29,8 @@ func TestTimeoutCase(t *testing.T) {
 	ctx, cancelFunc := newTestContextWithTimeout(t, 3*time.Second)
 	defer cancelFunc()
 
-	tsk := asynctask.Start(ctx, getCountingTask(10, 200*time.Millisecond))
-	_, err := tsk.WaitWithTimeout(ctx, 300*time.Millisecond)
+	tsk := asynctask.Start(ctx, getCountingTask(10, "timeoutCase", countingTaskDefaultStepLatency))
+	_, err := tsk.WaitWithTimeout(ctx, 30*time.Millisecond)
 	assert.True(t, errors.Is(err, context.DeadlineExceeded), "expecting DeadlineExceeded")
 
 	// the last Wait error should affect running task
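For context on the numbers above: WaitWithTimeout only stops the waiting; the task itself keeps running. With the counting task's step latency now 20ms (see task_test.go below), a 30ms wait expires well before the 10-step count completes, which is exactly what the test asserts. A hedged usage sketch (the import path is assumed, not stated in this diff):

package main

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/Azure/go-asynctask" // import path assumed
)

func main() {
	ctx := context.Background()

	// A task that takes ~200ms in total (10 steps of 20ms each).
	tsk := asynctask.Start(ctx, func(ctx context.Context) (*int, error) {
		n := 0
		for i := 0; i < 10; i++ {
			select {
			case <-time.After(20 * time.Millisecond):
				n = i
			case <-ctx.Done():
				return &n, ctx.Err()
			}
		}
		return &n, nil
	})

	// The 30ms wait gives up long before the task finishes; the task
	// keeps running in the background.
	_, err := tsk.WaitWithTimeout(ctx, 30*time.Millisecond)
	fmt.Println(errors.Is(err, context.DeadlineExceeded)) // true
}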
40 changes: 27 additions & 13 deletions task.go
@@ -19,30 +19,33 @@ type Task[T any] struct {
 	err        error
 	cancelFunc context.CancelFunc
 	waitGroup  *sync.WaitGroup
+	mutex      *sync.Mutex
 }
 
 // State returns the state of the task.
 func (t *Task[T]) State() State {
+	t.mutex.Lock()
+	defer t.mutex.Unlock()
 	return t.state
 }
 
-// Cancel abort the task execution
-// !! only if the function provided handles context cancel.
-func (t *Task[T]) Cancel() {
-	if !t.state.IsTerminalState() {
-		t.cancelFunc()
-
-		var result T
-		t.finish(StateCanceled, &result, ErrCanceled)
+// Cancel the task by canceling the context.
+// !! this relies on the task function to check context cancellation and handle it properly.
+func (t *Task[T]) Cancel() bool {
+	if !t.finished() {
+		t.finish(StateCanceled, nil, ErrCanceled)
+		return true
 	}
+
+	return false
 }
 
 // Wait blocks the current thread/routine until the task finishes or fails.
 // The context passed in can terminate the wait through context cancellation,
 // but won't terminate the task (unless it's the same context).
 func (t *Task[T]) Wait(ctx context.Context) error {
 	// return immediately if task already in terminal state.
-	if t.state.IsTerminalState() {
+	if t.finished() {
 		return t.err
 	}
 
@@ -64,7 +67,7 @@ func (t *Task[T]) Wait(ctx context.Context) error {
 // timeout only stops waiting; the task will remain running.
 func (t *Task[T]) WaitWithTimeout(ctx context.Context, timeout time.Duration) (*T, error) {
 	// return immediately if task already in terminal state.
-	if t.state.IsTerminalState() {
+	if t.finished() {
 		return t.result, t.err
 	}
 
@@ -90,13 +93,14 @@ func Start[T any](ctx context.Context, task AsyncFunc[T]) *Task[T] {
 	ctx, cancel := context.WithCancel(ctx)
 	wg := &sync.WaitGroup{}
 	wg.Add(1)
+	mutex := &sync.Mutex{}
 
-	var result T
 	record := &Task[T]{
 		state:      StateRunning,
-		result:     &result,
+		result:     nil,
 		cancelFunc: cancel,
 		waitGroup:  wg,
+		mutex:      mutex,
 	}
 
 	go runAndTrackGenericTask(ctx, record, task)
@@ -113,6 +117,7 @@ func NewCompletedTask[T any](value *T) *Task[T] {
 		// nil cancelFunc and waitGroup should be protected with IsTerminalState()
 		cancelFunc: nil,
 		waitGroup:  nil,
+		mutex:      &sync.Mutex{},
 	}
 }
 
@@ -138,9 +143,18 @@ func runAndTrackGenericTask[T any](ctx context.Context, record *Task[T], task fu
 
 func (t *Task[T]) finish(state State, result *T, err error) {
 	// only update state and result if not yet canceled
-	if !t.state.IsTerminalState() {
+	if !t.finished() {
+		t.mutex.Lock()
+		t.cancelFunc() // cancel the context
 		t.state = state
 		t.result = result
 		t.err = err
+		t.mutex.Unlock()
 	}
 }
+
+func (t *Task[T]) finished() bool {
+	t.mutex.Lock()
+	defer t.mutex.Unlock()
+	return t.state.IsTerminalState()
+}
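With this change Cancel reports whether it actually transitioned the task, so callers can distinguish a first cancellation from a no-op on an already-finished task. A hedged usage sketch (the caller code is hypothetical and the import path is assumed):

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/Azure/go-asynctask" // import path assumed
)

func main() {
	ctx := context.Background()

	tsk := asynctask.Start(ctx, func(ctx context.Context) (*int, error) {
		select {
		case <-time.After(time.Second):
			n := 42
			return &n, nil
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	})

	// First call transitions Running -> Canceled and returns true;
	// the second call sees a terminal state and returns false.
	fmt.Println(tsk.Cancel()) // true
	fmt.Println(tsk.Cancel()) // false
}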
37 changes: 22 additions & 15 deletions task_test.go
@@ -2,6 +2,7 @@ package asynctask_test
 
 import (
 	"context"
+	"fmt"
 	"testing"
 	"time"
 
@@ -10,6 +11,7 @@ import (
 )
 
 const testContextKey string = "testing"
+const countingTaskDefaultStepLatency time.Duration = 20 * time.Millisecond
 
 func newTestContext(t *testing.T) context.Context {
 	return context.WithValue(context.TODO(), testContextKey, t)
@@ -19,18 +21,20 @@ func newTestContextWithTimeout(t *testing.T, timeout time.Duration) (context.Con
 	return context.WithTimeout(context.WithValue(context.TODO(), testContextKey, t), timeout)
 }
 
-func getCountingTask(countTo int, sleepInterval time.Duration) asynctask.AsyncFunc[int] {
+func getCountingTask(countTo int, taskId string, sleepInterval time.Duration) asynctask.AsyncFunc[int] {
 	return func(ctx context.Context) (*int, error) {
 		t := ctx.Value(testContextKey).(*testing.T)
 
 		result := 0
 		for i := 0; i < countTo; i++ {
 			select {
 			case <-time.After(sleepInterval):
-				t.Logf(" working %d", i)
+				t.Logf("[%s]: counting %d", taskId, i)
 				result = i
 			case <-ctx.Done():
-				t.Log("work canceled")
+				// testing.Logf causes a data race when the test has already finished: https://github.com/golang/go/issues/40343
+				// leave a minor time buffer before exiting the test so this last log can finish.
+				t.Logf("[%s]: work canceled", taskId)
 				return &result, nil
 			}
 		}
@@ -43,7 +47,7 @@ func TestEasyGenericCase(t *testing.T) {
 	ctx, cancelFunc := newTestContextWithTimeout(t, 3*time.Second)
 	defer cancelFunc()
 
-	t1 := asynctask.Start(ctx, getCountingTask(10, 200*time.Millisecond))
+	t1 := asynctask.Start(ctx, getCountingTask(10, "easyTask", countingTaskDefaultStepLatency))
 	assert.Equal(t, asynctask.StateRunning, t1.State(), "Task should be queued to Running")
 
 	rawResult, err := t1.Result(ctx)
@@ -62,26 +66,27 @@ func TestEasyGenericCase(t *testing.T) {
 	assert.NotNil(t, rawResult)
 	assert.Equal(t, *rawResult, 9)
 
-	assert.True(t, elapsed.Microseconds() < 3, "Second wait should return immediately")
+	// Result should be returned immediately
+	assert.True(t, elapsed.Milliseconds() < 1, fmt.Sprintf("Second wait should have returned immediately: %s", elapsed))
 }
 
 func TestCancelFuncOnGeneric(t *testing.T) {
 	t.Parallel()
 	ctx, cancelFunc := newTestContextWithTimeout(t, 3*time.Second)
 	defer cancelFunc()
 
-	t1 := asynctask.Start(ctx, getCountingTask(10, 200*time.Millisecond))
+	t1 := asynctask.Start(ctx, getCountingTask(10, "cancelTask", countingTaskDefaultStepLatency))
 	assert.Equal(t, asynctask.StateRunning, t1.State(), "Task should be queued to Running")
 
-	time.Sleep(time.Second * 1)
+	time.Sleep(countingTaskDefaultStepLatency)
 	t1.Cancel()
 
 	_, err := t1.Result(ctx)
 	assert.Equal(t, asynctask.ErrCanceled, err, "should return reason of error")
 	assert.Equal(t, asynctask.StateCanceled, t1.State(), "Task should remain in canceled state")
 
 	// I can cancel again, and nothing changes
-	time.Sleep(time.Second * 1)
+	time.Sleep(countingTaskDefaultStepLatency)
 	t1.Cancel()
 	_, err = t1.Result(ctx)
 	assert.Equal(t, asynctask.ErrCanceled, err, "should return reason of error")
Expand All @@ -101,11 +106,11 @@ func TestConsistentResultAfterCancelGenericTask(t *testing.T) {
ctx, cancelFunc := newTestContextWithTimeout(t, 3*time.Second)
defer cancelFunc()

t1 := asynctask.Start(ctx, getCountingTask(10, 200*time.Millisecond))
t2 := asynctask.Start(ctx, getCountingTask(10, 200*time.Millisecond))
t1 := asynctask.Start(ctx, getCountingTask(10, "consistentTask1", countingTaskDefaultStepLatency))
t2 := asynctask.Start(ctx, getCountingTask(10, "consistentTask2", countingTaskDefaultStepLatency))
assert.Equal(t, asynctask.StateRunning, t1.State(), "Task should queued to Running")

time.Sleep(time.Second * 1)
time.Sleep(countingTaskDefaultStepLatency)
start := time.Now()
t1.Cancel()
duration := time.Since(start)
Expand Down Expand Up @@ -150,22 +155,24 @@ func TestCrazyCaseGeneric(t *testing.T) {
ctx, cancelFunc := newTestContextWithTimeout(t, 3*time.Second)
defer cancelFunc()

numOfTasks := 8000 // if you have --race switch on: limit on 8128 simultaneously alive goroutines is exceeded, dying
numOfTasks := 100 // if you have --race switch on: limit on 8128 simultaneously alive goroutines is exceeded, dying
tasks := map[int]*asynctask.Task[int]{}
for i := 0; i < numOfTasks; i++ {
tasks[i] = asynctask.Start(ctx, getCountingTask(10, 200*time.Millisecond))
tasks[i] = asynctask.Start(ctx, getCountingTask(10, fmt.Sprintf("CrazyTask%d", i), countingTaskDefaultStepLatency))
}

time.Sleep(200 * time.Millisecond)
// sleep 1 step, then cancel task with even number
time.Sleep(countingTaskDefaultStepLatency)
for i := 0; i < numOfTasks; i += 2 {
tasks[i].Cancel()
}

time.Sleep(time.Duration(numOfTasks) * 2 * time.Microsecond)
for i := 0; i < numOfTasks; i += 1 {
rawResult, err := tasks[i].Result(ctx)

if i%2 == 0 {
assert.Equal(t, asynctask.ErrCanceled, err, "should be canceled")
assert.Equal(t, asynctask.ErrCanceled, err, fmt.Sprintf("task %s should be canceled, but it finished with %+v", fmt.Sprintf("CrazyTask%d", i), rawResult))
assert.Equal(t, *rawResult, 0)
} else {
assert.NoError(t, err)
Expand Down
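The comment added to getCountingTask above points at golang/go#40343: calling t.Logf from a goroutine after the test function has returned races with the framework's test teardown. A minimal reproduction sketch (illustrative only, not part of this commit); under go test -race the detector may flag the t.Log call:

package example_test

import (
	"testing"
	"time"
)

func TestLeakyLogger(t *testing.T) {
	go func() {
		// This goroutine outlives the test, so the write inside t.Log
		// races with the framework marking the test complete.
		time.Sleep(10 * time.Millisecond)
		t.Log("logging after the test has finished")
	}()
	// The test returns immediately, while the goroutine is still pending.
}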
36 changes: 8 additions & 28 deletions wait_all.go
@@ -3,7 +3,6 @@ package asynctask
 
 import (
 	"context"
 	"fmt"
-	"sync"
 )
 
 type Waitable interface {
@@ -21,14 +20,14 @@ type WaitAllOptions struct {
 func WaitAll(ctx context.Context, options *WaitAllOptions, tasks ...Waitable) error {
 	tasksCount := len(tasks)
 
-	mutex := sync.Mutex{}
-	errorChClosed := false
 	errorCh := make(chan error, tasksCount)
-	// hard close channel
-	defer close(errorCh)
+	// when failFast is enabled, we return on the first error we see, while other tasks may still post errors to this channel.
+	if !options.FailFast {
+		defer close(errorCh)
+	}
 
 	for _, tsk := range tasks {
-		go waitOne(ctx, tsk, errorCh, &errorChClosed, &mutex)
+		go waitOne(ctx, tsk, errorCh)
 	}
 
 	runningTasks := tasksCount
@@ -40,20 +39,17 @@ func WaitAll(ctx context.Context, options *WaitAllOptions, tasks ...Waitable) er
 			if err != nil {
 				// return immediately after receiving the first error.
 				if options.FailFast {
-					softCloseChannel(&mutex, &errorChClosed)
 					return err
 				}
 
 				errList = append(errList, err)
 			}
 		case <-ctx.Done():
-			softCloseChannel(&mutex, &errorChClosed)
-			return fmt.Errorf("WaitAll context canceled: %w", ctx.Err())
+			return fmt.Errorf("WaitAll %w", ctx.Err())
 		}
 
 		// are we finished yet?
 		if runningTasks == 0 {
-			softCloseChannel(&mutex, &errorChClosed)
 			break
 		}
 	}
@@ -69,23 +65,7 @@ func WaitAll(ctx context.Context, options *WaitAllOptions, tasks ...Waitable) er
 	return nil
 }
 
-func waitOne(ctx context.Context, tsk Waitable, errorCh chan<- error, errorChClosed *bool, mutex *sync.Mutex) {
+func waitOne(ctx context.Context, tsk Waitable, errorCh chan<- error) {
 	err := tsk.Wait(ctx)
-
-	// why mutex?
-	// if all tasks start using same context (unittest is good example)
-	// and that context got canceled, all task fail at same time.
-	// first one went in and close the channel, while another one already went through gate check.
-	// raise a panic with send to closed channel.
-	mutex.Lock()
-	defer mutex.Unlock()
-	if !*errorChClosed {
-		errorCh <- err
-	}
-}
-
-func softCloseChannel(mutex *sync.Mutex, closed *bool) {
-	mutex.Lock()
-	defer mutex.Unlock()
-	*closed = true
+	errorCh <- err
 }
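The deleted mutex existed only to avoid a send on a closed channel. The new design sidesteps the problem: errorCh is buffered with capacity len(tasks) and each waitOne goroutine sends exactly once, so no send can ever block, even after WaitAll has returned. In the FailFast path the channel is deliberately left open; an unclosed channel is garbage-collected once unreachable. A minimal sketch of the idea (hypothetical names, not the library's code):

package main

import "fmt"

func main() {
	const n = 4

	// Buffered to n: each worker delivers exactly one error without
	// blocking, so the receiver may stop early without stranding senders.
	results := make(chan error, n)

	for i := 0; i < n; i++ {
		go func(id int) {
			results <- fmt.Errorf("worker %d failed", id)
		}(i)
	}

	// Fail-fast receiver: return after the first error; the remaining
	// sends land in the buffer, and the channel is reclaimed by the GC
	// once nothing references it (channels need not be closed to be freed).
	fmt.Println(<-results)
}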