diff --git a/heartbeat.go b/heartbeat.go index 3a63cb7..e3d817c 100644 --- a/heartbeat.go +++ b/heartbeat.go @@ -12,6 +12,7 @@ import ( "github.com/google/uuid" "github.com/hibiken/asynq/internal/base" "github.com/hibiken/asynq/internal/log" + "github.com/hibiken/asynq/internal/timeutil" ) // heartbeater is responsible for writing process info to redis periodically to @@ -19,6 +20,7 @@ import ( type heartbeater struct { logger *log.Logger broker base.Broker + clock timeutil.Clock // channel to communicate back to the long running "heartbeater" goroutine. done chan struct{} @@ -69,6 +71,7 @@ func newHeartbeater(params heartbeaterParams) *heartbeater { return &heartbeater{ logger: params.logger, broker: params.broker, + clock: timeutil.NewRealClock(), done: make(chan struct{}), interval: params.interval, @@ -100,6 +103,8 @@ type workerInfo struct { started time.Time // deadline the worker has to finish processing the task by. deadline time.Time + // lease the worker holds for the task. + lease *base.Lease } func (h *heartbeater) start(wg *sync.WaitGroup) { @@ -107,7 +112,7 @@ func (h *heartbeater) start(wg *sync.WaitGroup) { go func() { defer wg.Done() - h.started = time.Now() + h.started = h.clock.Now() h.beat() @@ -166,7 +171,12 @@ func (h *heartbeater) beat() { Started: w.started, Deadline: w.deadline, }) - idsByQueue[w.msg.Queue] = append(idsByQueue[w.msg.Queue], id) + // Check lease before adding to the set to make sure not to extend the lease if the lease is already expired. + if w.lease.IsValid() { + idsByQueue[w.msg.Queue] = append(idsByQueue[w.msg.Queue], id) + } else { + w.lease.NotifyExpiration() // notify processor if the lease is expired + } } // Note: Set TTL to be long enough so that it won't expire before we write again @@ -176,8 +186,15 @@ func (h *heartbeater) beat() { } for qname, ids := range idsByQueue { - if err := h.broker.ExtendLease(qname, ids...); err != nil { + expirationTime, err := h.broker.ExtendLease(qname, ids...) + if err != nil { h.logger.Errorf("could not extend lease for tasks %v: %v", ids, err) + continue + } + for _, id := range ids { + if l := h.workers[id].lease; !l.Reset(expirationTime) { + h.logger.Warnf("Lease reset failed for %s; lease deadline: %v", id, l.Deadline()) + } } } } diff --git a/heartbeat_test.go b/heartbeat_test.go index 97bd481..ea12c79 100644 --- a/heartbeat_test.go +++ b/heartbeat_test.go @@ -5,6 +5,7 @@ package asynq import ( + "context" "sync" "testing" "time" @@ -20,7 +21,7 @@ import ( // Test goes through a few phases. // -// Phase1: Simulate Server startup; Simulate starting tasks listed in startedTasks +// Phase1: Simulate Server startup; Simulate starting tasks listed in startedWorkers // Phase2: Simluate finishing tasks listed in finishedTasks // Phase3: Simulate Server shutdown; func TestHeartbeater(t *testing.T) { @@ -29,13 +30,28 @@ func TestHeartbeater(t *testing.T) { rdbClient := rdb.NewRDB(r) now := time.Now() - const elapsedTime = 42 * time.Second // simulated time elapsed between phase1 and phase2 + const elapsedTime = 10 * time.Second // simulated time elapsed between phase1 and phase2 + + clock := timeutil.NewSimulatedClock(time.Time{}) // time will be set in each test t1 := h.NewTaskMessageWithQueue("task1", nil, "default") t2 := h.NewTaskMessageWithQueue("task2", nil, "default") t3 := h.NewTaskMessageWithQueue("task3", nil, "default") + t4 := h.NewTaskMessageWithQueue("task4", nil, "custom") + t5 := h.NewTaskMessageWithQueue("task5", nil, "custom") + t6 := h.NewTaskMessageWithQueue("task6", nil, "default") + + // Note: intentionally set to time less than now.Add(rdb.LeaseDuration) to test lease extention is working. + lease1 := h.NewLeaseWithClock(now.Add(10*time.Second), clock) + lease2 := h.NewLeaseWithClock(now.Add(10*time.Second), clock) + lease3 := h.NewLeaseWithClock(now.Add(10*time.Second), clock) + lease4 := h.NewLeaseWithClock(now.Add(10*time.Second), clock) + lease5 := h.NewLeaseWithClock(now.Add(10*time.Second), clock) + lease6 := h.NewLeaseWithClock(now.Add(10*time.Second), clock) tests := []struct { + desc string + // Interval between heartbeats. interval time.Duration @@ -45,21 +61,22 @@ func TestHeartbeater(t *testing.T) { queues map[string]int concurrency int - active map[string][]*base.TaskMessage // initial active set state - lease map[string][]base.Z // initial lease set state - wantLease1 map[string][]base.Z // expected lease set state after starting all startedTasks - wantLease2 map[string][]base.Z // expected lease set state after finishing all finishedTasks - startedTasks []*base.TaskMessage // tasks to send via the started channel - finishedTasks []*base.TaskMessage // tasks to send via the finished channel + active map[string][]*base.TaskMessage // initial active set state + lease map[string][]base.Z // initial lease set state + wantLease1 map[string][]base.Z // expected lease set state after starting all startedWorkers + wantLease2 map[string][]base.Z // expected lease set state after finishing all finishedTasks + startedWorkers []*workerInfo // workerInfo to send via the started channel + finishedTasks []*base.TaskMessage // tasks to send via the finished channel startTime time.Time // simulated start time elapsedTime time.Duration // simulated time elapsed between starting and finishing processing tasks }{ { + desc: "With single queue", interval: 2 * time.Second, host: "localhost", pid: 45678, - queues: map[string]int{"default": 1}, // TODO: Test with multple queues + queues: map[string]int{"default": 1}, concurrency: 10, active: map[string][]*base.TaskMessage{ "default": {t1, t2, t3}, @@ -71,7 +88,11 @@ func TestHeartbeater(t *testing.T) { {Message: t3, Score: now.Add(10 * time.Second).Unix()}, }, }, - startedTasks: []*base.TaskMessage{t1, t2, t3}, + startedWorkers: []*workerInfo{ + {msg: t1, started: now, deadline: now.Add(2 * time.Minute), lease: lease1}, + {msg: t2, started: now, deadline: now.Add(2 * time.Minute), lease: lease2}, + {msg: t3, started: now, deadline: now.Add(2 * time.Minute), lease: lease3}, + }, finishedTasks: []*base.TaskMessage{t1, t2}, wantLease1: map[string][]base.Z{ "default": { @@ -88,6 +109,50 @@ func TestHeartbeater(t *testing.T) { startTime: now, elapsedTime: elapsedTime, }, + { + desc: "With multiple queue", + interval: 2 * time.Second, + host: "localhost", + pid: 45678, + queues: map[string]int{"default": 1, "custom": 2}, + concurrency: 10, + active: map[string][]*base.TaskMessage{ + "default": {t6}, + "custom": {t4, t5}, + }, + lease: map[string][]base.Z{ + "default": { + {Message: t6, Score: now.Add(10 * time.Second).Unix()}, + }, + "custom": { + {Message: t4, Score: now.Add(10 * time.Second).Unix()}, + {Message: t5, Score: now.Add(10 * time.Second).Unix()}, + }, + }, + startedWorkers: []*workerInfo{ + {msg: t6, started: now, deadline: now.Add(2 * time.Minute), lease: lease6}, + {msg: t4, started: now, deadline: now.Add(2 * time.Minute), lease: lease4}, + {msg: t5, started: now, deadline: now.Add(2 * time.Minute), lease: lease5}, + }, + finishedTasks: []*base.TaskMessage{t6, t5}, + wantLease1: map[string][]base.Z{ + "default": { + {Message: t6, Score: now.Add(rdb.LeaseDuration).Unix()}, + }, + "custom": { + {Message: t4, Score: now.Add(rdb.LeaseDuration).Unix()}, + {Message: t5, Score: now.Add(rdb.LeaseDuration).Unix()}, + }, + }, + wantLease2: map[string][]base.Z{ + "default": {}, + "custom": { + {Message: t4, Score: now.Add(elapsedTime).Add(rdb.LeaseDuration).Unix()}, + }, + }, + startTime: now, + elapsedTime: elapsedTime, + }, } timeCmpOpt := cmpopts.EquateApproxTime(10 * time.Millisecond) @@ -98,7 +163,7 @@ func TestHeartbeater(t *testing.T) { h.SeedAllActiveQueues(t, r, tc.active) h.SeedAllLease(t, r, tc.lease) - clock := timeutil.NewSimulatedClock(tc.startTime) + clock.SetTime(tc.startTime) rdbClient.SetClock(clock) srvState := &serverState{} @@ -115,6 +180,7 @@ func TestHeartbeater(t *testing.T) { starting: startingCh, finished: finishedCh, }) + hb.clock = clock // Change host and pid fields for testing purpose. hb.host = tc.host @@ -132,12 +198,8 @@ func TestHeartbeater(t *testing.T) { hb.start(&wg) // Simulate processor starting to work on tasks. - for _, msg := range tc.startedTasks { - startingCh <- &workerInfo{ - msg: msg, - started: now, - deadline: now.Add(30 * time.Minute), - } + for _, w := range tc.startedWorkers { + startingCh <- w } // Wait for heartbeater to write to redis @@ -145,13 +207,13 @@ func TestHeartbeater(t *testing.T) { ss, err := rdbClient.ListServers() if err != nil { - t.Errorf("could not read server info from redis: %v", err) + t.Errorf("%s: could not read server info from redis: %v", tc.desc, err) hb.shutdown() continue } if len(ss) != 1 { - t.Errorf("(*RDB).ListServers returned %d server info, want 1", len(ss)) + t.Errorf("%s: (*RDB).ListServers returned %d server info, want 1", tc.desc, len(ss)) hb.shutdown() continue } @@ -163,10 +225,10 @@ func TestHeartbeater(t *testing.T) { Concurrency: tc.concurrency, Started: now, Status: "active", - ActiveWorkerCount: len(tc.startedTasks), + ActiveWorkerCount: len(tc.startedWorkers), } if diff := cmp.Diff(wantInfo, ss[0], timeCmpOpt, ignoreOpt, ignoreFieldOpt); diff != "" { - t.Errorf("redis stored server status %+v, want %+v; (-want, +got)\n%s", ss[0], wantInfo, diff) + t.Errorf("%s: redis stored server status %+v, want %+v; (-want, +got)\n%s", tc.desc, ss[0], wantInfo, diff) hb.shutdown() continue } @@ -174,7 +236,13 @@ func TestHeartbeater(t *testing.T) { for qname, wantLease := range tc.wantLease1 { gotLease := h.GetLeaseEntries(t, r, qname) if diff := cmp.Diff(wantLease, gotLease, h.SortZSetEntryOpt); diff != "" { - t.Errorf("mismatch found in %q: (-want,+got):\n%s", base.LeaseKey(qname), diff) + t.Errorf("%s: mismatch found in %q: (-want,+got):\n%s", tc.desc, base.LeaseKey(qname), diff) + } + } + + for _, w := range tc.startedWorkers { + if want := now.Add(rdb.LeaseDuration); w.lease.Deadline() != want { + t.Errorf("%s: lease deadline for %v is set to %v, want %v", tc.desc, w.msg, w.lease.Deadline(), want) } } @@ -185,7 +253,7 @@ func TestHeartbeater(t *testing.T) { clock.AdvanceTime(tc.elapsedTime) // Simulate processor finished processing tasks. for _, msg := range tc.finishedTasks { - if err := rdbClient.Done(msg); err != nil { + if err := rdbClient.Done(context.Background(), msg); err != nil { t.Fatalf("RDB.Done failed: %v", err) } finishedCh <- msg @@ -196,7 +264,7 @@ func TestHeartbeater(t *testing.T) { for qname, wantLease := range tc.wantLease2 { gotLease := h.GetLeaseEntries(t, r, qname) if diff := cmp.Diff(wantLease, gotLease, h.SortZSetEntryOpt); diff != "" { - t.Errorf("mismatch found in %q: (-want,+got):\n%s", base.LeaseKey(qname), diff) + t.Errorf("%s: mismatch found in %q: (-want,+got):\n%s", tc.desc, base.LeaseKey(qname), diff) } } @@ -219,23 +287,23 @@ func TestHeartbeater(t *testing.T) { Concurrency: tc.concurrency, Started: now, Status: "closed", - ActiveWorkerCount: len(tc.startedTasks) - len(tc.finishedTasks), + ActiveWorkerCount: len(tc.startedWorkers) - len(tc.finishedTasks), } ss, err = rdbClient.ListServers() if err != nil { - t.Errorf("could not read server status from redis: %v", err) + t.Errorf("%s: could not read server status from redis: %v", tc.desc, err) hb.shutdown() continue } if len(ss) != 1 { - t.Errorf("(*RDB).ListServers returned %d server info, want 1", len(ss)) + t.Errorf("%s: (*RDB).ListServers returned %d server info, want 1", tc.desc, len(ss)) hb.shutdown() continue } if diff := cmp.Diff(wantInfo, ss[0], timeCmpOpt, ignoreOpt, ignoreFieldOpt); diff != "" { - t.Errorf("redis stored process status %+v, want %+v; (-want, +got)\n%s", ss[0], wantInfo, diff) + t.Errorf("%s: redis stored process status %+v, want %+v; (-want, +got)\n%s", tc.desc, ss[0], wantInfo, diff) hb.shutdown() continue } diff --git a/internal/asynqtest/asynqtest.go b/internal/asynqtest/asynqtest.go index 5c5a15a..7820d91 100644 --- a/internal/asynqtest/asynqtest.go +++ b/internal/asynqtest/asynqtest.go @@ -18,6 +18,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/google/uuid" "github.com/hibiken/asynq/internal/base" + "github.com/hibiken/asynq/internal/timeutil" ) // EquateInt64Approx returns a Comparer option that treats int64 values @@ -114,6 +115,13 @@ func NewTaskMessageWithQueue(taskType string, payload []byte, qname string) *bas } } +// NewLeaseWithClock returns a new lease with the given expiration time and clock. +func NewLeaseWithClock(expirationTime time.Time, clock timeutil.Clock) *base.Lease { + l := base.NewLease(expirationTime) + l.Clock = clock + return l +} + // JSON serializes the given key-value pairs into stream of bytes in JSON. func JSON(kv map[string]interface{}) []byte { b, err := json.Marshal(kv) diff --git a/internal/base/base.go b/internal/base/base.go index 4df0782..ce41fcb 100644 --- a/internal/base/base.go +++ b/internal/base/base.go @@ -615,7 +615,7 @@ type Lease struct { once sync.Once ch chan struct{} - clock timeutil.Clock + Clock timeutil.Clock mu sync.Mutex expireAt time.Time // guarded by mu @@ -625,7 +625,7 @@ func NewLease(expirationTime time.Time) *Lease { return &Lease{ ch: make(chan struct{}), expireAt: expirationTime, - clock: timeutil.NewRealClock(), + Clock: timeutil.NewRealClock(), } } @@ -670,7 +670,7 @@ func (l *Lease) Deadline() time.Time { // IsValid returns true if the lease's expieration time is in the future or equals to the current time, // returns false otherwise. func (l *Lease) IsValid() bool { - now := l.clock.Now() + now := l.Clock.Now() l.mu.Lock() defer l.mu.Unlock() return l.expireAt.After(now) || l.expireAt.Equal(now) diff --git a/internal/base/base_test.go b/internal/base/base_test.go index 956cf9a..075e3a0 100644 --- a/internal/base/base_test.go +++ b/internal/base/base_test.go @@ -651,7 +651,7 @@ func TestLeaseReset(t *testing.T) { clock := timeutil.NewSimulatedClock(now) l := NewLease(now.Add(30 * time.Second)) - l.clock = clock + l.Clock = clock // Check initial state if !l.IsValid() { @@ -686,7 +686,7 @@ func TestLeaseNotifyExpiration(t *testing.T) { clock := timeutil.NewSimulatedClock(now) l := NewLease(now.Add(30 * time.Second)) - l.clock = clock + l.Clock = clock select { case <-l.Done(): diff --git a/internal/testbroker/testbroker.go b/internal/testbroker/testbroker.go index c3be81a..ce35866 100644 --- a/internal/testbroker/testbroker.go +++ b/internal/testbroker/testbroker.go @@ -64,40 +64,40 @@ func (tb *TestBroker) EnqueueUnique(ctx context.Context, msg *base.TaskMessage, return tb.real.EnqueueUnique(ctx, msg, ttl) } -func (tb *TestBroker) Dequeue(qnames ...string) (*base.TaskMessage, error) { +func (tb *TestBroker) Dequeue(qnames ...string) (*base.TaskMessage, time.Time, error) { tb.mu.Lock() defer tb.mu.Unlock() if tb.sleeping { - return nil, errRedisDown + return nil, time.Time{}, errRedisDown } return tb.real.Dequeue(qnames...) } -func (tb *TestBroker) Done(msg *base.TaskMessage) error { +func (tb *TestBroker) Done(ctx context.Context, msg *base.TaskMessage) error { tb.mu.Lock() defer tb.mu.Unlock() if tb.sleeping { return errRedisDown } - return tb.real.Done(msg) + return tb.real.Done(ctx, msg) } -func (tb *TestBroker) MarkAsComplete(msg *base.TaskMessage) error { +func (tb *TestBroker) MarkAsComplete(ctx context.Context, msg *base.TaskMessage) error { tb.mu.Lock() defer tb.mu.Unlock() if tb.sleeping { return errRedisDown } - return tb.real.MarkAsComplete(msg) + return tb.real.MarkAsComplete(ctx, msg) } -func (tb *TestBroker) Requeue(msg *base.TaskMessage) error { +func (tb *TestBroker) Requeue(ctx context.Context, msg *base.TaskMessage) error { tb.mu.Lock() defer tb.mu.Unlock() if tb.sleeping { return errRedisDown } - return tb.real.Requeue(msg) + return tb.real.Requeue(ctx, msg) } func (tb *TestBroker) Schedule(ctx context.Context, msg *base.TaskMessage, processAt time.Time) error { @@ -118,22 +118,22 @@ func (tb *TestBroker) ScheduleUnique(ctx context.Context, msg *base.TaskMessage, return tb.real.ScheduleUnique(ctx, msg, processAt, ttl) } -func (tb *TestBroker) Retry(msg *base.TaskMessage, processAt time.Time, errMsg string, isFailure bool) error { +func (tb *TestBroker) Retry(ctx context.Context, msg *base.TaskMessage, processAt time.Time, errMsg string, isFailure bool) error { tb.mu.Lock() defer tb.mu.Unlock() if tb.sleeping { return errRedisDown } - return tb.real.Retry(msg, processAt, errMsg, isFailure) + return tb.real.Retry(ctx, msg, processAt, errMsg, isFailure) } -func (tb *TestBroker) Archive(msg *base.TaskMessage, errMsg string) error { +func (tb *TestBroker) Archive(ctx context.Context, msg *base.TaskMessage, errMsg string) error { tb.mu.Lock() defer tb.mu.Unlock() if tb.sleeping { return errRedisDown } - return tb.real.Archive(msg, errMsg) + return tb.real.Archive(ctx, msg, errMsg) } func (tb *TestBroker) ForwardIfReady(qnames ...string) error { @@ -163,11 +163,11 @@ func (tb *TestBroker) ListLeaseExpired(cutoff time.Time, qnames ...string) ([]*b return tb.real.ListLeaseExpired(cutoff, qnames...) } -func (tb *TestBroker) ExtendLease(qname string, ids ...string) error { +func (tb *TestBroker) ExtendLease(qname string, ids ...string) (time.Time, error) { tb.mu.Lock() defer tb.mu.Unlock() if tb.sleeping { - return errRedisDown + return time.Time{}, errRedisDown } return tb.real.ExtendLease(qname, ids...) } diff --git a/recoverer.go b/recoverer.go index 590bb8c..a0107b4 100644 --- a/recoverer.go +++ b/recoverer.go @@ -5,6 +5,7 @@ package asynq import ( + "context" "sync" "time" @@ -100,13 +101,13 @@ func (r *recoverer) recover() { func (r *recoverer) retry(msg *base.TaskMessage, err error) { delay := r.retryDelayFunc(msg.Retried, err, NewTask(msg.Type, msg.Payload)) retryAt := time.Now().Add(delay) - if err := r.broker.Retry(msg, retryAt, err.Error(), r.isFailureFunc(err)); err != nil { + if err := r.broker.Retry(context.Background(), msg, retryAt, err.Error(), r.isFailureFunc(err)); err != nil { r.logger.Warnf("recoverer: could not retry lease expired task: %v", err) } } func (r *recoverer) archive(msg *base.TaskMessage, err error) { - if err := r.broker.Archive(msg, err.Error()); err != nil { + if err := r.broker.Archive(context.Background(), msg, err.Error()); err != nil { r.logger.Warnf("recoverer: could not move task to archive: %v", err) } } diff --git a/syncer_test.go b/syncer_test.go index 8ffd575..d592baf 100644 --- a/syncer_test.go +++ b/syncer_test.go @@ -5,6 +5,7 @@ package asynq import ( + "context" "fmt" "sync" "testing" @@ -41,7 +42,7 @@ func TestSyncer(t *testing.T) { m := msg syncRequestCh <- &syncRequest{ fn: func() error { - return rdbClient.Done(m) + return rdbClient.Done(context.Background(), m) }, deadline: time.Now().Add(5 * time.Minute), }