2
0
mirror of https://github.com/hibiken/asynq.git synced 2024-09-20 02:55:54 +08:00

Update heartbeat to extend lease of active workers

This commit is contained in:
Ken Hibino 2022-02-14 07:17:51 -08:00
parent dfae8638e1
commit d7169cd445
8 changed files with 148 additions and 53 deletions

View File

@ -12,6 +12,7 @@ import (
"github.com/google/uuid"
"github.com/hibiken/asynq/internal/base"
"github.com/hibiken/asynq/internal/log"
"github.com/hibiken/asynq/internal/timeutil"
)
// heartbeater is responsible for writing process info to redis periodically to
@ -19,6 +20,7 @@ import (
type heartbeater struct {
logger *log.Logger
broker base.Broker
clock timeutil.Clock
// channel to communicate back to the long running "heartbeater" goroutine.
done chan struct{}
@ -69,6 +71,7 @@ func newHeartbeater(params heartbeaterParams) *heartbeater {
return &heartbeater{
logger: params.logger,
broker: params.broker,
clock: timeutil.NewRealClock(),
done: make(chan struct{}),
interval: params.interval,
@ -100,6 +103,8 @@ type workerInfo struct {
started time.Time
// deadline the worker has to finish processing the task by.
deadline time.Time
// lease the worker holds for the task.
lease *base.Lease
}
func (h *heartbeater) start(wg *sync.WaitGroup) {
@ -107,7 +112,7 @@ func (h *heartbeater) start(wg *sync.WaitGroup) {
go func() {
defer wg.Done()
h.started = time.Now()
h.started = h.clock.Now()
h.beat()
@ -166,7 +171,12 @@ func (h *heartbeater) beat() {
Started: w.started,
Deadline: w.deadline,
})
idsByQueue[w.msg.Queue] = append(idsByQueue[w.msg.Queue], id)
// Check lease before adding to the set to make sure not to extend the lease if the lease is already expired.
if w.lease.IsValid() {
idsByQueue[w.msg.Queue] = append(idsByQueue[w.msg.Queue], id)
} else {
w.lease.NotifyExpiration() // notify processor if the lease is expired
}
}
// Note: Set TTL to be long enough so that it won't expire before we write again
@ -176,8 +186,15 @@ func (h *heartbeater) beat() {
}
for qname, ids := range idsByQueue {
if err := h.broker.ExtendLease(qname, ids...); err != nil {
expirationTime, err := h.broker.ExtendLease(qname, ids...)
if err != nil {
h.logger.Errorf("could not extend lease for tasks %v: %v", ids, err)
continue
}
for _, id := range ids {
if l := h.workers[id].lease; !l.Reset(expirationTime) {
h.logger.Warnf("Lease reset failed for %s; lease deadline: %v", id, l.Deadline())
}
}
}
}

View File

@ -5,6 +5,7 @@
package asynq
import (
"context"
"sync"
"testing"
"time"
@ -20,7 +21,7 @@ import (
// Test goes through a few phases.
//
// Phase1: Simulate Server startup; Simulate starting tasks listed in startedTasks
// Phase1: Simulate Server startup; Simulate starting tasks listed in startedWorkers
// Phase2: Simluate finishing tasks listed in finishedTasks
// Phase3: Simulate Server shutdown;
func TestHeartbeater(t *testing.T) {
@ -29,13 +30,28 @@ func TestHeartbeater(t *testing.T) {
rdbClient := rdb.NewRDB(r)
now := time.Now()
const elapsedTime = 42 * time.Second // simulated time elapsed between phase1 and phase2
const elapsedTime = 10 * time.Second // simulated time elapsed between phase1 and phase2
clock := timeutil.NewSimulatedClock(time.Time{}) // time will be set in each test
t1 := h.NewTaskMessageWithQueue("task1", nil, "default")
t2 := h.NewTaskMessageWithQueue("task2", nil, "default")
t3 := h.NewTaskMessageWithQueue("task3", nil, "default")
t4 := h.NewTaskMessageWithQueue("task4", nil, "custom")
t5 := h.NewTaskMessageWithQueue("task5", nil, "custom")
t6 := h.NewTaskMessageWithQueue("task6", nil, "default")
// Note: intentionally set to time less than now.Add(rdb.LeaseDuration) to test lease extention is working.
lease1 := h.NewLeaseWithClock(now.Add(10*time.Second), clock)
lease2 := h.NewLeaseWithClock(now.Add(10*time.Second), clock)
lease3 := h.NewLeaseWithClock(now.Add(10*time.Second), clock)
lease4 := h.NewLeaseWithClock(now.Add(10*time.Second), clock)
lease5 := h.NewLeaseWithClock(now.Add(10*time.Second), clock)
lease6 := h.NewLeaseWithClock(now.Add(10*time.Second), clock)
tests := []struct {
desc string
// Interval between heartbeats.
interval time.Duration
@ -45,21 +61,22 @@ func TestHeartbeater(t *testing.T) {
queues map[string]int
concurrency int
active map[string][]*base.TaskMessage // initial active set state
lease map[string][]base.Z // initial lease set state
wantLease1 map[string][]base.Z // expected lease set state after starting all startedTasks
wantLease2 map[string][]base.Z // expected lease set state after finishing all finishedTasks
startedTasks []*base.TaskMessage // tasks to send via the started channel
finishedTasks []*base.TaskMessage // tasks to send via the finished channel
active map[string][]*base.TaskMessage // initial active set state
lease map[string][]base.Z // initial lease set state
wantLease1 map[string][]base.Z // expected lease set state after starting all startedWorkers
wantLease2 map[string][]base.Z // expected lease set state after finishing all finishedTasks
startedWorkers []*workerInfo // workerInfo to send via the started channel
finishedTasks []*base.TaskMessage // tasks to send via the finished channel
startTime time.Time // simulated start time
elapsedTime time.Duration // simulated time elapsed between starting and finishing processing tasks
}{
{
desc: "With single queue",
interval: 2 * time.Second,
host: "localhost",
pid: 45678,
queues: map[string]int{"default": 1}, // TODO: Test with multple queues
queues: map[string]int{"default": 1},
concurrency: 10,
active: map[string][]*base.TaskMessage{
"default": {t1, t2, t3},
@ -71,7 +88,11 @@ func TestHeartbeater(t *testing.T) {
{Message: t3, Score: now.Add(10 * time.Second).Unix()},
},
},
startedTasks: []*base.TaskMessage{t1, t2, t3},
startedWorkers: []*workerInfo{
{msg: t1, started: now, deadline: now.Add(2 * time.Minute), lease: lease1},
{msg: t2, started: now, deadline: now.Add(2 * time.Minute), lease: lease2},
{msg: t3, started: now, deadline: now.Add(2 * time.Minute), lease: lease3},
},
finishedTasks: []*base.TaskMessage{t1, t2},
wantLease1: map[string][]base.Z{
"default": {
@ -88,6 +109,50 @@ func TestHeartbeater(t *testing.T) {
startTime: now,
elapsedTime: elapsedTime,
},
{
desc: "With multiple queue",
interval: 2 * time.Second,
host: "localhost",
pid: 45678,
queues: map[string]int{"default": 1, "custom": 2},
concurrency: 10,
active: map[string][]*base.TaskMessage{
"default": {t6},
"custom": {t4, t5},
},
lease: map[string][]base.Z{
"default": {
{Message: t6, Score: now.Add(10 * time.Second).Unix()},
},
"custom": {
{Message: t4, Score: now.Add(10 * time.Second).Unix()},
{Message: t5, Score: now.Add(10 * time.Second).Unix()},
},
},
startedWorkers: []*workerInfo{
{msg: t6, started: now, deadline: now.Add(2 * time.Minute), lease: lease6},
{msg: t4, started: now, deadline: now.Add(2 * time.Minute), lease: lease4},
{msg: t5, started: now, deadline: now.Add(2 * time.Minute), lease: lease5},
},
finishedTasks: []*base.TaskMessage{t6, t5},
wantLease1: map[string][]base.Z{
"default": {
{Message: t6, Score: now.Add(rdb.LeaseDuration).Unix()},
},
"custom": {
{Message: t4, Score: now.Add(rdb.LeaseDuration).Unix()},
{Message: t5, Score: now.Add(rdb.LeaseDuration).Unix()},
},
},
wantLease2: map[string][]base.Z{
"default": {},
"custom": {
{Message: t4, Score: now.Add(elapsedTime).Add(rdb.LeaseDuration).Unix()},
},
},
startTime: now,
elapsedTime: elapsedTime,
},
}
timeCmpOpt := cmpopts.EquateApproxTime(10 * time.Millisecond)
@ -98,7 +163,7 @@ func TestHeartbeater(t *testing.T) {
h.SeedAllActiveQueues(t, r, tc.active)
h.SeedAllLease(t, r, tc.lease)
clock := timeutil.NewSimulatedClock(tc.startTime)
clock.SetTime(tc.startTime)
rdbClient.SetClock(clock)
srvState := &serverState{}
@ -115,6 +180,7 @@ func TestHeartbeater(t *testing.T) {
starting: startingCh,
finished: finishedCh,
})
hb.clock = clock
// Change host and pid fields for testing purpose.
hb.host = tc.host
@ -132,12 +198,8 @@ func TestHeartbeater(t *testing.T) {
hb.start(&wg)
// Simulate processor starting to work on tasks.
for _, msg := range tc.startedTasks {
startingCh <- &workerInfo{
msg: msg,
started: now,
deadline: now.Add(30 * time.Minute),
}
for _, w := range tc.startedWorkers {
startingCh <- w
}
// Wait for heartbeater to write to redis
@ -145,13 +207,13 @@ func TestHeartbeater(t *testing.T) {
ss, err := rdbClient.ListServers()
if err != nil {
t.Errorf("could not read server info from redis: %v", err)
t.Errorf("%s: could not read server info from redis: %v", tc.desc, err)
hb.shutdown()
continue
}
if len(ss) != 1 {
t.Errorf("(*RDB).ListServers returned %d server info, want 1", len(ss))
t.Errorf("%s: (*RDB).ListServers returned %d server info, want 1", tc.desc, len(ss))
hb.shutdown()
continue
}
@ -163,10 +225,10 @@ func TestHeartbeater(t *testing.T) {
Concurrency: tc.concurrency,
Started: now,
Status: "active",
ActiveWorkerCount: len(tc.startedTasks),
ActiveWorkerCount: len(tc.startedWorkers),
}
if diff := cmp.Diff(wantInfo, ss[0], timeCmpOpt, ignoreOpt, ignoreFieldOpt); diff != "" {
t.Errorf("redis stored server status %+v, want %+v; (-want, +got)\n%s", ss[0], wantInfo, diff)
t.Errorf("%s: redis stored server status %+v, want %+v; (-want, +got)\n%s", tc.desc, ss[0], wantInfo, diff)
hb.shutdown()
continue
}
@ -174,7 +236,13 @@ func TestHeartbeater(t *testing.T) {
for qname, wantLease := range tc.wantLease1 {
gotLease := h.GetLeaseEntries(t, r, qname)
if diff := cmp.Diff(wantLease, gotLease, h.SortZSetEntryOpt); diff != "" {
t.Errorf("mismatch found in %q: (-want,+got):\n%s", base.LeaseKey(qname), diff)
t.Errorf("%s: mismatch found in %q: (-want,+got):\n%s", tc.desc, base.LeaseKey(qname), diff)
}
}
for _, w := range tc.startedWorkers {
if want := now.Add(rdb.LeaseDuration); w.lease.Deadline() != want {
t.Errorf("%s: lease deadline for %v is set to %v, want %v", tc.desc, w.msg, w.lease.Deadline(), want)
}
}
@ -185,7 +253,7 @@ func TestHeartbeater(t *testing.T) {
clock.AdvanceTime(tc.elapsedTime)
// Simulate processor finished processing tasks.
for _, msg := range tc.finishedTasks {
if err := rdbClient.Done(msg); err != nil {
if err := rdbClient.Done(context.Background(), msg); err != nil {
t.Fatalf("RDB.Done failed: %v", err)
}
finishedCh <- msg
@ -196,7 +264,7 @@ func TestHeartbeater(t *testing.T) {
for qname, wantLease := range tc.wantLease2 {
gotLease := h.GetLeaseEntries(t, r, qname)
if diff := cmp.Diff(wantLease, gotLease, h.SortZSetEntryOpt); diff != "" {
t.Errorf("mismatch found in %q: (-want,+got):\n%s", base.LeaseKey(qname), diff)
t.Errorf("%s: mismatch found in %q: (-want,+got):\n%s", tc.desc, base.LeaseKey(qname), diff)
}
}
@ -219,23 +287,23 @@ func TestHeartbeater(t *testing.T) {
Concurrency: tc.concurrency,
Started: now,
Status: "closed",
ActiveWorkerCount: len(tc.startedTasks) - len(tc.finishedTasks),
ActiveWorkerCount: len(tc.startedWorkers) - len(tc.finishedTasks),
}
ss, err = rdbClient.ListServers()
if err != nil {
t.Errorf("could not read server status from redis: %v", err)
t.Errorf("%s: could not read server status from redis: %v", tc.desc, err)
hb.shutdown()
continue
}
if len(ss) != 1 {
t.Errorf("(*RDB).ListServers returned %d server info, want 1", len(ss))
t.Errorf("%s: (*RDB).ListServers returned %d server info, want 1", tc.desc, len(ss))
hb.shutdown()
continue
}
if diff := cmp.Diff(wantInfo, ss[0], timeCmpOpt, ignoreOpt, ignoreFieldOpt); diff != "" {
t.Errorf("redis stored process status %+v, want %+v; (-want, +got)\n%s", ss[0], wantInfo, diff)
t.Errorf("%s: redis stored process status %+v, want %+v; (-want, +got)\n%s", tc.desc, ss[0], wantInfo, diff)
hb.shutdown()
continue
}

View File

@ -18,6 +18,7 @@ import (
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/google/uuid"
"github.com/hibiken/asynq/internal/base"
"github.com/hibiken/asynq/internal/timeutil"
)
// EquateInt64Approx returns a Comparer option that treats int64 values
@ -114,6 +115,13 @@ func NewTaskMessageWithQueue(taskType string, payload []byte, qname string) *bas
}
}
// NewLeaseWithClock returns a new lease with the given expiration time and clock.
func NewLeaseWithClock(expirationTime time.Time, clock timeutil.Clock) *base.Lease {
l := base.NewLease(expirationTime)
l.Clock = clock
return l
}
// JSON serializes the given key-value pairs into stream of bytes in JSON.
func JSON(kv map[string]interface{}) []byte {
b, err := json.Marshal(kv)

View File

@ -615,7 +615,7 @@ type Lease struct {
once sync.Once
ch chan struct{}
clock timeutil.Clock
Clock timeutil.Clock
mu sync.Mutex
expireAt time.Time // guarded by mu
@ -625,7 +625,7 @@ func NewLease(expirationTime time.Time) *Lease {
return &Lease{
ch: make(chan struct{}),
expireAt: expirationTime,
clock: timeutil.NewRealClock(),
Clock: timeutil.NewRealClock(),
}
}
@ -670,7 +670,7 @@ func (l *Lease) Deadline() time.Time {
// IsValid returns true if the lease's expieration time is in the future or equals to the current time,
// returns false otherwise.
func (l *Lease) IsValid() bool {
now := l.clock.Now()
now := l.Clock.Now()
l.mu.Lock()
defer l.mu.Unlock()
return l.expireAt.After(now) || l.expireAt.Equal(now)

View File

@ -651,7 +651,7 @@ func TestLeaseReset(t *testing.T) {
clock := timeutil.NewSimulatedClock(now)
l := NewLease(now.Add(30 * time.Second))
l.clock = clock
l.Clock = clock
// Check initial state
if !l.IsValid() {
@ -686,7 +686,7 @@ func TestLeaseNotifyExpiration(t *testing.T) {
clock := timeutil.NewSimulatedClock(now)
l := NewLease(now.Add(30 * time.Second))
l.clock = clock
l.Clock = clock
select {
case <-l.Done():

View File

@ -64,40 +64,40 @@ func (tb *TestBroker) EnqueueUnique(ctx context.Context, msg *base.TaskMessage,
return tb.real.EnqueueUnique(ctx, msg, ttl)
}
func (tb *TestBroker) Dequeue(qnames ...string) (*base.TaskMessage, error) {
func (tb *TestBroker) Dequeue(qnames ...string) (*base.TaskMessage, time.Time, error) {
tb.mu.Lock()
defer tb.mu.Unlock()
if tb.sleeping {
return nil, errRedisDown
return nil, time.Time{}, errRedisDown
}
return tb.real.Dequeue(qnames...)
}
func (tb *TestBroker) Done(msg *base.TaskMessage) error {
func (tb *TestBroker) Done(ctx context.Context, msg *base.TaskMessage) error {
tb.mu.Lock()
defer tb.mu.Unlock()
if tb.sleeping {
return errRedisDown
}
return tb.real.Done(msg)
return tb.real.Done(ctx, msg)
}
func (tb *TestBroker) MarkAsComplete(msg *base.TaskMessage) error {
func (tb *TestBroker) MarkAsComplete(ctx context.Context, msg *base.TaskMessage) error {
tb.mu.Lock()
defer tb.mu.Unlock()
if tb.sleeping {
return errRedisDown
}
return tb.real.MarkAsComplete(msg)
return tb.real.MarkAsComplete(ctx, msg)
}
func (tb *TestBroker) Requeue(msg *base.TaskMessage) error {
func (tb *TestBroker) Requeue(ctx context.Context, msg *base.TaskMessage) error {
tb.mu.Lock()
defer tb.mu.Unlock()
if tb.sleeping {
return errRedisDown
}
return tb.real.Requeue(msg)
return tb.real.Requeue(ctx, msg)
}
func (tb *TestBroker) Schedule(ctx context.Context, msg *base.TaskMessage, processAt time.Time) error {
@ -118,22 +118,22 @@ func (tb *TestBroker) ScheduleUnique(ctx context.Context, msg *base.TaskMessage,
return tb.real.ScheduleUnique(ctx, msg, processAt, ttl)
}
func (tb *TestBroker) Retry(msg *base.TaskMessage, processAt time.Time, errMsg string, isFailure bool) error {
func (tb *TestBroker) Retry(ctx context.Context, msg *base.TaskMessage, processAt time.Time, errMsg string, isFailure bool) error {
tb.mu.Lock()
defer tb.mu.Unlock()
if tb.sleeping {
return errRedisDown
}
return tb.real.Retry(msg, processAt, errMsg, isFailure)
return tb.real.Retry(ctx, msg, processAt, errMsg, isFailure)
}
func (tb *TestBroker) Archive(msg *base.TaskMessage, errMsg string) error {
func (tb *TestBroker) Archive(ctx context.Context, msg *base.TaskMessage, errMsg string) error {
tb.mu.Lock()
defer tb.mu.Unlock()
if tb.sleeping {
return errRedisDown
}
return tb.real.Archive(msg, errMsg)
return tb.real.Archive(ctx, msg, errMsg)
}
func (tb *TestBroker) ForwardIfReady(qnames ...string) error {
@ -163,11 +163,11 @@ func (tb *TestBroker) ListLeaseExpired(cutoff time.Time, qnames ...string) ([]*b
return tb.real.ListLeaseExpired(cutoff, qnames...)
}
func (tb *TestBroker) ExtendLease(qname string, ids ...string) error {
func (tb *TestBroker) ExtendLease(qname string, ids ...string) (time.Time, error) {
tb.mu.Lock()
defer tb.mu.Unlock()
if tb.sleeping {
return errRedisDown
return time.Time{}, errRedisDown
}
return tb.real.ExtendLease(qname, ids...)
}

View File

@ -5,6 +5,7 @@
package asynq
import (
"context"
"sync"
"time"
@ -100,13 +101,13 @@ func (r *recoverer) recover() {
func (r *recoverer) retry(msg *base.TaskMessage, err error) {
delay := r.retryDelayFunc(msg.Retried, err, NewTask(msg.Type, msg.Payload))
retryAt := time.Now().Add(delay)
if err := r.broker.Retry(msg, retryAt, err.Error(), r.isFailureFunc(err)); err != nil {
if err := r.broker.Retry(context.Background(), msg, retryAt, err.Error(), r.isFailureFunc(err)); err != nil {
r.logger.Warnf("recoverer: could not retry lease expired task: %v", err)
}
}
func (r *recoverer) archive(msg *base.TaskMessage, err error) {
if err := r.broker.Archive(msg, err.Error()); err != nil {
if err := r.broker.Archive(context.Background(), msg, err.Error()); err != nil {
r.logger.Warnf("recoverer: could not move task to archive: %v", err)
}
}

View File

@ -5,6 +5,7 @@
package asynq
import (
"context"
"fmt"
"sync"
"testing"
@ -41,7 +42,7 @@ func TestSyncer(t *testing.T) {
m := msg
syncRequestCh <- &syncRequest{
fn: func() error {
return rdbClient.Done(m)
return rdbClient.Done(context.Background(), m)
},
deadline: time.Now().Add(5 * time.Minute),
}