mirror of
https://github.com/hibiken/asynq.git
synced 2024-12-25 07:12:17 +08:00
Add hearbeater
This commit is contained in:
parent
489e695433
commit
d03fa34eaf
@ -37,6 +37,7 @@ type Background struct {
|
|||||||
scheduler *scheduler
|
scheduler *scheduler
|
||||||
processor *processor
|
processor *processor
|
||||||
syncer *syncer
|
syncer *syncer
|
||||||
|
heartbeater *heartbeater
|
||||||
}
|
}
|
||||||
|
|
||||||
// Config specifies the background-task processing behavior.
|
// Config specifies the background-task processing behavior.
|
||||||
@ -109,9 +110,16 @@ func NewBackground(r RedisConnOpt, cfg *Config) *Background {
|
|||||||
}
|
}
|
||||||
qcfg := normalizeQueueCfg(queues)
|
qcfg := normalizeQueueCfg(queues)
|
||||||
|
|
||||||
|
host, err := os.Hostname()
|
||||||
|
if err != nil {
|
||||||
|
host = "unknown-host"
|
||||||
|
}
|
||||||
|
pid := os.Getpid()
|
||||||
|
|
||||||
|
rdb := rdb.NewRDB(createRedisClient(r))
|
||||||
syncRequestCh := make(chan *syncRequest)
|
syncRequestCh := make(chan *syncRequest)
|
||||||
syncer := newSyncer(syncRequestCh, 5*time.Second)
|
syncer := newSyncer(syncRequestCh, 5*time.Second)
|
||||||
rdb := rdb.NewRDB(createRedisClient(r))
|
heartbeater := newHeartbeater(rdb, 5*time.Second, host, pid, queues, n)
|
||||||
scheduler := newScheduler(rdb, 5*time.Second, qcfg)
|
scheduler := newScheduler(rdb, 5*time.Second, qcfg)
|
||||||
processor := newProcessor(rdb, n, qcfg, cfg.StrictPriority, delayFunc, syncRequestCh)
|
processor := newProcessor(rdb, n, qcfg, cfg.StrictPriority, delayFunc, syncRequestCh)
|
||||||
return &Background{
|
return &Background{
|
||||||
@ -119,6 +127,7 @@ func NewBackground(r RedisConnOpt, cfg *Config) *Background {
|
|||||||
scheduler: scheduler,
|
scheduler: scheduler,
|
||||||
processor: processor,
|
processor: processor,
|
||||||
syncer: syncer,
|
syncer: syncer,
|
||||||
|
heartbeater: heartbeater,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -165,6 +174,7 @@ func (bg *Background) Run(handler Handler) {
|
|||||||
sig := <-sigs
|
sig := <-sigs
|
||||||
if sig == syscall.SIGTSTP {
|
if sig == syscall.SIGTSTP {
|
||||||
bg.processor.stop()
|
bg.processor.stop()
|
||||||
|
bg.heartbeater.setState("stopped")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
@ -184,6 +194,7 @@ func (bg *Background) start(handler Handler) {
|
|||||||
bg.running = true
|
bg.running = true
|
||||||
bg.processor.handler = handler
|
bg.processor.handler = handler
|
||||||
|
|
||||||
|
bg.heartbeater.start()
|
||||||
bg.syncer.start()
|
bg.syncer.start()
|
||||||
bg.scheduler.start()
|
bg.scheduler.start()
|
||||||
bg.processor.start()
|
bg.processor.start()
|
||||||
@ -202,6 +213,7 @@ func (bg *Background) stop() {
|
|||||||
// Note: processor and all worker goroutines need to be exited
|
// Note: processor and all worker goroutines need to be exited
|
||||||
// before shutting down syncer to avoid goroutine leak.
|
// before shutting down syncer to avoid goroutine leak.
|
||||||
bg.syncer.terminate()
|
bg.syncer.terminate()
|
||||||
|
bg.heartbeater.terminate()
|
||||||
|
|
||||||
bg.rdb.Close()
|
bg.rdb.Close()
|
||||||
bg.processor.handler = nil
|
bg.processor.handler = nil
|
||||||
|
78
heartbeat.go
Normal file
78
heartbeat.go
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
// Copyright 2020 Kentaro Hibino. All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT license
|
||||||
|
// that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package asynq
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/hibiken/asynq/internal/base"
|
||||||
|
"github.com/hibiken/asynq/internal/rdb"
|
||||||
|
)
|
||||||
|
|
||||||
|
// heartbeater is responsible for writing process status to redis periodically to
|
||||||
|
// indicate that the background worker process is up.
|
||||||
|
type heartbeater struct {
|
||||||
|
rdb *rdb.RDB
|
||||||
|
|
||||||
|
mu sync.Mutex
|
||||||
|
ps *base.ProcessStatus
|
||||||
|
|
||||||
|
// channel to communicate back to the long running "heartbeater" goroutine.
|
||||||
|
done chan struct{}
|
||||||
|
|
||||||
|
// interval between heartbeats.
|
||||||
|
interval time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
func newHeartbeater(rdb *rdb.RDB, interval time.Duration, host string, pid int, queues map[string]uint, n int) *heartbeater {
|
||||||
|
ps := &base.ProcessStatus{
|
||||||
|
Concurrency: n,
|
||||||
|
Queues: queues,
|
||||||
|
Host: host,
|
||||||
|
PID: pid,
|
||||||
|
}
|
||||||
|
return &heartbeater{
|
||||||
|
rdb: rdb,
|
||||||
|
ps: ps,
|
||||||
|
done: make(chan struct{}),
|
||||||
|
interval: interval,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *heartbeater) terminate() {
|
||||||
|
logger.info("Heartbeater shutting down...")
|
||||||
|
// Signal the heartbeater goroutine to stop.
|
||||||
|
h.done <- struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *heartbeater) setState(state string) {
|
||||||
|
h.mu.Lock()
|
||||||
|
defer h.mu.Unlock()
|
||||||
|
h.ps.State = state
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *heartbeater) start() {
|
||||||
|
h.ps.Started = time.Now()
|
||||||
|
h.ps.State = "running"
|
||||||
|
go func() {
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-h.done:
|
||||||
|
logger.info("Heartbeater done")
|
||||||
|
return
|
||||||
|
case <-time.After(h.interval):
|
||||||
|
// Note: Set TTL to be long enough value so that it won't expire before we write again
|
||||||
|
// and short enough to expire quickly once process is shut down.
|
||||||
|
h.mu.Lock()
|
||||||
|
err := h.rdb.WriteProcessStatus(h.ps, h.interval*2)
|
||||||
|
h.mu.Unlock()
|
||||||
|
if err != nil {
|
||||||
|
logger.error("could not write heartbeat data: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
86
heartbeat_test.go
Normal file
86
heartbeat_test.go
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
// Copyright 2020 Kentaro Hibino. All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT license
|
||||||
|
// that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package asynq
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp"
|
||||||
|
"github.com/google/go-cmp/cmp/cmpopts"
|
||||||
|
h "github.com/hibiken/asynq/internal/asynqtest"
|
||||||
|
"github.com/hibiken/asynq/internal/base"
|
||||||
|
"github.com/hibiken/asynq/internal/rdb"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestHeartbeater(t *testing.T) {
|
||||||
|
r := setup(t)
|
||||||
|
rdbClient := rdb.NewRDB(r)
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
interval time.Duration
|
||||||
|
host string
|
||||||
|
pid int
|
||||||
|
queues map[string]uint
|
||||||
|
concurrency int
|
||||||
|
}{
|
||||||
|
{time.Second, "some.address.ec2.aws.com", 45678, map[string]uint{"default": 1}, 10},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range tests {
|
||||||
|
h.FlushDB(t, r)
|
||||||
|
|
||||||
|
hb := newHeartbeater(rdbClient, tc.interval, tc.host, tc.pid, tc.queues, tc.concurrency)
|
||||||
|
|
||||||
|
want := &base.ProcessStatus{
|
||||||
|
Host: tc.host,
|
||||||
|
PID: tc.pid,
|
||||||
|
Queues: tc.queues,
|
||||||
|
Concurrency: tc.concurrency,
|
||||||
|
Started: time.Now(),
|
||||||
|
State: "running",
|
||||||
|
}
|
||||||
|
hb.start()
|
||||||
|
|
||||||
|
// allow for heartbeater to write to redis
|
||||||
|
time.Sleep(tc.interval * 2)
|
||||||
|
|
||||||
|
got, err := rdbClient.ReadProcessStatus(tc.host, tc.pid)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("could not read process status from redis: %v", err)
|
||||||
|
hb.terminate()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
var timeCmpOpt = cmpopts.EquateApproxTime(10 * time.Millisecond)
|
||||||
|
if diff := cmp.Diff(want, got, timeCmpOpt); diff != "" {
|
||||||
|
t.Errorf("redis stored process status %+v, want %+v; (-want, +got)\n%s", got, want, diff)
|
||||||
|
hb.terminate()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// state change
|
||||||
|
hb.setState("stopped")
|
||||||
|
|
||||||
|
// allow for heartbeater to write to redis
|
||||||
|
time.Sleep(tc.interval * 2)
|
||||||
|
|
||||||
|
want.State = "stopped"
|
||||||
|
got, err = rdbClient.ReadProcessStatus(tc.host, tc.pid)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("could not read process status from redis: %v", err)
|
||||||
|
hb.terminate()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if diff := cmp.Diff(want, got, timeCmpOpt); diff != "" {
|
||||||
|
t.Errorf("redis stored process status %+v, want %+v; (-want, +got)\n%s", got, want, diff)
|
||||||
|
hb.terminate()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
hb.terminate()
|
||||||
|
}
|
||||||
|
}
|
@ -6,6 +6,7 @@
|
|||||||
package base
|
package base
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -17,6 +18,7 @@ const DefaultQueueName = "default"
|
|||||||
|
|
||||||
// Redis keys
|
// Redis keys
|
||||||
const (
|
const (
|
||||||
|
psPrefix = "asynq:ps:" // HASH
|
||||||
processedPrefix = "asynq:processed:" // STRING - asynq:processed:<yyyy-mm-dd>
|
processedPrefix = "asynq:processed:" // STRING - asynq:processed:<yyyy-mm-dd>
|
||||||
failurePrefix = "asynq:failure:" // STRING - asynq:failure:<yyyy-mm-dd>
|
failurePrefix = "asynq:failure:" // STRING - asynq:failure:<yyyy-mm-dd>
|
||||||
QueuePrefix = "asynq:queues:" // LIST - asynq:queues:<qname>
|
QueuePrefix = "asynq:queues:" // LIST - asynq:queues:<qname>
|
||||||
@ -45,6 +47,11 @@ func FailureKey(t time.Time) string {
|
|||||||
return failurePrefix + t.UTC().Format("2006-01-02")
|
return failurePrefix + t.UTC().Format("2006-01-02")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ProcessStatusKey returns a redis key string for process status.
|
||||||
|
func ProcessStatusKey(hostname string, pid int) string {
|
||||||
|
return fmt.Sprintf("%s%s:%d", psPrefix, hostname, pid)
|
||||||
|
}
|
||||||
|
|
||||||
// TaskMessage is the internal representation of a task with additional metadata fields.
|
// TaskMessage is the internal representation of a task with additional metadata fields.
|
||||||
// Serialized data of this type gets written to redis.
|
// Serialized data of this type gets written to redis.
|
||||||
type TaskMessage struct {
|
type TaskMessage struct {
|
||||||
@ -69,3 +76,13 @@ type TaskMessage struct {
|
|||||||
// ErrorMsg holds the error message from the last failure.
|
// ErrorMsg holds the error message from the last failure.
|
||||||
ErrorMsg string
|
ErrorMsg string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ProcessStatus holds information about running background worker process.
|
||||||
|
type ProcessStatus struct {
|
||||||
|
Concurrency int
|
||||||
|
Queues map[string]uint
|
||||||
|
PID int
|
||||||
|
Host string
|
||||||
|
State string
|
||||||
|
Started time.Time
|
||||||
|
}
|
||||||
|
@ -60,3 +60,21 @@ func TestFailureKey(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestProcessStatusKey(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
hostname string
|
||||||
|
pid int
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{"localhost", 9876, "asynq:ps:localhost:9876"},
|
||||||
|
{"127.0.0.1", 1234, "asynq:ps:127.0.0.1:1234"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range tests {
|
||||||
|
got := ProcessStatusKey(tc.hostname, tc.pid)
|
||||||
|
if got != tc.want {
|
||||||
|
t.Errorf("ProcessStatusKey(%s, %d) = %s, want %s", tc.hostname, tc.pid, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -346,3 +346,29 @@ func (r *RDB) forwardSingle(src, dst string) error {
|
|||||||
return script.Run(r.client,
|
return script.Run(r.client,
|
||||||
[]string{src, dst}, now).Err()
|
[]string{src, dst}, now).Err()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WriteProcessStatus writes process information to redis with expiration
|
||||||
|
// set to the value ttl.
|
||||||
|
func (r *RDB) WriteProcessStatus(ps *base.ProcessStatus, ttl time.Duration) error {
|
||||||
|
bytes, err := json.Marshal(ps)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
key := base.ProcessStatusKey(ps.Host, ps.PID)
|
||||||
|
return r.client.Set(key, string(bytes), ttl).Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReadProcessStatus reads process information stored in redis.
|
||||||
|
func (r *RDB) ReadProcessStatus(host string, pid int) (*base.ProcessStatus, error) {
|
||||||
|
key := base.ProcessStatusKey(host, pid)
|
||||||
|
data, err := r.client.Get(key).Result()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
var ps base.ProcessStatus
|
||||||
|
err = json.Unmarshal([]byte(data), &ps)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &ps, nil
|
||||||
|
}
|
||||||
|
@ -738,3 +738,49 @@ func TestCheckAndEnqueue(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestReadWriteProcessStatus(t *testing.T) {
|
||||||
|
r := setup(t)
|
||||||
|
ps1 := &base.ProcessStatus{
|
||||||
|
Concurrency: 10,
|
||||||
|
Queues: map[string]uint{"default": 2, "email": 5, "low": 1},
|
||||||
|
PID: 98765,
|
||||||
|
Host: "localhost",
|
||||||
|
State: "running",
|
||||||
|
Started: time.Now(),
|
||||||
|
}
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
ps *base.ProcessStatus
|
||||||
|
ttl time.Duration
|
||||||
|
}{
|
||||||
|
{ps1, 5 * time.Second},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range tests {
|
||||||
|
h.FlushDB(t, r.client)
|
||||||
|
|
||||||
|
err := r.WriteProcessStatus(tc.ps, tc.ttl)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("r.WriteProcessStatus returned an error: %v", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
got, err := r.ReadProcessStatus(tc.ps.Host, tc.ps.PID)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("r.ReadProcessStatus returned an error: %v", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if diff := cmp.Diff(tc.ps, got); diff != "" {
|
||||||
|
t.Errorf("r.ReadProcessStatus(%q, %d) = %+v, want %+v; (-want,+got)\n%s",
|
||||||
|
tc.ps.Host, tc.ps.PID, got, tc.ps, diff)
|
||||||
|
}
|
||||||
|
|
||||||
|
key := base.ProcessStatusKey(tc.ps.Host, tc.ps.PID)
|
||||||
|
gotTTL := r.client.TTL(key).Val()
|
||||||
|
if !cmp.Equal(tc.ttl, gotTTL, timeCmpOpt) {
|
||||||
|
t.Errorf("redis TTL %q returned %v, want %v", key, gotTTL, tc.ttl)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -87,6 +87,7 @@ func TestSyncerRetry(t *testing.T) {
|
|||||||
t.Errorf("%q has length %d; want %d", base.InProgressQueue, l, len(inProgress))
|
t.Errorf("%q has length %d; want %d", base.InProgressQueue, l, len(inProgress))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: This assignment introduces data race and running the test with -race will fail.
|
||||||
// simualate failover.
|
// simualate failover.
|
||||||
rdbClient = rdb.NewRDB(goodClient)
|
rdbClient = rdb.NewRDB(goodClient)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user