diff --git a/models/bots/task.go b/models/bots/task.go
index 1d511b1923..2678b39511 100644
--- a/models/bots/task.go
+++ b/models/bots/task.go
@@ -37,8 +37,8 @@ type Task struct {
 	Attempt  int64
 	RunnerID int64 `xorm:"index"`
 	Result   runnerv1.Result
-	Status   Status `xorm:"index"`
-	Started  timeutil.TimeStamp
+	Status   Status             `xorm:"index"`
+	Started  timeutil.TimeStamp `xorm:"index"`
 	Stopped  timeutil.TimeStamp
 
 	Token string `xorm:"-"`
@@ -54,7 +54,7 @@ type Task struct {
 	LogExpired bool // files that are too old will be deleted
 
 	Created timeutil.TimeStamp `xorm:"created"`
-	Updated timeutil.TimeStamp `xorm:"updated"`
+	Updated timeutil.TimeStamp `xorm:"updated index"`
 }
 
 func init() {
@@ -412,6 +412,65 @@ func UpdateTaskByState(state *runnerv1.TaskState) (*Task, error) {
 	return task, nil
 }
 
+// StopTask stops the task with the given result inside the transaction opened
+// by db.TxContext. Unless the result is RESULT_UNSPECIFIED, the task takes the
+// status converted from the result and the current time as its stop time, and
+// both are passed on to UpdateRunJob for the task's job. Every step that has
+// no result yet gets the result and is stopped at the current time.
+func StopTask(ctx context.Context, task *Task, result runnerv1.Result) (*Task, error) {
+	// NOTE(review): this replaces the caller's ctx with the transaction context.
+	ctx, committer, err := db.TxContext()
+	if err != nil {
+		return nil, err
+	}
+	defer committer.Close()
+
+	e := db.GetEngine(ctx)
+
+	now := timeutil.TimeStampNow()
+	task.Result = result
+	if task.Result != runnerv1.Result_RESULT_UNSPECIFIED {
+		task.Status = Status(task.Result)
+		task.Stopped = now
+		if _, err := UpdateRunJob(ctx, &RunJob{
+			ID:      task.JobID,
+			Status:  task.Status,
+			Stopped: task.Stopped,
+		}, nil); err != nil {
+			return nil, err
+		}
+	}
+
+	if _, err := e.ID(task.ID).Update(task); err != nil {
+		return nil, err
+	}
+
+	if err := task.LoadAttributes(ctx); err != nil {
+		return nil, err
+	}
+
+	for _, step := range task.Steps {
+		if step.Result != runnerv1.Result_RESULT_UNSPECIFIED {
+			// a step that already has a result is not modified, so nothing needs to be written
+			continue
+		}
+		step.Result = result
+		if step.Started == 0 {
+			step.Started = now
+		}
+		step.Stopped = now
+		if _, err := e.ID(step.ID).Update(step); err != nil {
+			return nil, err
+		}
+	}
+
+	if err := committer.Commit(); err != nil {
+		return nil, err
+	}
+
+	return task, nil
+}
+
 func isSubset(set, subset []string) bool {
 	m := make(map[string]struct{}, len(set))
 	for _, v := range set {
diff --git a/models/bots/task_list.go b/models/bots/task_list.go
new file mode 100644
index 0000000000..d5882d4c9e
--- /dev/null
+++ b/models/bots/task_list.go
@@ -0,0 +1,56 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package bots
+
+import (
+	"context"
+
+	"code.gitea.io/gitea/models/db"
+	"code.gitea.io/gitea/modules/timeutil"
+	"xorm.io/builder"
+)
+
+// TaskList is a list of tasks.
+type TaskList []*Task
+
+// FindTaskOptions describes the filters and paging used to look up tasks.
+type FindTaskOptions struct {
+	db.ListOptions
+	Status        Status             // only applied when greater than StatusUnknown
+	UpdatedBefore timeutil.TimeStamp // only applied when greater than zero
+	StartedBefore timeutil.TimeStamp // only applied when greater than zero
+}
+
+// toConds builds the query condition from the filters that are set.
+func (opts FindTaskOptions) toConds() builder.Cond {
+	cond := builder.NewCond()
+	if opts.Status > StatusUnknown {
+		cond = cond.And(builder.Eq{"status": opts.Status})
+	}
+	if opts.UpdatedBefore > 0 {
+		cond = cond.And(builder.Lt{"updated": opts.UpdatedBefore})
+	}
+	if opts.StartedBefore > 0 {
+		cond = cond.And(builder.Lt{"started": opts.StartedBefore})
+	}
+	return cond
+}
+
+// FindTasks returns the tasks matching opts together with the count reported by
+// FindAndCount. Paging is applied only when PageSize > 0 and Page >= 1.
+func FindTasks(ctx context.Context, opts FindTaskOptions) (TaskList, int64, error) {
+	e := db.GetEngine(ctx).Where(opts.toConds())
+	if opts.PageSize > 0 && opts.Page >= 1 {
+		e.Limit(opts.PageSize, (opts.Page-1)*opts.PageSize)
+	}
+	var tasks TaskList
+	total, err := e.FindAndCount(&tasks)
+	return tasks, total, err
+}
+
+// CountTasks returns the number of tasks matching opts.
+func CountTasks(ctx context.Context, opts FindTaskOptions) (int64, error) {
+	return db.GetEngine(ctx).Where(opts.toConds()).Count(new(Task))
+}
diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini
index 28fd71abe9..04abab3a88 100644
--- a/options/locale/locale_en-US.ini
+++ b/options/locale/locale_en-US.ini
@@ -2574,6 +2574,9 @@ dashboard.delete_old_actions = Delete all old actions from database
 dashboard.delete_old_actions.started = Delete all old actions from database started.
 dashboard.update_checker = Update checker
 dashboard.delete_old_system_notices = Delete all old system notices from database
+dashboard.stop_zombie_tasks = Stop zombie tasks
+dashboard.stop_endless_tasks = Stop endless tasks
+dashboard.cancel_abandoned_jobs = Cancel abandoned jobs
 
 users.user_manage_panel = User Account Management
 users.new_account = Create User Account
diff --git a/services/bots/clear_tasks.go b/services/bots/clear_tasks.go
new file mode 100644
index 0000000000..ba657fe7ba
--- /dev/null
+++ b/services/bots/clear_tasks.go
@@ -0,0 +1,62 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package bots
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	bots_model "code.gitea.io/gitea/models/bots"
+	"code.gitea.io/gitea/modules/log"
+	"code.gitea.io/gitea/modules/timeutil"
+	runnerv1 "gitea.com/gitea/proto-go/runner/v1"
+)
+
+const (
+	zombieTaskTimeout   = 10 * time.Minute // the task is running but hasn't been updated for this long
+	endlessTaskTimeout  = 3 * time.Hour    // the task is running for a long time with updates
+	abandonedJobTimeout = 24 * time.Hour   // the job is waiting for being picked by a runner
+)
+
+// StopZombieTasks stops the tasks which have running status, but haven't been updated for a long time
+func StopZombieTasks(ctx context.Context) error {
+	return stopTasks(ctx, "zombie", bots_model.FindTaskOptions{
+		Status:        bots_model.StatusRunning,
+		UpdatedBefore: timeutil.TimeStamp(time.Now().Add(-zombieTaskTimeout).Unix()),
+	})
+}
+
+// StopEndlessTasks stops the tasks which have running status and continuous updates, but don't end for a long time
+func StopEndlessTasks(ctx context.Context) error {
+	return stopTasks(ctx, "endless", bots_model.FindTaskOptions{
+		Status:        bots_model.StatusRunning,
+		StartedBefore: timeutil.TimeStamp(time.Now().Add(-endlessTaskTimeout).Unix()),
+	})
+}
+
+// stopTasks stops every task matching opts with a failure result. kind names the
+// sort of task in the error and log messages. A task that fails to stop is logged
+// and skipped so that the remaining tasks are still processed.
+func stopTasks(ctx context.Context, kind string, opts bots_model.FindTaskOptions) error {
+	tasks, _, err := bots_model.FindTasks(ctx, opts)
+	if err != nil {
+		return fmt.Errorf("find %s tasks: %w", kind, err)
+	}
+
+	for _, task := range tasks {
+		if _, err := bots_model.StopTask(ctx, task, runnerv1.Result_RESULT_FAILURE); err != nil {
+			log.Warn("stop %s task %v: %v", kind, task.ID, err)
+			// go on
+		}
+	}
+	return nil
+}
+
+// CancelAbandonedJobs cancels the jobs which have waiting status, but haven't been picked by a runner for a long time
+func CancelAbandonedJobs(ctx context.Context) error {
+	// TODO
+	return nil
+}
diff --git a/services/cron/cron.go b/services/cron/cron.go
index 8e19e04416..e238b6879d 100644
--- a/services/cron/cron.go
+++ b/services/cron/cron.go
@@ -31,6 +31,7 @@ func NewContext(original context.Context) {
 	_, _, finished := process.GetManager().AddTypedContext(graceful.GetManager().ShutdownContext(), "Service: Cron", process.SystemProcessType, true)
 	initBasicTasks()
 	initExtendedTasks()
+	initBotsTasks()
 
 	lock.Lock()
 	for _, task := range tasks {
diff --git a/services/cron/tasks_bots.go b/services/cron/tasks_bots.go
new file mode 100644
index 0000000000..72d73da54b
--- /dev/null
+++ b/services/cron/tasks_bots.go
@@ -0,0 +1,48 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package cron
+
+import (
+	"context"
+
+	user_model "code.gitea.io/gitea/models/user"
+	bots_service "code.gitea.io/gitea/services/bots"
+)
+
+// initBotsTasks registers the cron tasks of the bots service: stopping zombie
+// tasks, stopping endless tasks and cancelling abandoned jobs.
+func initBotsTasks() {
+	registerStopZombieTasks()
+	registerStopEndlessTasks()
+	registerCancelAbandonedJobs()
+}
+
+// registerBotsTask registers run under the given name and schedule with
+// Enabled and RunAtStart set. The doer and the config handed to the task
+// function are not used.
+func registerBotsTask(name, schedule string, run func(context.Context) error) {
+	RegisterTaskFatal(name, &BaseConfig{
+		Enabled:    true,
+		RunAtStart: true,
+		Schedule:   schedule,
+	}, func(ctx context.Context, _ *user_model.User, _ Config) error {
+		return run(ctx)
+	})
+}
+
+// registerStopZombieTasks registers bots_service.StopZombieTasks with the schedule "@every 5m".
+func registerStopZombieTasks() {
+	registerBotsTask("stop_zombie_tasks", "@every 5m", bots_service.StopZombieTasks)
+}
+
+// registerStopEndlessTasks registers bots_service.StopEndlessTasks with the schedule "@every 30m".
+func registerStopEndlessTasks() {
+	registerBotsTask("stop_endless_tasks", "@every 30m", bots_service.StopEndlessTasks)
+}
+
+// registerCancelAbandonedJobs registers bots_service.CancelAbandonedJobs with the schedule "@every 6h".
+func registerCancelAbandonedJobs() {
+	registerBotsTask("cancel_abandoned_jobs", "@every 6h", bots_service.CancelAbandonedJobs)
+}