From d7b99e00287ef9756dda3770efcd6bf1cf36485d Mon Sep 17 00:00:00 2001 From: Jason Song Date: Tue, 18 Oct 2022 13:32:52 +0800 Subject: [PATCH] feat: use dbfs to store log --- assets/go-licenses.json | 10 +-- go.mod | 2 +- models/bots/task.go | 46 +++++++---- models/bots/task_log.go | 77 ------------------ modules/bots/log.go | 130 ++++++++++++++++++++++++++++++ routers/api/bots/runner/runner.go | 47 ++++++----- routers/web/dev/buildview.go | 16 ++-- 7 files changed, 206 insertions(+), 122 deletions(-) delete mode 100644 models/bots/task_log.go create mode 100644 modules/bots/log.go diff --git a/assets/go-licenses.json b/assets/go-licenses.json index 1afb6b45ab..3b8e84ecc4 100644 --- a/assets/go-licenses.json +++ b/assets/go-licenses.json @@ -24,6 +24,11 @@ "path": "git.sr.ht/~mariusor/go-xsd-duration/LICENSE", "licenseText": "MIT License\n\nCopyright (c) 2019 Go xsd:duration\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n" }, + { + "name": "gitea.com/gitea/act_runner/core", + "path": "gitea.com/gitea/act_runner/core/LICENSE", + "licenseText": "Copyright (c) 2022 The Gitea Authors\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in\nall copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\nTHE SOFTWARE.\n" + }, { "name": "gitea.com/gitea/proto-go", "path": "gitea.com/gitea/proto-go/LICENSE", @@ -544,11 +549,6 @@ "path": "github.com/gorilla/sessions/LICENSE", "licenseText": "Copyright (c) 2012-2018 The Gorilla Authors. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are\nmet:\n\n\t * Redistributions of source code must retain the above copyright\nnotice, this list of conditions and the following disclaimer.\n\t * Redistributions in binary form must reproduce the above\ncopyright notice, this list of conditions and the following disclaimer\nin the documentation and/or other materials provided with the\ndistribution.\n\t * Neither the name of Google Inc. nor the names of its\ncontributors may be used to endorse or promote products derived from\nthis software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\nLIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\nA PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\nOWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\nSPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\nLIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\nDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\nTHEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n" }, - { - "name": "github.com/gorilla/websocket", - "path": "github.com/gorilla/websocket/LICENSE", - "licenseText": "Copyright (c) 2013 The Gorilla WebSocket Authors. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n Redistributions of source code must retain the above copyright notice, this\n list of conditions and the following disclaimer.\n\n Redistributions in binary form must reproduce the above copyright notice,\n this list of conditions and the following disclaimer in the documentation\n and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\nANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\nWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n" - }, { "name": "github.com/hashicorp/go-cleanhttp", "path": "github.com/hashicorp/go-cleanhttp/LICENSE", diff --git a/go.mod b/go.mod index bf5f941363..aeedd6d172 100644 --- a/go.mod +++ b/go.mod @@ -59,7 +59,6 @@ require ( github.com/google/uuid v1.3.0 github.com/gorilla/feeds v1.1.1 github.com/gorilla/sessions v1.2.1 - github.com/gorilla/websocket v1.4.2 github.com/hashicorp/go-version v1.6.0 github.com/hashicorp/golang-lru v0.5.4 github.com/huandu/xstrings v1.3.2 @@ -209,6 +208,7 @@ require ( github.com/gorilla/handlers v1.5.1 // indirect github.com/gorilla/mux v1.8.0 // indirect github.com/gorilla/securecookie v1.1.1 // indirect + github.com/gorilla/websocket v1.4.2 // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect diff --git a/models/bots/task.go b/models/bots/task.go index 7a883f3c13..b64e5274d4 100644 --- a/models/bots/task.go +++ b/models/bots/task.go @@ -19,19 +19,24 @@ import ( // Task represents a distribution of job type Task struct { - ID int64 - JobID int64 - Job *RunJob `xorm:"-"` - Steps []*TaskStep `xorm:"-"` - Attempt int64 - RunnerID int64 `xorm:"index"` - LogToFile bool // read log from database or from storage - LogURL string // url of the log file in storage - Result runnerv1.Result - Started timeutil.TimeStamp - Stopped timeutil.TimeStamp - Created timeutil.TimeStamp `xorm:"created"` - Updated timeutil.TimeStamp `xorm:"updated"` + ID int64 + JobID int64 + Job *RunJob `xorm:"-"` + Steps []*TaskStep `xorm:"-"` + Attempt int64 + RunnerID int64 `xorm:"index"` + Result runnerv1.Result + Started timeutil.TimeStamp + Stopped timeutil.TimeStamp + + LogURL string // dbfs:///a/b.log or s3://endpoint.com/a/b.log and etc. + LogLength int64 // lines count + LogSize int64 // blob size + LogIndexes []int64 `xorm:"JSON TEXT"` // line number to offset + LogExpired bool + + Created timeutil.TimeStamp `xorm:"created"` + Updated timeutil.TimeStamp `xorm:"updated"` } func init() { @@ -80,7 +85,7 @@ func (task *Task) FullSteps() []*TaskStep { headStep := &TaskStep{ Name: "Set up job", LogIndex: 0, - LogLength: -1, // no limit + LogLength: task.LogLength, Started: task.Started, } if firstStep != nil { @@ -93,7 +98,7 @@ func (task *Task) FullSteps() []*TaskStep { } if lastStep != nil { tailStep.LogIndex = lastStep.LogIndex + lastStep.LogLength - tailStep.LogLength = -1 // no limit + tailStep.LogLength = task.LogLength - tailStep.LogIndex tailStep.Started = lastStep.Stopped } steps := make([]*TaskStep, 0, len(task.Steps)+2) @@ -208,7 +213,16 @@ func CreateTaskForRunner(runner *Runner) (*Task, bool, error) { return task, true, nil } -func UpdateTask(state *runnerv1.TaskState) error { +func UpdateTask(ctx context.Context, task *Task, cols ...string) error { + sess := db.GetEngine(ctx).ID(task.ID) + if len(cols) > 0 { + sess.Cols(cols...) + } + _, err := sess.Update(task) + return err +} + +func UpdateTaskByState(state *runnerv1.TaskState) error { stepStates := map[int64]*runnerv1.StepState{} for _, v := range state.Steps { stepStates[v.Id] = v diff --git a/models/bots/task_log.go b/models/bots/task_log.go deleted file mode 100644 index adfa17aa9c..0000000000 --- a/models/bots/task_log.go +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2022 The Gitea Authors. All rights reserved. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. - -package bots - -import ( - "fmt" - - "code.gitea.io/gitea/models/db" - "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/timeutil" -) - -// TaskLog represents a task's log, every task has a standalone table -type TaskLog struct { - ID int64 `xorm:"pk"` - Timestamp timeutil.TimeStamp - Content string `xorm:"LONGTEXT"` -} - -func GetTaskLogTableName(taskID int64) string { - return fmt.Sprintf("bots_task_log_%d", taskID) -} - -// CreateTaskLog table for a task -func CreateTaskLog(taskID int64) error { - return db.GetEngine(db.DefaultContext). - Table(GetTaskLogTableName(taskID)). - Sync(new(TaskLog)) -} - -func GetTaskLogs(taskID, index, length int64) (logs []*TaskLog, err error) { - sess := db.GetEngine(db.DefaultContext).Table(GetTaskLogTableName(taskID)). - Where("id>=?", index).OrderBy("id") - - if length >= 0 { - sess.Limit(int(length)) - } - - err = sess.Find(&logs) - - return -} - -func InsertTaskLogs(taskID int64, logs []*TaskLog) (int64, error) { - if err := CreateTaskLog(taskID); err != nil { - return 0, err - } - table := GetTaskLogTableName(taskID) - - // TODO: A more complete way to insert logs - // Be careful: - // - the id of a log can be 0 - // - some logs may already exist in db - // - if use exec, consider different databases - // - the input should be ordered by id - // - the ids should be continuously increasing - // - the min id of input should be 1 + (the max id in db) - - if len(logs) == 0 { - return 0, fmt.Errorf("no logs") - } - ack := logs[0].ID - - sess := db.GetEngine(db.DefaultContext) - for _, v := range logs { - _, err := sess.Exec(fmt.Sprintf("INSERT IGNORE INTO %s (id, timestamp, content) VALUES (?,?,?)", table), v.ID, v.Timestamp, []byte(v.Content)) - if err != nil { - log.Error("insert log %d of task %d: %v", v.ID, taskID, err) - break - } - ack = v.ID + 1 - } - - return ack, nil -} diff --git a/modules/bots/log.go b/modules/bots/log.go new file mode 100644 index 0000000000..ea332accfc --- /dev/null +++ b/modules/bots/log.go @@ -0,0 +1,130 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package bots + +import ( + "bufio" + "context" + "fmt" + "io" + "net/url" + "os" + "strconv" + "strings" + "time" + + "code.gitea.io/gitea/models/dbfs" + runnerv1 "gitea.com/gitea/proto-go/runner/v1" + + "google.golang.org/protobuf/types/known/timestamppb" +) + +const defaultBufSize = 64 * 1024 + +const ( + StorageSchemaDBFS = "dbfs" + StorageSchemaS2 = "s2" + // ... +) + +func WriteLogs(ctx context.Context, rawURL string, offset int64, rows []*runnerv1.LogRow) ([]int, error) { + name, err := parseDBFSName(rawURL) + if err != nil { + return nil, err + } + f, err := dbfs.OpenFile(ctx, name, os.O_RDWR|os.O_CREATE|os.O_TRUNC|os.O_APPEND) + if err != nil { + return nil, fmt.Errorf("dbfs OpenFile %q: %w", name, err) + } + defer f.Close() + if _, err := f.Seek(offset, io.SeekStart); err != nil { + return nil, fmt.Errorf("dbfs Seek %q: %w", name, err) + } + + writer := bufio.NewWriterSize(f, defaultBufSize) + + ns := make([]int, 0, len(rows)) + for _, row := range rows { + n, err := writer.WriteString(FormatLog(row.Time.AsTime(), row.Content) + "\n") + if err != nil { + return nil, err + } + ns = append(ns, n) + } + + if err := writer.Flush(); err != nil { + return nil, err + } + return ns, nil +} + +func ReadLogs(ctx context.Context, rawURL string, offset int64, limit int64) ([]*runnerv1.LogRow, error) { + name, err := parseDBFSName(rawURL) + if err != nil { + return nil, err + } + + f, err := dbfs.Open(ctx, name) + if err != nil { + return nil, fmt.Errorf("dbfs Open %q: %w", name, err) + } + defer f.Close() + if _, err := f.Seek(offset, io.SeekStart); err != nil { + return nil, fmt.Errorf("dbfs Seek %q: %w", name, err) + } + + var rows []*runnerv1.LogRow + scanner := bufio.NewScanner(f) + for scanner.Scan() && (int64(len(rows)) < limit || limit < 0) { + t, c, err := ParseLog(scanner.Text()) + if err != nil { + return nil, fmt.Errorf("parse log %q: %w", scanner.Text(), err) + } + rows = append(rows, &runnerv1.LogRow{ + Time: timestamppb.New(t), + Content: c, + }) + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("scan: %w", err) + } + + return rows, nil +} + +func parseDBFSName(rawURL string) (string, error) { + u, err := url.Parse(rawURL) + if err != nil { + return "", fmt.Errorf("invalid url: %w", err) + } + if u.Scheme != StorageSchemaDBFS { + return "", fmt.Errorf("%s supported only yet", StorageSchemaDBFS) + } + + if u.Path == "" { + return "", fmt.Errorf("empty path") + } + + return u.Path, nil +} + +func FormatLog(timestamp time.Time, content string) string { + return fmt.Sprintf("%s %s", timestamp.UTC().Format(time.RFC3339Nano), strconv.Quote(content)) +} + +func ParseLog(in string) (timestamp time.Time, content string, err error) { + index := strings.IndexRune(in, ' ') + if index < 0 { + err = fmt.Errorf("invalid log: %q", in) + return + } + timestamp, err = time.Parse(time.RFC3339Nano, in[:index]) + if err != nil { + return + } + content, err = strconv.Unquote(in[index+1:]) + return +} diff --git a/routers/api/bots/runner/runner.go b/routers/api/bots/runner/runner.go index d6274834b5..8352e028f7 100644 --- a/routers/api/bots/runner/runner.go +++ b/routers/api/bots/runner/runner.go @@ -12,9 +12,9 @@ import ( "code.gitea.io/gitea/core" bots_model "code.gitea.io/gitea/models/bots" + "code.gitea.io/gitea/modules/bots" "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/timeutil" runnerv1 "gitea.com/gitea/proto-go/runner/v1" "gitea.com/gitea/proto-go/runner/v1/runnerv1connect" @@ -149,7 +149,7 @@ func (s *Service) UpdateTask( ) (*connect.Response[runnerv1.UpdateTaskResponse], error) { res := connect.NewResponse(&runnerv1.UpdateTaskResponse{}) - if err := bots_model.UpdateTask(req.Msg.State); err != nil { + if err := bots_model.UpdateTaskByState(req.Msg.State); err != nil { return nil, status.Errorf(codes.Internal, "update task: %v", err) } @@ -163,27 +163,38 @@ func (s *Service) UpdateLog( ) (*connect.Response[runnerv1.UpdateLogResponse], error) { res := connect.NewResponse(&runnerv1.UpdateLogResponse{}) - if len(req.Msg.Rows) == 0 { - // TODO: should be 1 + the max id of stored log - res.Msg.AckIndex = req.Msg.Index + task, err := bots_model.GetTaskByID(ctx, req.Msg.TaskId) + if err != nil { + return nil, status.Errorf(codes.Internal, "get task: %v", err) + } + if task.LogURL == "" { + task.LogURL = fmt.Sprintf("dbfs:///bots/tasks/%d.log", task.ID) + if err := bots_model.UpdateTask(ctx, task, "log_url"); err != nil { + return nil, status.Errorf(codes.Internal, "update task: %v", err) + } + } + ack := task.LogLength + + if len(req.Msg.Rows) == 0 || req.Msg.Index > ack || int64(len(req.Msg.Rows))+req.Msg.Index <= ack { + res.Msg.AckIndex = ack return res, nil } - rowIndex := req.Msg.Index - rows := make([]*bots_model.TaskLog, len(req.Msg.Rows)) - for i, v := range req.Msg.Rows { - rows[i] = &bots_model.TaskLog{ - ID: rowIndex + int64(i), - Timestamp: timeutil.TimeStamp(v.Time.AsTime().Unix()), - Content: v.Content, - } + rows := req.Msg.Rows[ack-req.Msg.Index:] + ns, err := bots.WriteLogs(ctx, task.LogURL, task.LogSize, rows) + if err != nil { + return nil, status.Errorf(codes.Internal, "write logs: %v", err) + } + task.LogLength += int64(len(rows)) + for _, n := range ns { + task.LogIndexes = append(task.LogIndexes, task.LogSize) + task.LogSize += int64(n) + } + if err := bots_model.UpdateTask(ctx, task, "log_indexes", "log_length", "log_size"); err != nil { + return nil, status.Errorf(codes.Internal, "update task: %v", err) } - ack, err := bots_model.InsertTaskLogs(req.Msg.TaskId, rows) - if err != nil { - return nil, status.Errorf(codes.Internal, "insert task log: %v", err) - } - res.Msg.AckIndex = ack + res.Msg.AckIndex = task.LogLength if req.Msg.NoMore { // TODO: transfer logs to storage from db diff --git a/routers/web/dev/buildview.go b/routers/web/dev/buildview.go index dc4ff7cbda..87abf4f248 100644 --- a/routers/web/dev/buildview.go +++ b/routers/web/dev/buildview.go @@ -3,11 +3,14 @@ package dev import ( "fmt" "net/http" + "time" "code.gitea.io/gitea/core" bots_model "code.gitea.io/gitea/models/bots" + "code.gitea.io/gitea/modules/bots" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/web" + runnerv1 "gitea.com/gitea/proto-go/runner/v1" ) func BuildView(ctx *context.Context) { @@ -78,7 +81,7 @@ type BuildViewStepLog struct { } type BuildViewStepLogLine struct { - Ln int `json:"ln"` + Ln int64 `json:"ln"` M string `json:"m"` T float64 `json:"t"` } @@ -172,9 +175,12 @@ func BuildViewPost(ctx *context.Context) { for _, cursor := range req.StepLogCursors { if cursor.Expanded { step := steps[cursor.StepIndex] - var logRows []*bots_model.TaskLog + var logRows []*runnerv1.LogRow if cursor.Cursor < step.LogLength || step.LogLength < 0 { - logRows, err = bots_model.GetTaskLogs(task.ID, step.LogIndex+cursor.Cursor, step.LogLength-cursor.Cursor) + index := step.LogIndex + cursor.Cursor + length := step.LogLength - cursor.Cursor + offset := task.LogIndexes[index] + logRows, err = bots.ReadLogs(ctx, task.LogURL, offset, length) if err != nil { ctx.Error(http.StatusInternalServerError, err.Error()) return @@ -183,9 +189,9 @@ func BuildViewPost(ctx *context.Context) { logLines := make([]BuildViewStepLogLine, len(logRows)) for i, row := range logRows { logLines[i] = BuildViewStepLogLine{ - Ln: i, + Ln: cursor.Cursor + int64(i), M: row.Content, - T: float64(row.Timestamp), + T: float64(row.Time.AsTime().UnixNano()) / float64(time.Second), } } resp.LogsData.StreamingLogs = append(resp.LogsData.StreamingLogs, BuildViewStepLog{