From d3effd2ead4a3a7d17f23cf455470ed4beb6fd78 Mon Sep 17 00:00:00 2001 From: aiordache Date: Wed, 30 Sep 2020 14:52:53 +0200 Subject: [PATCH] Detect task failures Signed-off-by: aiordache --- api/compose/api.go | 1 + cli/cmd/compose/list.go | 4 +- ecs/sdk.go | 112 +++++++++++++++++++++++++++++++++++----- ecs/wait.go | 26 +++++++++- 4 files changed, 126 insertions(+), 17 deletions(-) diff --git a/api/compose/api.go b/api/compose/api.go index e37e65a9a..dc5b975d3 100644 --- a/api/compose/api.go +++ b/api/compose/api.go @@ -77,4 +77,5 @@ type Stack struct { ID string Name string Status string + Reason string } diff --git a/cli/cmd/compose/list.go b/cli/cmd/compose/list.go index 806ee5356..98ac8f583 100644 --- a/cli/cmd/compose/list.go +++ b/cli/cmd/compose/list.go @@ -21,6 +21,7 @@ import ( "fmt" "io" "os" + "strings" "github.com/spf13/cobra" "github.com/spf13/pflag" @@ -60,7 +61,8 @@ func runList(ctx context.Context, opts composeOptions) error { view := viewFromStackList(stackList) return formatter.Print(view, opts.Format, os.Stdout, func(w io.Writer) { for _, stack := range view { - _, _ = fmt.Fprintf(w, "%s\t%s\n", stack.Name, stack.Status) + _, _ = fmt.Fprintf(w, "%s\t%s\n", stack.Name, strings.TrimSpace( + fmt.Sprintf("%s %s", stack.Status, stack.Reason))) } }, "NAME", "STATUS") } diff --git a/ecs/sdk.go b/ecs/sdk.go index e1cef4c98..9a9119aac 100644 --- a/ecs/sdk.go +++ b/ecs/sdk.go @@ -304,29 +304,113 @@ func (s sdk) ListStacks(ctx context.Context, name string) ([]compose.Stack, erro } stacks := []compose.Stack{} for _, stack := range cfStacks.Stacks { + skip := true for _, t := range stack.Tags { if *t.Key == compose.ProjectTag { - status := compose.RUNNING - switch aws.StringValue(stack.StackStatus) { - case "CREATE_IN_PROGRESS": - status = compose.STARTING - case "DELETE_IN_PROGRESS": - status = compose.REMOVING - case "UPDATE_IN_PROGRESS": - status = compose.UPDATING - } - stacks = append(stacks, compose.Stack{ - ID: aws.StringValue(stack.StackId), - Name: 
aws.StringValue(stack.StackName), - Status: status, - }) + skip = false break } } + if skip { + continue + } + status := compose.RUNNING + reason := "" + switch aws.StringValue(stack.StackStatus) { + case "CREATE_IN_PROGRESS": + status = compose.STARTING + case "DELETE_IN_PROGRESS": + status = compose.REMOVING + case "UPDATE_IN_PROGRESS": + status = compose.UPDATING + } + if status == compose.STARTING { + if err := s.CheckStackState(ctx, aws.StringValue(stack.StackName)); err != nil { + status = compose.FAILED + reason = err.Error() + } + } + stacks = append(stacks, compose.Stack{ + ID: aws.StringValue(stack.StackId), + Name: aws.StringValue(stack.StackName), + Status: status, + Reason: reason, + }) + } return stacks, nil } +func (s sdk) CheckStackState(ctx context.Context, name string) error { + resources, err := s.CF.ListStackResourcesWithContext(ctx, &cloudformation.ListStackResourcesInput{ + StackName: aws.String(name), + }) + if err != nil { + return err + } + services := []*string{} + serviceNames := []string{} + var cluster *string + for _, r := range resources.StackResourceSummaries { + if aws.StringValue(r.ResourceType) == "AWS::ECS::Cluster" { + cluster = r.PhysicalResourceId + continue + } + if aws.StringValue(r.ResourceType) == "AWS::ECS::Service" { + if r.PhysicalResourceId == nil { + continue + } + services = append(services, r.PhysicalResourceId) + serviceNames = append(serviceNames, *r.LogicalResourceId) + } + } + for i, service := range services { + err := s.CheckTaskState(ctx, aws.StringValue(cluster), aws.StringValue(service)) + if err != nil { + return fmt.Errorf("%s error: %s", serviceNames[i], err.Error()) + } + } + return nil +} + +func (s sdk) CheckTaskState(ctx context.Context, cluster string, serviceName string) error { + tasks, err := s.ECS.ListTasksWithContext(ctx, &ecs.ListTasksInput{ + Cluster: aws.String(cluster), + ServiceName: aws.String(serviceName), + }) + if err != nil { + return err + } + if len(tasks.TaskArns) > 0 { + return 
nil + } + tasks, err = s.ECS.ListTasksWithContext(ctx, &ecs.ListTasksInput{ + Cluster: aws.String(cluster), + ServiceName: aws.String(serviceName), + DesiredStatus: aws.String("STOPPED"), + }) + if err != nil { + return err + } + if len(tasks.TaskArns) > 0 { + taskDescriptions, err := s.ECS.DescribeTasksWithContext(ctx, &ecs.DescribeTasksInput{ + Cluster: aws.String(cluster), + Tasks: tasks.TaskArns, + }) + if err != nil { + return err + } + if len(taskDescriptions.Tasks) > 0 { + recentTask := taskDescriptions.Tasks[0] + switch aws.StringValue(recentTask.StopCode) { + case "TaskFailedToStart": + return fmt.Errorf("%s", aws.StringValue(recentTask.StoppedReason)) + } + } + } + return nil +} + func (s sdk) DescribeStackEvents(ctx context.Context, stackID string) ([]*cloudformation.StackEvent, error) { // Fixme implement Paginator on Events and return as a chan(events) events := []*cloudformation.StackEvent{} diff --git a/ecs/wait.go b/ecs/wait.go index 7413a4feb..55c47da96 100644 --- a/ecs/wait.go +++ b/ecs/wait.go @@ -52,6 +52,7 @@ func (b *ecsAPIService) WaitStackCompletion(ctx context.Context, name string, op var completed bool var stackErr error + for !completed { select { case <-done: @@ -77,8 +78,8 @@ func (b *ecsAPIService) WaitStackCompletion(ctx context.Context, name string, op reason := aws.StringValue(event.ResourceStatusReason) status := aws.StringValue(event.ResourceStatus) progressStatus := progress.Working - switch status { + case "CREATE_COMPLETE": if operation == stackCreate { progressStatus = progress.Done @@ -100,12 +101,33 @@ func (b *ecsAPIService) WaitStackCompletion(ctx context.Context, name string, op } } } + w.Event(progress.Event{ ID: resource, Status: progressStatus, - StatusText: status, + StatusText: reason, }) } + if operation != stackCreate || stackErr != nil { + continue + } + if err := b.SDK.CheckStackState(ctx, name); err != nil { + stackErr = err + b.SDK.DeleteStack(ctx, name) + operation = stackDelete + + reason := err.Error() + if 
len(reason) > 30 { + reason = reason[:30] + "..." + } + w.Event(progress.Event{ + ID: name, + Status: progress.Error, + StatusText: reason, + }) + + } + } return stackErr