Some more functional design

Signed-off-by: Nicolas De Loof <nicolas.deloof@gmail.com>
This commit is contained in:
Nicolas De Loof 2020-09-17 12:24:11 +02:00
parent dbe87e23a9
commit 101e1555b8
No known key found for this signature in database
GPG Key ID: 9858809D6F8F6E7E
2 changed files with 124 additions and 54 deletions

View File

@ -18,6 +18,7 @@ package ecs
import ( import (
"fmt" "fmt"
"math"
"strconv" "strconv"
"github.com/compose-spec/compose-go/types" "github.com/compose-spec/compose-go/types"
@ -74,69 +75,138 @@ func (f family) firstOrError(msg string, args ...interface{}) (machine, error) {
} }
func guessMachineType(project *types.Project) (string, error) { func guessMachineType(project *types.Project) (string, error) {
// we select a machine type to match all gpu-bound services requirements // we select a machine type to match all gpus-bound services requirements
// once https://github.com/aws/containers-roadmap/issues/631 is implemented we can define dedicated CapacityProviders per service. // once https://github.com/aws/containers-roadmap/issues/631 is implemented we can define dedicated CapacityProviders per service.
minMemory, minCPU, minGPU, err := getResourceRequirements(project) requirements, err := getResourceRequirements(project)
if err != nil { if err != nil {
return "", err return "", err
} }
instanceType, err := p3family. instanceType, err := p3family.
filter(func(m machine) bool { filter(func(m machine) bool {
return m.memory >= minMemory return m.memory >= requirements.memory
}). }).
filter(func(m machine) bool { filter(func(m machine) bool {
return m.cpus >= minCPU return m.cpus >= requirements.cpus
}). }).
filter(func(m machine) bool { filter(func(m machine) bool {
return m.gpus >= minGPU return m.gpus >= requirements.gpus
}). }).
firstOrError("none of the AWS p3 machines match requirement for memory:%d cpu:%f gpu:%d", minMemory, minCPU, minGPU) firstOrError("none of the AWS p3 machines match requirement for memory:%d cpu:%f gpus:%d", requirements.memory, requirements.cpus, requirements.gpus)
if err != nil { if err != nil {
return "", err return "", err
} }
return instanceType.id, nil return instanceType.id, nil
} }
func getResourceRequirements(project *types.Project) (types.UnitBytes, float64, int64, error) { type resourceRequirements struct {
var minMemory types.UnitBytes memory types.UnitBytes
var minCPU float64 cpus float64
var minGPU int64 gpus int64
}
// getResourceRequirements aggregates the resource reservations of the
// GPU-bound services of project into a single set of requirements.
//
// Only services reserving at least one GPU are taken into account; the result
// holds, field by field, the largest memory, cpus and gpus values among them.
// An error is returned when the reservations of a service cannot be converted.
func getResourceRequirements(project *types.Project) (*resourceRequirements, error) {
	return toResourceRequirementsSlice(project).
		filter(func(requirements *resourceRequirements) bool {
			// A service without deploy/reservations is represented by a nil
			// entry: it is not GPU-bound, and must not be dereferenced.
			return requirements != nil && requirements.gpus != 0
		}).
		max()
}
// eitherRequirementsOrError carries either a list of per-service resource
// requirements or the error that prevented building it, so that filter and
// max can be chained without checking the error between each step.
type eitherRequirementsOrError struct {
// requirements holds one entry per service; an entry may be nil when the
// service declares no deploy section or no reservations.
requirements []*resourceRequirements
// err, when non-nil, is passed through unchanged by filter and returned by max.
err error
}
func toResourceRequirementsSlice(project *types.Project) eitherRequirementsOrError {
var requirements []*resourceRequirements
for _, service := range project.Services { for _, service := range project.Services {
if service.Deploy == nil { r, err := toResourceRequirements(service)
continue if err != nil {
} return eitherRequirementsOrError{nil, err}
reservations := service.Deploy.Resources.Reservations
if reservations == nil {
continue
} }
requirements = append(requirements, r)
}
return eitherRequirementsOrError{requirements, nil}
}
var requiredGPUs int64 func (r eitherRequirementsOrError) filter(fn func(*resourceRequirements) bool) eitherRequirementsOrError {
for _, r := range reservations.GenericResources { if r.err != nil {
if r.DiscreteResourceSpec.Kind == "gpu" { return r
requiredGPUs = r.DiscreteResourceSpec.Value }
break var requirements []*resourceRequirements
} for _, req := range r.requirements {
} if fn(req) {
if requiredGPUs == 0 { requirements = append(requirements, req)
continue
}
if requiredGPUs > minGPU {
minGPU = requiredGPUs
}
if reservations.MemoryBytes > minMemory {
minMemory = reservations.MemoryBytes
}
if reservations.NanoCPUs != "" {
nanocpu, err := strconv.ParseFloat(reservations.NanoCPUs, 64)
if err != nil {
return 0, 0, 0, err
}
if nanocpu > minCPU {
minCPU = nanocpu
}
} }
} }
return minMemory, minCPU, minGPU, nil return eitherRequirementsOrError{requirements, nil}
}
// toResourceRequirements extracts the resource reservations declared by a
// single service.
//
// It returns (nil, nil) when the service has no deploy section or no
// reservations. Otherwise the result holds the reserved memory, the reserved
// CPUs (parsed from the NanoCPUs string, 0 when unset) and the value of the
// first generic resource whose discrete spec has kind "gpus" (0 when none).
// An error is returned when NanoCPUs is set but is not a valid float.
func toResourceRequirements(service types.ServiceConfig) (*resourceRequirements, error) {
	if service.Deploy == nil {
		return nil, nil
	}
	reservations := service.Deploy.Resources.Reservations
	if reservations == nil {
		return nil, nil
	}

	var requiredGPUs int64
	for _, r := range reservations.GenericResources {
		// A generic resource declared without a discrete spec has nothing to
		// match against; skip it instead of dereferencing a nil pointer.
		if r.DiscreteResourceSpec == nil {
			continue
		}
		if r.DiscreteResourceSpec.Kind == "gpus" {
			requiredGPUs = r.DiscreteResourceSpec.Value
			break
		}
	}

	var nanocpu float64
	if reservations.NanoCPUs != "" {
		v, err := strconv.ParseFloat(reservations.NanoCPUs, 64)
		if err != nil {
			return nil, fmt.Errorf("invalid cpus reservation %q for service %q: %w", reservations.NanoCPUs, service.Name, err)
		}
		nanocpu = v
	}

	return &resourceRequirements{
		memory: reservations.MemoryBytes,
		cpus:   nanocpu,
		gpus:   requiredGPUs,
	}, nil
}
// combine merges o into r and returns the result: each field of the returned
// value is the larger of the two corresponding fields. A nil o leaves r
// unchanged. The receiver is a copy, so the caller's value is never modified.
func (r resourceRequirements) combine(o *resourceRequirements) resourceRequirements {
	if o != nil {
		r.memory = maxUnitBytes(r.memory, o.memory)
		r.cpus = math.Max(r.cpus, o.cpus)
		r.gpus = maxInt64(r.gpus, o.gpus)
	}
	return r
}
// max folds every collected requirement into one, keeping the largest value
// seen for each field. It returns the carried error, if any, instead of a
// result; with no requirements at all it returns a zero-valued result.
func (r eitherRequirementsOrError) max() (*resourceRequirements, error) {
	if r.err != nil {
		return nil, r.err
	}
	var merged resourceRequirements
	for i := range r.requirements {
		merged = merged.combine(r.requirements[i])
	}
	return &merged, nil
}
// maxInt64 returns the larger of a and b.
func maxInt64(a, b int64) int64 {
	if b >= a {
		return b
	}
	return a
}
// maxUnitBytes returns the larger of a and b.
func maxUnitBytes(a, b types.UnitBytes) types.UnitBytes {
if a > b {
return a
}
return b
} }

View File

@ -28,63 +28,63 @@ func TestGuessMachineType(t *testing.T) {
wantErr bool wantErr bool
}{ }{
{ {
name: "1-gpu", name: "1-gpus",
yaml: ` yaml: `
services: services:
learning: learning:
image: tensorflow/tensorflow:latest-gpu image: tensorflow/tensorflow:latest-gpus
deploy: deploy:
resources: resources:
reservations: reservations:
generic_resources: generic_resources:
- discrete_resource_spec: - discrete_resource_spec:
kind: gpu kind: gpus
value: 1 value: 1
`, `,
want: "p3.2xlarge", want: "p3.2xlarge",
wantErr: false, wantErr: false,
}, },
{ {
name: "4-gpu", name: "4-gpus",
yaml: ` yaml: `
services: services:
learning: learning:
image: tensorflow/tensorflow:latest-gpu image: tensorflow/tensorflow:latest-gpus
deploy: deploy:
resources: resources:
reservations: reservations:
generic_resources: generic_resources:
- discrete_resource_spec: - discrete_resource_spec:
kind: gpu kind: gpus
value: 4 value: 4
`, `,
want: "p3.8xlarge", want: "p3.8xlarge",
wantErr: false, wantErr: false,
}, },
{ {
name: "1-gpu, high-memory", name: "1-gpus, high-memory",
yaml: ` yaml: `
services: services:
learning: learning:
image: tensorflow/tensorflow:latest-gpu image: tensorflow/tensorflow:latest-gpus
deploy: deploy:
resources: resources:
reservations: reservations:
memory: 300Gb memory: 300Gb
generic_resources: generic_resources:
- discrete_resource_spec: - discrete_resource_spec:
kind: gpu kind: gpus
value: 2 value: 2
`, `,
want: "p3.16xlarge", want: "p3.16xlarge",
wantErr: false, wantErr: false,
}, },
{ {
name: "1-gpu, high-cpu", name: "1-gpus, high-cpu",
yaml: ` yaml: `
services: services:
learning: learning:
image: tensorflow/tensorflow:latest-gpu image: tensorflow/tensorflow:latest-gpus
deploy: deploy:
resources: resources:
reservations: reservations:
@ -92,7 +92,7 @@ services:
cpus: "32" cpus: "32"
generic_resources: generic_resources:
- discrete_resource_spec: - discrete_resource_spec:
kind: gpu kind: gpus
value: 2 value: 2
`, `,
want: "p3.8xlarge", want: "p3.8xlarge",