Some more functional design

Signed-off-by: Nicolas De Loof <nicolas.deloof@gmail.com>
This commit is contained in:
Nicolas De Loof 2020-09-17 12:24:11 +02:00
parent dbe87e23a9
commit 101e1555b8
No known key found for this signature in database
GPG Key ID: 9858809D6F8F6E7E
2 changed files with 124 additions and 54 deletions

View File

@ -18,6 +18,7 @@ package ecs
import (
"fmt"
"math"
"strconv"
"github.com/compose-spec/compose-go/types"
@ -74,69 +75,138 @@ func (f family) firstOrError(msg string, args ...interface{}) (machine, error) {
}
// guessMachineType returns the id of the machine obtained by narrowing
// p3family to the entries whose memory, cpus and gpus are all at least the
// project's resource requirements. It returns an error when
// getResourceRequirements fails or when firstOrError reports that no machine
// is left after filtering.
//
// NOTE(review): this span comes from a rendered diff whose +/- markers were
// stripped; every line using minMemory/minCPU/minGPU is the pre-change version
// of the requirements.* line that follows it.
func guessMachineType(project *types.Project) (string, error) {
// we select a machine type to match all gpu-bound services requirements
// we select a machine type to match all gpus-bound services requirements
// once https://github.com/aws/containers-roadmap/issues/631 is implemented we can define dedicated CapacityProviders per service.
minMemory, minCPU, minGPU, err := getResourceRequirements(project)
requirements, err := getResourceRequirements(project)
if err != nil {
return "", err
}
// narrow the p3 family by memory, then by cpus, then by gpus
instanceType, err := p3family.
filter(func(m machine) bool {
return m.memory >= minMemory
return m.memory >= requirements.memory
}).
filter(func(m machine) bool {
return m.cpus >= minCPU
return m.cpus >= requirements.cpus
}).
filter(func(m machine) bool {
return m.gpus >= minGPU
return m.gpus >= requirements.gpus
}).
firstOrError("none of the AWS p3 machines match requirement for memory:%d cpu:%f gpu:%d", minMemory, minCPU, minGPU)
firstOrError("none of the AWS p3 machines match requirement for memory:%d cpu:%f gpus:%d", requirements.memory, requirements.cpus, requirements.gpus)
if err != nil {
return "", err
}
return instanceType.id, nil
}
func getResourceRequirements(project *types.Project) (types.UnitBytes, float64, int64, error) {
var minMemory types.UnitBytes
var minCPU float64
var minGPU int64
// resourceRequirements groups the memory, CPU and GPU amounts that are
// compared against a machine's memory, cpus and gpus when picking an
// instance type.
type resourceRequirements struct {
// memory is the reserved amount of memory
memory types.UnitBytes
// cpus is the reserved CPU amount as a floating point number
cpus float64
// gpus is the number of reserved GPUs; zero means no GPU is required
gpus int64
}
// getResourceRequirements computes the requirements a single machine must
// satisfy to run the GPU-bound services of the project.
//
// Services that do not reserve any GPU are ignored. For the remaining ones the
// result is the field-wise maximum of memory, cpus and gpus; when no service
// reserves a GPU the result is the zero value. Any error produced while
// converting a service is returned as-is with a nil result.
func getResourceRequirements(project *types.Project) (*resourceRequirements, error) {
	return toResourceRequirementsSlice(project).
		filter(func(requirements *resourceRequirements) bool {
			// toResourceRequirements yields a nil entry for a service without
			// a deploy section or without reservations, and those entries are
			// kept in the slice: guard against them before reading gpus.
			return requirements != nil && requirements.gpus != 0
		}).
		max()
}
// eitherRequirementsOrError carries either a slice of requirements or an
// error. Its filter and max methods pass an existing error through, which lets
// calls be chained without checking the error after every step.
type eitherRequirementsOrError struct {
// requirements is the collected slice; it is nil when err is set
requirements []*resourceRequirements
// err is the failure that interrupted the collection, if any
err error
}
// toResourceRequirementsSlice converts every service of the project with
// toResourceRequirements and collects the results in project order. The first
// conversion error stops the loop and is returned in the err field together
// with a nil slice.
//
// toResourceRequirements may return (nil, nil); such nil entries are appended
// unchanged, so consumers of the slice have to tolerate nil elements.
//
// NOTE(review): this span comes from a rendered diff whose +/- markers were
// stripped; the service.Deploy / reservations checks ending with `continue`
// are pre-change lines of the former loop body, not part of this function.
func toResourceRequirementsSlice(project *types.Project) eitherRequirementsOrError {
var requirements []*resourceRequirements
for _, service := range project.Services {
if service.Deploy == nil {
continue
}
reservations := service.Deploy.Resources.Reservations
if reservations == nil {
continue
r, err := toResourceRequirements(service)
if err != nil {
return eitherRequirementsOrError{nil, err}
}
requirements = append(requirements, r)
}
return eitherRequirementsOrError{requirements, nil}
}
var requiredGPUs int64
for _, r := range reservations.GenericResources {
if r.DiscreteResourceSpec.Kind == "gpu" {
requiredGPUs = r.DiscreteResourceSpec.Value
break
}
}
if requiredGPUs == 0 {
continue
}
if requiredGPUs > minGPU {
minGPU = requiredGPUs
}
if reservations.MemoryBytes > minMemory {
minMemory = reservations.MemoryBytes
}
if reservations.NanoCPUs != "" {
nanocpu, err := strconv.ParseFloat(reservations.NanoCPUs, 64)
if err != nil {
return 0, 0, 0, err
}
if nanocpu > minCPU {
minCPU = nanocpu
}
// filter returns a new eitherRequirementsOrError holding only the requirements
// for which fn returns true, in their original order. When r already carries
// an error it is returned unchanged and fn is never called.
func (r eitherRequirementsOrError) filter(fn func(*resourceRequirements) bool) eitherRequirementsOrError {
if r.err != nil {
return r
}
var requirements []*resourceRequirements
for _, req := range r.requirements {
if fn(req) {
requirements = append(requirements, req)
}
}
// NOTE(review): the next line is a removed, pre-change line kept by the
// rendered diff; the return that belongs to this function is the one after it.
return minMemory, minCPU, minGPU, nil
return eitherRequirementsOrError{requirements, nil}
}
// toResourceRequirements extracts the resource reservations declared by a
// service.
//
// It returns (nil, nil) when the service has no deploy section or no
// reservations. Otherwise the result holds the reserved memory, the reserved
// CPU amount parsed from the reservation string (0 when unset) and the value
// of the first generic resource of kind "gpus" (0 when there is none). An
// error is returned when the CPU reservation cannot be parsed as a float.
func toResourceRequirements(service types.ServiceConfig) (*resourceRequirements, error) {
	if service.Deploy == nil {
		return nil, nil
	}
	reservations := service.Deploy.Resources.Reservations
	if reservations == nil {
		return nil, nil
	}

	var requiredGPUs int64
	for _, r := range reservations.GenericResources {
		// an entry declared without discrete_resource_spec carries a nil
		// spec; skip it instead of dereferencing it
		if r.DiscreteResourceSpec == nil {
			continue
		}
		if r.DiscreteResourceSpec.Kind == "gpus" {
			requiredGPUs = r.DiscreteResourceSpec.Value
			break
		}
	}

	var cpus float64
	if reservations.NanoCPUs != "" {
		v, err := strconv.ParseFloat(reservations.NanoCPUs, 64)
		if err != nil {
			return nil, err
		}
		cpus = v
	}

	return &resourceRequirements{
		memory: reservations.MemoryBytes,
		cpus:   cpus,
		gpus:   requiredGPUs,
	}, nil
}
// combine merges r with o by keeping, for each of memory, cpus and gpus, the
// larger of the two values. A nil o leaves r untouched. Neither operand is
// modified; the merged value is returned.
func (r resourceRequirements) combine(o *resourceRequirements) resourceRequirements {
	if o == nil {
		return r
	}

	var merged resourceRequirements
	merged.memory = maxUnitBytes(r.memory, o.memory)
	merged.cpus = math.Max(r.cpus, o.cpus)
	merged.gpus = maxInt64(r.gpus, o.gpus)
	return merged
}
// max folds the collected requirements into a single value with combine,
// starting from the zero value, and returns a pointer to it. An empty slice
// therefore yields zero requirements. When r carries an error, that error is
// returned with a nil result.
func (r eitherRequirementsOrError) max() (*resourceRequirements, error) {
	if r.err != nil {
		return nil, r.err
	}

	var highest resourceRequirements
	for i := range r.requirements {
		highest = highest.combine(r.requirements[i])
	}
	return &highest, nil
}
// maxInt64 returns the larger of a and b.
func maxInt64(a, b int64) int64 {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}
// maxUnitBytes returns the larger of the two byte amounts a and b.
func maxUnitBytes(a, b types.UnitBytes) types.UnitBytes {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}

View File

@ -28,63 +28,63 @@ func TestGuessMachineType(t *testing.T) {
wantErr bool
}{
{
name: "1-gpu",
name: "1-gpus",
yaml: `
services:
learning:
image: tensorflow/tensorflow:latest-gpu
image: tensorflow/tensorflow:latest-gpus
deploy:
resources:
reservations:
generic_resources:
- discrete_resource_spec:
kind: gpu
kind: gpus
value: 1
`,
want: "p3.2xlarge",
wantErr: false,
},
{
name: "4-gpu",
name: "4-gpus",
yaml: `
services:
learning:
image: tensorflow/tensorflow:latest-gpu
image: tensorflow/tensorflow:latest-gpus
deploy:
resources:
reservations:
generic_resources:
- discrete_resource_spec:
kind: gpu
kind: gpus
value: 4
`,
want: "p3.8xlarge",
wantErr: false,
},
{
name: "1-gpu, high-memory",
name: "1-gpus, high-memory",
yaml: `
services:
learning:
image: tensorflow/tensorflow:latest-gpu
image: tensorflow/tensorflow:latest-gpus
deploy:
resources:
reservations:
memory: 300Gb
generic_resources:
- discrete_resource_spec:
kind: gpu
kind: gpus
value: 2
`,
want: "p3.16xlarge",
wantErr: false,
},
{
name: "1-gpu, high-cpu",
name: "1-gpus, high-cpu",
yaml: `
services:
learning:
image: tensorflow/tensorflow:latest-gpu
image: tensorflow/tensorflow:latest-gpus
deploy:
resources:
reservations:
@ -92,7 +92,7 @@ services:
cpus: "32"
generic_resources:
- discrete_resource_spec:
kind: gpu
kind: gpus
value: 2
`,
want: "p3.8xlarge",