Skip to content

Commit

Permalink
[chore] Add extension/cgroupruntime integration tests (open-telemetry…
Browse files Browse the repository at this point in the history
…#36617)

<!--Ex. Fixing a bug - Describe the bug and how this fixes the issue.
Ex. Adding a feature - Explain what this achieves.-->
#### Description

Adds some integration tests for the extension. It uses the
`containerd/cgroups` package to modify the current process's allocated
cgroup resources and assert the corresponding values for
GOMEMLIMIT/GOMAXPROCS set by the extension.

<!-- Issue number (e.g. open-telemetry#1234) or full URL to issue, if applicable. -->
#### Link to tracking issue
Fixes
open-telemetry#36545

<!--Describe what testing was performed and which tests were added.-->
#### Testing

Modifying cgroup resources requires privileged access on GHA runner
instances, so the test must be run with `sudo`. The `go` toolchain has
an `-exec` flag to run test binaries via another program such as sudo.
The Makefile has been modified to run Go test files with build tag
`integration` && `sudo` with the sudo command. I am not very confident
with this solution, as I could not find any other component requiring
privileged execution for its integration tests and the "go test
-tags=integration,sudo" would run for all of them. I am all ears on
other testing strategies for this use case.

Similar strategy in cgroups package
https://github.com/containerd/cgroups/blob/main/.github/workflows/ci.yml#L101

<!--Describe the documentation added.-->
#### Documentation

<!--Please delete paragraphs that you did not use before submitting.-->

---------

Co-authored-by: Pablo Baeyens <[email protected]>
  • Loading branch information
2 people authored and mterhar committed Dec 19, 2024
1 parent 699c230 commit 7b1fce7
Show file tree
Hide file tree
Showing 4 changed files with 264 additions and 22 deletions.
7 changes: 5 additions & 2 deletions Makefile.Common
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@ GO_BUILD_TAGS=""
GO_BUILD_LDFLAGS="-s -w"
GOTEST_TIMEOUT?= 600s
GOTEST_OPT?= -race -timeout $(GOTEST_TIMEOUT) -parallel 4 --tags=$(GO_BUILD_TAGS)
GOTEST_INTEGRATION_OPT?= -race -timeout 360s -parallel 4
GOTEST_INTEGRATION_OPT?= -race -timeout 360s -parallel 4 -skip Sudo
GOTEST_INTEGRATION_OPT_SUDO= $(GOTEST_INTEGRATION_OPT) -exec sudo -run Sudo
GOTEST_OPT_WITH_COVERAGE = $(GOTEST_OPT) -coverprofile=coverage.txt -covermode=atomic
GOTEST_OPT_WITH_INTEGRATION=$(GOTEST_INTEGRATION_OPT) -tags=integration,$(GO_BUILD_TAGS)
GOTEST_OPT_WITH_INTEGRATION_SUDO=$(GOTEST_INTEGRATION_OPT_SUDO) -tags=integration,$(GO_BUILD_TAGS)
GOTEST_OPT_WITH_INTEGRATION_COVERAGE=$(GOTEST_OPT_WITH_INTEGRATION) -coverprofile=integration-coverage.txt -covermode=atomic
GOCMD?= go
GOOS=$(shell $(GOCMD) env GOOS)
Expand Down Expand Up @@ -152,12 +154,13 @@ endif
runbuilttest: $(GOTESTSUM)
ifneq (,$(wildcard ./builtunitetest.test))
$(GOTESTSUM) --raw-command -- $(GOCMD) tool test2json -p "./..." -t ./builtunitetest.test -test.v -test.failfast -test.timeout $(GOTEST_TIMEOUT)
endif
endif

.PHONY: mod-integration-test
mod-integration-test: $(GOTESTSUM)
@echo "running $(GOCMD) integration test ./... in `pwd`"
$(GOTESTSUM) $(GOTESTSUM_OPT) --packages="./..." -- $(GOTEST_OPT_WITH_INTEGRATION)
$(GOTESTSUM) $(GOTESTSUM_OPT) --packages="./..." -- $(GOTEST_OPT_WITH_INTEGRATION_SUDO)
@if [ -e integration-coverage.txt ]; then \
$(GOCMD) tool cover -html=integration-coverage.txt -o integration-coverage.html; \
fi
Expand Down
14 changes: 7 additions & 7 deletions extension/cgroupruntimeextension/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.22.0

require (
github.com/KimMachineGun/automemlimit v0.6.1
github.com/containerd/cgroups/v3 v3.0.2
github.com/stretchr/testify v1.10.0
go.opentelemetry.io/collector/component v0.116.0
go.opentelemetry.io/collector/component/componenttest v0.116.0
Expand All @@ -13,29 +14,29 @@ require (
go.uber.org/automaxprocs v1.6.0
go.uber.org/goleak v1.3.0
go.uber.org/zap v1.27.0
golang.org/x/sys v0.27.0
)

require (
github.com/cilium/ebpf v0.9.1 // indirect
github.com/containerd/cgroups/v3 v3.0.1 // indirect
github.com/coreos/go-systemd/v22 v22.3.2 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/docker/go-units v0.4.0 // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
github.com/godbus/dbus/v5 v5.0.4 // indirect
github.com/godbus/dbus/v5 v5.1.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/knadh/koanf/maps v0.1.1 // indirect
github.com/knadh/koanf/providers/confmap v0.1.0 // indirect
github.com/knadh/koanf/v2 v2.1.2 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/opencontainers/runtime-spec v1.0.2 // indirect
github.com/opencontainers/runtime-spec v1.1.0 // indirect
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/sirupsen/logrus v1.8.1 // indirect
github.com/sirupsen/logrus v1.9.0 // indirect
go.opentelemetry.io/collector/config/configtelemetry v0.116.0 // indirect
go.opentelemetry.io/collector/pdata v1.22.0 // indirect
go.opentelemetry.io/otel v1.32.0 // indirect
Expand All @@ -45,7 +46,6 @@ require (
go.opentelemetry.io/otel/trace v1.32.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/net v0.29.0 // indirect
golang.org/x/sys v0.27.0 // indirect
golang.org/x/text v0.18.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect
google.golang.org/grpc v1.68.1 // indirect
Expand Down
30 changes: 17 additions & 13 deletions extension/cgroupruntimeextension/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

235 changes: 235 additions & 0 deletions extension/cgroupruntimeextension/integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
//go:build integration && linux
// +build integration,linux

// Privileged access is required to set cgroup's memory and cpu max values

package cgroupruntimeextension // import "github.com/open-telemetry/opentelemetry-collector-contrib/extension/cgroupruntimeextension"

import (
"context"
"fmt"
"math"
"os"
"path"
"path/filepath"
"runtime"
"runtime/debug"
"strconv"
"strings"
"testing"

"github.com/containerd/cgroups/v3/cgroup2"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/collector/component/componenttest"
"go.opentelemetry.io/collector/extension/extensiontest"
"golang.org/x/sys/unix"
)

const (
defaultCgroup2Path = "/sys/fs/cgroup"
)

// checkCgroupSystem skips the calling test unless the filesystem at
// defaultCgroup2Path is a cgroup v2 (unified) hierarchy.
//
// The test is also skipped when the cgroup root cannot be inspected at all,
// since nothing can be asserted about the host in that case.
func checkCgroupSystem(tb testing.TB) {
	// Report skips at the caller's line instead of inside this helper.
	tb.Helper()

	var st unix.Statfs_t
	if err := unix.Statfs(defaultCgroup2Path, &st); err != nil {
		tb.Skipf("cannot statfs cgroup root: %v", err)
	}

	// Any filesystem type other than cgroup2 at the root means the host is
	// running in hybrid or cgroup v1 mode.
	if st.Type != unix.CGROUP2_SUPER_MAGIC {
		tb.Skip("System running in hybrid or cgroupv1 mode")
	}
}

// cgroupMaxCpu returns the CPU max definition for a given cgroup slice path.
//
// It reads the cpu.max file located under defaultCgroup2Path/filename.
// File format: "cpu_quota cpu_period", where cpu_quota may be the literal
// string "max"; in that case math.MaxInt64 is returned as the quota.
//
// An error is returned when the file cannot be read, when it does not contain
// exactly two whitespace-separated fields, or when either field fails to
// parse as a number. On error both numeric results are zero.
func cgroupMaxCpu(filename string) (quota int64, period uint64, err error) {
	cpuMaxPath := filepath.Join(defaultCgroup2Path, filename, "cpu.max")
	out, err := os.ReadFile(cpuMaxPath)
	if err != nil {
		return 0, 0, err
	}

	// Guard against empty or malformed content before indexing the fields.
	values := strings.Fields(string(out))
	if len(values) != 2 {
		return 0, 0, fmt.Errorf("parsing %q: expected 2 fields, got %d", cpuMaxPath, len(values))
	}

	if values[0] == "max" {
		quota = math.MaxInt64
	} else if quota, err = strconv.ParseInt(values[0], 10, 64); err != nil {
		return 0, 0, fmt.Errorf("parsing cpu quota in %q: %w", cpuMaxPath, err)
	}

	if period, err = strconv.ParseUint(values[1], 10, 64); err != nil {
		return 0, 0, fmt.Errorf("parsing cpu period in %q: %w", cpuMaxPath, err)
	}
	return quota, period, nil
}

// TestCgroupV2SudoIntegration rewrites the memory.max and cpu.max limits of
// the cgroup the test process belongs to, starts the extension, and asserts
// the GOMAXPROCS and GOMEMLIMIT values observed afterwards.
//
// Privileged access is required to set the cgroup's memory and cpu max
// values. The test is skipped on hosts that are not running cgroup v2.
// After every sub-test the Go runtime settings and the cgroup limits captured
// at startup are restored.
func TestCgroupV2SudoIntegration(t *testing.T) {
	checkCgroupSystem(t)
	pointerInt64 := func(val int64) *int64 {
		return &val
	}
	pointerUint64 := func(uval uint64) *uint64 {
		return &uval
	}

	tests := []struct {
		name string
		// nil CPU quota == "max" cgroup string value
		cgroupCpuQuota     *int64
		cgroupCpuPeriod    uint64
		cgroupMaxMemory    int64
		config             *Config
		expectedGoMaxProcs int
		expectedGoMemLimit int64
	}{
		{
			name:            "90% the max cgroup memory and 12 GOMAXPROCS",
			cgroupCpuQuota:  pointerInt64(100000),
			cgroupCpuPeriod: 8000,
			// 128 MiB
			cgroupMaxMemory: 134217728,
			config: &Config{
				GoMaxProcs: GoMaxProcsConfig{
					Enabled: true,
				},
				GoMemLimit: GoMemLimitConfig{
					Enabled: true,
					Ratio:   0.9,
				},
			},
			// 100000 / 8000 = 12.5, expected to be rounded down
			expectedGoMaxProcs: 12,
			// 134217728 * 0.9, truncated to an integer
			expectedGoMemLimit: 120795955,
		},
		{
			name:            "50% of the max cgroup memory and 1 GOMAXPROCS",
			cgroupCpuQuota:  pointerInt64(100000),
			cgroupCpuPeriod: 100000,
			// 128 MiB
			cgroupMaxMemory: 134217728,
			config: &Config{
				GoMaxProcs: GoMaxProcsConfig{
					Enabled: true,
				},
				GoMemLimit: GoMemLimitConfig{
					Enabled: true,
					Ratio:   0.5,
				},
			},
			// 100000 / 100000
			expectedGoMaxProcs: 1,
			// 134217728 * 0.5
			expectedGoMemLimit: 67108864,
		},
		{
			name:            "10% of the max cgroup memory, max cpu, default GOMAXPROCS",
			cgroupCpuQuota:  nil,
			cgroupCpuPeriod: 100000,
			// 128 MiB
			cgroupMaxMemory: 134217728,
			config: &Config{
				GoMaxProcs: GoMaxProcsConfig{
					Enabled: true,
				},
				GoMemLimit: GoMemLimitConfig{
					Enabled: true,
					Ratio:   0.1,
				},
			},
			// GOMAXPROCS is derived from `cpu quota / cpu period`.
			// If the cpu quota is set to "max", GOMAXPROCS should not be
			// modified, so the value in effect at table construction is expected.
			expectedGoMaxProcs: runtime.GOMAXPROCS(-1),
			// 134217728 * 0.1, truncated to an integer
			expectedGoMemLimit: 13421772,
		},
	}

	// Every step below depends on the previous one, so a failure must stop
	// the test instead of continuing with a nil manager.
	cgroupPath, err := cgroup2.PidGroupPath(os.Getpid())
	require.NoError(t, err)
	manager, err := cgroup2.Load(cgroupPath)
	require.NoError(t, err)

	stats, err := manager.Stat()
	require.NoError(t, err)

	// Startup resource values
	initialMaxMemory := stats.GetMemory().GetUsageLimit()
	memoryCgroupCleanUp := func(t *testing.T) {
		assert.NoError(t, manager.Update(&cgroup2.Resources{
			Memory: &cgroup2.Memory{
				Max: pointerInt64(int64(initialMaxMemory)),
			},
		}))
	}

	if initialMaxMemory == math.MaxUint64 {
		// fallback solution to set cgroup's max memory to "max"
		memoryCgroupCleanUp = func(t *testing.T) {
			assert.NoError(t, os.WriteFile(path.Join(defaultCgroup2Path, cgroupPath, "memory.max"), []byte("max"), 0o600))
		}
	}

	initialCpuQuota, initialCpuPeriod, err := cgroupMaxCpu(cgroupPath)
	require.NoError(t, err)
	cpuCgroupCleanUp := func(t *testing.T) {
		assert.NoError(t, manager.Update(&cgroup2.Resources{
			CPU: &cgroup2.CPU{
				Max: cgroup2.NewCPUMax(pointerInt64(initialCpuQuota), pointerUint64(initialCpuPeriod)),
			},
		}))
	}

	if initialCpuQuota == math.MaxInt64 {
		// fallback solution to set cgroup's max cpu to "max"; the period is
		// written as well so the value changed by the test cases is restored
		cpuCgroupCleanUp = func(t *testing.T) {
			restored := []byte(fmt.Sprintf("max %d", initialCpuPeriod))
			assert.NoError(t, os.WriteFile(path.Join(defaultCgroup2Path, cgroupPath, "cpu.max"), restored, 0o600))
		}
	}

	// A negative argument only queries the current value without changing it.
	initialGoMem := debug.SetMemoryLimit(-1)
	initialGoProcs := runtime.GOMAXPROCS(-1)

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// restore startup Go runtime settings and cgroup resource values
			t.Cleanup(func() {
				debug.SetMemoryLimit(initialGoMem)
				runtime.GOMAXPROCS(initialGoProcs)
				memoryCgroupCleanUp(t)
				cpuCgroupCleanUp(t)
			})

			err := manager.Update(&cgroup2.Resources{
				Memory: &cgroup2.Memory{
					// The default max memory must be overwritten so that
					// automemlimit changes the GOMEMLIMIT value
					Max: pointerInt64(test.cgroupMaxMemory),
				},
				CPU: &cgroup2.CPU{
					Max: cgroup2.NewCPUMax(test.cgroupCpuQuota, pointerUint64(test.cgroupCpuPeriod)),
				},
			})
			require.NoError(t, err)

			factory := NewFactory()
			ctx := context.Background()
			extension, err := factory.Create(ctx, extensiontest.NewNopSettings(), test.config)
			require.NoError(t, err)

			err = extension.Start(ctx, componenttest.NewNopHost())
			require.NoError(t, err)

			assert.Equal(t, test.expectedGoMaxProcs, runtime.GOMAXPROCS(-1))
			assert.Equal(t, test.expectedGoMemLimit, debug.SetMemoryLimit(-1))
		})
	}
}

0 comments on commit 7b1fce7

Please sign in to comment.