Skip to content

Commit

Permalink
Migrating s3 exporter to use aws sdk v2
Browse files Browse the repository at this point in the history
This change migrates the exporter to use sdk v2 while also providing
some code improvements.
  • Loading branch information
MovieStoreGuy committed Dec 6, 2024
1 parent a490083 commit 18cb087
Show file tree
Hide file tree
Showing 12 changed files with 723 additions and 278 deletions.
32 changes: 22 additions & 10 deletions exporter/awss3exporter/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,28 @@ import (
// S3UploaderConfig contains aws s3 uploader related config to controls things
// like bucket, prefix, batching, connections, retries, etc.
type S3UploaderConfig struct {
	// Region is the AWS region used when communicating with S3.
	Region string `mapstructure:"region"`
	// S3Bucket is the bucket name to be uploaded to.
	S3Bucket string `mapstructure:"s3_bucket"`
	// S3Prefix is the key (directory) prefix written to inside the bucket.
	S3Prefix string `mapstructure:"s3_prefix"`
	// S3Partition is used to provide the rollup on how data is written.
	// Valid values are: [hour,minute]
	S3Partition string `mapstructure:"s3_partition"`
	// FilePrefix is the filename prefix used for the file to avoid any potential collisions.
	FilePrefix string `mapstructure:"file_prefix"`
	// Endpoint is the URL used for communicating with S3.
	Endpoint string `mapstructure:"endpoint"`
	// RoleArn is the role to use when interacting with S3.
	RoleArn string `mapstructure:"role_arn"`
	// S3ForcePathStyle sets the value for force path style.
	S3ForcePathStyle bool `mapstructure:"s3_force_path_style"`
	// DisableSSL forces communication to happen via HTTP instead of HTTPS.
	DisableSSL bool `mapstructure:"disable_ssl"`
	// Compression sets the algorithm used to process the payload
	// before uploading to S3.
	// Valid values are: `gzip` or no value set.
	Compression configcompression.Type `mapstructure:"compression"`
}

type MarshalerType string
Expand Down
24 changes: 17 additions & 7 deletions exporter/awss3exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"context"
"fmt"

"github.com/open-telemetry/opentelemetry-collector-contrib/exporter/awss3exporter/internal/upload"
"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/consumer"
"go.opentelemetry.io/collector/exporter"
Expand All @@ -18,23 +19,26 @@ import (

// s3Exporter uploads marshaled telemetry payloads to S3.
type s3Exporter struct {
	// config holds the exporter configuration supplied by the collector.
	config *Config
	// signalType is one of "metrics", "traces", or "logs" and is
	// forwarded to the upload manager for key construction.
	signalType string
	// uploader performs the actual S3 upload; created in start.
	uploader upload.Manager
	logger *zap.Logger
	// marshaler encodes the telemetry into the payload bytes; created in start.
	marshaler marshaler
}

func newS3Exporter(config *Config,
func newS3Exporter(
config *Config,
signalType string,
params exporter.Settings,
) *s3Exporter {
s3Exporter := &s3Exporter{
config: config,
dataWriter: &s3Writer{},
signalType: signalType,
logger: params.Logger,
}
return s3Exporter
}

func (e *s3Exporter) start(_ context.Context, host component.Host) error {
func (e *s3Exporter) start(ctx context.Context, host component.Host) error {
var m marshaler
var err error
if e.config.Encoding != nil {
Expand All @@ -48,6 +52,12 @@ func (e *s3Exporter) start(_ context.Context, host component.Host) error {
}

e.marshaler = m

up, err := newUploadManager(ctx, e.config, e.signalType, m.format())
if err != nil {
return err
}
e.uploader = up
return nil
}

Expand All @@ -61,7 +71,7 @@ func (e *s3Exporter) ConsumeMetrics(ctx context.Context, md pmetric.Metrics) err
return err
}

return e.dataWriter.writeBuffer(ctx, buf, e.config, "metrics", e.marshaler.format())
return e.uploader.Upload(ctx, buf)
}

func (e *s3Exporter) ConsumeLogs(ctx context.Context, logs plog.Logs) error {
Expand All @@ -70,7 +80,7 @@ func (e *s3Exporter) ConsumeLogs(ctx context.Context, logs plog.Logs) error {
return err
}

return e.dataWriter.writeBuffer(ctx, buf, e.config, "logs", e.marshaler.format())
return e.uploader.Upload(ctx, buf)
}

func (e *s3Exporter) ConsumeTraces(ctx context.Context, traces ptrace.Traces) error {
Expand All @@ -79,5 +89,5 @@ func (e *s3Exporter) ConsumeTraces(ctx context.Context, traces ptrace.Traces) er
return err
}

return e.dataWriter.writeBuffer(ctx, buf, e.config, "traces", e.marshaler.format())
return e.uploader.Upload(ctx, buf)
}
10 changes: 5 additions & 5 deletions exporter/awss3exporter/exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ type TestWriter struct {
t *testing.T
}

func (testWriter *TestWriter) writeBuffer(_ context.Context, buf []byte, _ *Config, _ string, _ string) error {
func (testWriter *TestWriter) Upload(_ context.Context, buf []byte) error {
assert.Equal(testWriter.t, testLogs, buf)
return nil
}
Expand All @@ -33,10 +33,10 @@ func getTestLogs(tb testing.TB) plog.Logs {
// getLogExporter constructs an s3Exporter wired with a TestWriter uploader
// and an otlp_json marshaler, suitable for exercising ConsumeLogs in tests.
func getLogExporter(t *testing.T) *s3Exporter {
	marshaler, _ := newMarshaler("otlp_json", zap.NewNop())
	exporter := &s3Exporter{
		config:    createDefaultConfig().(*Config),
		uploader:  &TestWriter{t},
		logger:    zap.NewNop(),
		marshaler: marshaler,
	}
	return exporter
}
Expand Down
6 changes: 3 additions & 3 deletions exporter/awss3exporter/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func createLogsExporter(ctx context.Context,
params exporter.Settings,
config component.Config,
) (exporter.Logs, error) {
s3Exporter := newS3Exporter(config.(*Config), params)
s3Exporter := newS3Exporter(config.(*Config), "logs", params)

return exporterhelper.NewLogs(ctx, params,
config,
Expand All @@ -51,7 +51,7 @@ func createMetricsExporter(ctx context.Context,
params exporter.Settings,
config component.Config,
) (exporter.Metrics, error) {
s3Exporter := newS3Exporter(config.(*Config), params)
s3Exporter := newS3Exporter(config.(*Config), "metrics", params)

if config.(*Config).MarshalerName == SumoIC {
return nil, fmt.Errorf("metrics are not supported by sumo_ic output format")
Expand All @@ -67,7 +67,7 @@ func createTracesExporter(ctx context.Context,
params exporter.Settings,
config component.Config,
) (exporter.Traces, error) {
s3Exporter := newS3Exporter(config.(*Config), params)
s3Exporter := newS3Exporter(config.(*Config), "traces", params)

if config.(*Config).MarshalerName == SumoIC {
return nil, fmt.Errorf("traces are not supported by sumo_ic output format")
Expand Down
22 changes: 20 additions & 2 deletions exporter/awss3exporter/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,14 @@ module github.com/open-telemetry/opentelemetry-collector-contrib/exporter/awss3e
go 1.22.0

require (
github.com/aws/aws-sdk-go v1.55.5
github.com/aws/aws-sdk-go-v2 v1.32.6
github.com/aws/aws-sdk-go-v2/config v1.28.6
github.com/aws/aws-sdk-go-v2/credentials v1.17.47
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.43
github.com/aws/aws-sdk-go-v2/service/s3 v1.71.0
github.com/aws/aws-sdk-go-v2/service/sts v1.33.2
github.com/stretchr/testify v1.10.0
github.com/tilinna/clock v1.1.0
go.opentelemetry.io/collector/component v0.115.0
go.opentelemetry.io/collector/component/componenttest v0.115.0
go.opentelemetry.io/collector/config/configcompression v1.21.0
Expand All @@ -20,6 +26,19 @@ require (
)

require (
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.21 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.25 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.25 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 // indirect
github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.25 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.4.6 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.6 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.6 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.24.7 // indirect
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.6 // indirect
github.com/aws/smithy-go v1.22.1 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
Expand All @@ -34,7 +53,6 @@ require (
github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 // indirect
github.com/hashicorp/go-version v1.7.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.11 // indirect
github.com/knadh/koanf/maps v0.1.1 // indirect
Expand Down
49 changes: 40 additions & 9 deletions exporter/awss3exporter/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

95 changes: 95 additions & 0 deletions exporter/awss3exporter/internal/upload/partition.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package upload // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/awss3exporter/internal/upload"

import (
"fmt"
"math/rand/v2"
"strconv"
"time"

"go.opentelemetry.io/collector/config/configcompression"
)

// compressionFileExtensions maps a payload compression algorithm to the
// file extension appended to the uploaded object's name.
var compressionFileExtensions = map[configcompression.Type]string{
	configcompression.TypeGzip: ".gz",
}

// PartitionKeyBuilder constructs the S3 object key used for an upload,
// combining a time-partitioned directory prefix with a unique file name.
type PartitionKeyBuilder struct {
// PartitionPrefix defines the S3 directory (key)
// prefix used to write the file.
PartitionPrefix string
// PartitionTruncation is used to truncate values into
// different time buckets.
// Currently hourly ("" / default) or minutely ("minute") is supported.
PartitionTruncation string
// FilePrefix is used to define the prefix of the file written
// to the directory in S3.
FilePrefix string
// FileFormat defines what encoding was used to write
// the content to S3; appended as the file extension when set.
FileFormat string
// Metadata provides additional details regarding the file.
// Expected to be one of "metrics", "traces", or "logs".
Metadata string
// Compression defines the algorithm used on the
// body before it is uploaded.
Compression configcompression.Type
// UniqueKeyFunc allows for overwriting the default behaviour of
// generating a new unique string to avoid collisions on file upload
// across many different instances.
//
// TODO: Expose the ability to config additional UniqueKeyField via config
UniqueKeyFunc func() string
}

// Build renders the full S3 object key for content produced at time ts:
// the time-partitioned directory prefix joined with the file name.
func (pki *PartitionKeyBuilder) Build(ts time.Time) string {
	return fmt.Sprintf("%s/%s", pki.bucketKeyPrefix(ts), pki.fileName())
}

// bucketKeyPrefix returns the time-partitioned directory portion of the key,
// in the form "<prefix>/year=YYYY/month=MM/day=DD/hour=HH[/minute=MM]".
func (pki *PartitionKeyBuilder) bucketKeyPrefix(ts time.Time) string {
	key := fmt.Sprintf(
		"year=%d/month=%02d/day=%02d/hour=%02d",
		ts.Year(), ts.Month(), ts.Day(), ts.Hour(),
	)

	// Only the "minute" truncation adds an extra bucket level;
	// any other value keeps the default hourly partitioning.
	if pki.PartitionTruncation == "minute" {
		key += fmt.Sprintf("/minute=%02d", ts.Minute())
	}

	return pki.PartitionPrefix + "/" + key
}

// fileName assembles the object file name:
// <FilePrefix><Metadata>_<unique>[.<FileFormat>][<compression ext>].
func (pki *PartitionKeyBuilder) fileName() string {
	name := pki.FilePrefix + pki.Metadata + "_" + pki.uniqueKey()

	if pki.FileFormat != "" {
		name += "." + pki.FileFormat
	}

	// A missing compression entry yields the empty string, so an
	// unset/unknown algorithm adds no extension.
	return name + compressionFileExtensions[pki.Compression]
}

// uniqueKey returns a pseudo-random numeric string appended to file names
// so uploads from different collector instances do not collide.
func (pki *PartitionKeyBuilder) uniqueKey() string {
	if fn := pki.UniqueKeyFunc; fn != nil {
		return fn()
	}

	// This follows the original "uniqueness" algorithm to avoid
	// collisions on file uploads across different nodes: a random
	// integer in [100000000, 999999998].
	const (
		minOffset    = 100000000
		uniqueValues = 999999999
	)

	return strconv.Itoa(minOffset + rand.IntN(uniqueValues-minOffset))
}
Loading

0 comments on commit 18cb087

Please sign in to comment.