diff --git a/cmd/nfd-worker/main.go b/cmd/nfd-worker/main.go index fe589f5461..fc09d81a93 100644 --- a/cmd/nfd-worker/main.go +++ b/cmd/nfd-worker/main.go @@ -59,9 +59,6 @@ func main() { klog.InfoS("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.") } - // Plug klog into grpc logging infrastructure - utils.ConfigureGrpcKlog() - // Get new NfdWorker instance instance, err := worker.NewNfdWorker(worker.WithArgs(args)) if err != nil { @@ -109,10 +106,8 @@ func initFlags(flagset *flag.FlagSet) (*worker.Args, *worker.ConfigOverrideArgs) "Kubeconfig to use") flagset.BoolVar(&args.Oneshot, "oneshot", false, "Do not publish feature labels") - flagset.IntVar(&args.MetricsPort, "metrics", 8081, - "Port on which to expose metrics.") - flagset.IntVar(&args.GrpcHealthPort, "grpc-health", 8082, - "Port on which to expose the grpc health endpoint.") + flagset.IntVar(&args.Port, "port", 8080, + "Port on which metrics and healthz endpoints are served") flagset.StringVar(&args.Options, "options", "", "Specify config options from command line. Config options are specified "+ "in the same format as in the config file (i.e. json or yaml). 
These options") diff --git a/deployment/base/worker-daemonset/worker-daemonset.yaml b/deployment/base/worker-daemonset/worker-daemonset.yaml index 955157877c..8bdc062d99 100644 --- a/deployment/base/worker-daemonset/worker-daemonset.yaml +++ b/deployment/base/worker-daemonset/worker-daemonset.yaml @@ -20,13 +20,15 @@ spec: image: gcr.io/k8s-staging-nfd/node-feature-discovery:master imagePullPolicy: Always livenessProbe: - grpc: - port: 8082 + httpGet: + path: /healthz + port: http initialDelaySeconds: 10 periodSeconds: 10 readinessProbe: - grpc: - port: 8082 + httpGet: + path: /healthz + port: http initialDelaySeconds: 5 periodSeconds: 10 failureThreshold: 10 @@ -42,5 +44,5 @@ spec: args: - "-server=nfd-master:8080" ports: - - name: metrics - containerPort: 8081 + - name: http + containerPort: 8080 diff --git a/deployment/helm/node-feature-discovery/templates/worker.yaml b/deployment/helm/node-feature-discovery/templates/worker.yaml index fbbc741e50..526841aabc 100644 --- a/deployment/helm/node-feature-discovery/templates/worker.yaml +++ b/deployment/helm/node-feature-discovery/templates/worker.yaml @@ -47,8 +47,9 @@ spec: image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} livenessProbe: - grpc: - port: {{ .Values.worker.healthPort | default "8082" }} + httpGet: + path: /healthz + port: http {{- with .Values.worker.livenessProbe.initialDelaySeconds }} initialDelaySeconds: {{ . }} {{- end }} @@ -62,8 +63,9 @@ spec: timeoutSeconds: {{ . }} {{- end }} readinessProbe: - grpc: - port: {{ .Values.worker.healthPort | default "8082" }} + httpGet: + path: /healthz + port: http {{- with .Values.worker.readinessProbe.initialDelaySeconds }} initialDelaySeconds: {{ . 
}} {{- end }} @@ -107,16 +109,13 @@ spec: {{- range $key, $value := .Values.featureGates }} - "-feature-gates={{ $key }}={{ $value }}" {{- end }} - - "-metrics={{ .Values.worker.metricsPort | default "8081"}}" - - "-grpc-health={{ .Values.worker.healthPort | default "8082" }}" + - "-port={{ .Values.worker.port | default "8080"}}" {{- with .Values.gc.extraArgs }} {{- toYaml . | nindent 8 }} {{- end }} ports: - - containerPort: {{ .Values.worker.metricsPort | default "8081"}} - name: metrics - - containerPort: {{ .Values.worker.healthPort | default "8082" }} - name: health + - containerPort: {{ .Values.worker.port | default "8080"}} + name: http volumeMounts: - name: host-boot mountPath: "/host-boot" diff --git a/deployment/helm/node-feature-discovery/values.yaml b/deployment/helm/node-feature-discovery/values.yaml index 5ff0df68b5..27c3e9ef21 100644 --- a/deployment/helm/node-feature-discovery/values.yaml +++ b/deployment/helm/node-feature-discovery/values.yaml @@ -419,8 +419,7 @@ worker: # matchName: {op: In, value: ["SWAP", "X86", "ARM"]} ### - metricsPort: 8081 - healthPort: 8082 + port: 8080 daemonsetAnnotations: {} podSecurityContext: {} # fsGroup: 2000 @@ -434,15 +433,11 @@ worker: # runAsUser: 1000 livenessProbe: - grpc: - port: 8082 initialDelaySeconds: 10 # failureThreshold: 3 # periodSeconds: 10 # timeoutSeconds: 1 readinessProbe: - grpc: - port: 8082 initialDelaySeconds: 5 failureThreshold: 10 # periodSeconds: 10 diff --git a/docs/deployment/helm.md b/docs/deployment/helm.md index a469c41b11..972f0c1592 100644 --- a/docs/deployment/helm.md +++ b/docs/deployment/helm.md @@ -230,8 +230,7 @@ API's you need to install the prometheus operator in your cluster. 
| `worker.*` | dict | | NFD worker daemonset configuration | | `worker.enable` | bool | true | Specifies whether nfd-worker should be deployed | | `worker.hostNetwork` | bool | false | Specifies whether to enable or disable running the container in the host's network namespace | -| `worker.metricsPort` | int | 8081 | Port on which to expose metrics from components to prometheus operator | -| `worker.healthPort` | int | 8082 | Port on which to expose the grpc health endpoint, will be also used for the probes | +| `worker.port` | int | 8080 | Port on which to serve http for metrics and healthz endpoints. | | `worker.config` | dict | | NFD worker [configuration](../reference/worker-configuration-reference) | -| `worker.podSecurityContext` | dict | {} | [PodSecurityContext](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) holds pod-level security attributes and common container settins | +| `worker.podSecurityContext` | dict | {} | [PodSecurityContext](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) holds pod-level security attributes and common container settings | | `worker.securityContext` | dict | {} | Container [security settings](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) | diff --git a/pkg/nfd-worker/nfd-worker.go b/pkg/nfd-worker/nfd-worker.go index 0c65a59f26..6599c85f83 100644 --- a/pkg/nfd-worker/nfd-worker.go +++ b/pkg/nfd-worker/nfd-worker.go @@ -19,7 +19,7 @@ package nfdworker import ( "encoding/json" "fmt" - "net" + "net/http" "os" "path/filepath" "regexp" @@ -27,11 +27,10 @@ import ( "strings" "time" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" "golang.org/x/exp/maps" "golang.org/x/net/context" - "google.golang.org/grpc" - "google.golang.org/grpc/health" - "google.golang.org/grpc/health/grpc_health_v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation" @@ -91,13 +90,12 @@ type Labels map[string]string // Args are the command line arguments of NfdWorker. 
type Args struct { - ConfigFile string - Klog map[string]*utils.KlogFlagVal - Kubeconfig string - Oneshot bool - Options string - MetricsPort int - GrpcHealthPort int + ConfigFile string + Klog map[string]*utils.KlogFlagVal + Kubeconfig string + Oneshot bool + Options string + Port int Overrides ConfigOverrideArgs } @@ -115,7 +113,6 @@ type nfdWorker struct { configFilePath string config *NFDConfig kubernetesNamespace string - healthServer *grpc.Server k8sClient k8sclient.Interface nfdClient nfdclient.Interface stop chan struct{} // channel for signaling stop @@ -203,6 +200,10 @@ func newDefaultConfig() *NFDConfig { } } +func (w *nfdWorker) Healthz(writer http.ResponseWriter, _ *http.Request) { + writer.WriteHeader(http.StatusOK) // always answers 200 OK; no worker state is consulted +} + func (i *infiniteTicker) Reset(d time.Duration) { switch { case d > 0: @@ -214,29 +215,6 @@ func (i *infiniteTicker) Reset(d time.Duration) { } } -func (w *nfdWorker) startGrpcHealthServer(errChan chan<- error) error { - lis, err := net.Listen("tcp", fmt.Sprintf(":%d", w.args.GrpcHealthPort)) - if err != nil { - return fmt.Errorf("failed to listen: %w", err) - } - - s := grpc.NewServer() - grpc_health_v1.RegisterHealthServer(s, health.NewServer()) - klog.InfoS("gRPC health server serving", "port", w.args.GrpcHealthPort) - - go func() { - defer func() { - lis.Close() - }() - if err := s.Serve(lis); err != nil { - errChan <- fmt.Errorf("gRPC health server exited with an error: %w", err) - } - klog.InfoS("gRPC health server stopped") - }() - w.healthServer = s - return nil -} - // Run feature discovery. 
func (w *nfdWorker) runFeatureDiscovery() error { discoveryStart := time.Now() @@ -312,15 +290,13 @@ func (w *nfdWorker) Run() error { w.ownerReference = ownerReference + httpMux := http.NewServeMux() + // Register to metrics server - if w.args.MetricsPort > 0 { - m := utils.CreateMetricsServer(w.args.MetricsPort, - buildInfo, - featureDiscoveryDuration) - go m.Run() - registerVersion(version.Get()) - defer m.Stop() - } + promRegistry := prometheus.NewRegistry() + promRegistry.MustRegister(buildInfo, featureDiscoveryDuration) + httpMux.Handle("/metrics", promhttp.HandlerFor(promRegistry, promhttp.HandlerOpts{})) + registerVersion(version.Get()) err = w.runFeatureDiscovery() if err != nil { @@ -332,20 +308,19 @@ func (w *nfdWorker) Run() error { return nil } - grpcErr := make(chan error) + // Register health probe (at this point we're "ready and live") + httpMux.HandleFunc("/healthz", w.Healthz) - // Start gRPC server for liveness probe (at this point we're "live") - if w.args.GrpcHealthPort != 0 { - if err := w.startGrpcHealthServer(grpcErr); err != nil { - return fmt.Errorf("failed to start gRPC health server: %w", err) - } - } + // Start HTTP server for the /metrics and /healthz handlers registered on httpMux. NOTE(review): the error returned by ListenAndServe is only logged (under the key "exitCode"), so a failed listen no longer makes Run return an error as the removed gRPC health server path did; confirm this is intended. + httpServer := http.Server{Addr: fmt.Sprintf(":%d", w.args.Port), Handler: httpMux} + go func() { + klog.InfoS("http server starting", "port", httpServer.Addr) + klog.InfoS("http server stopped", "exitCode", httpServer.ListenAndServe()) + }() + defer httpServer.Close() for { select { - case err := <-grpcErr: - return fmt.Errorf("error in serving gRPC: %w", err) - case <-labelTrigger.C: err = w.runFeatureDiscovery() if err != nil { @@ -354,9 +329,6 @@ func (w *nfdWorker) Run() error { case <-w.stop: klog.InfoS("shutting down nfd-worker") - if w.healthServer != nil { - w.healthServer.GracefulStop() - } return nil } }