✨ Add Namespace Scoped Zone Discovery and Watch (#3146)
* Add Namespace Scoped Zone Discovery

- Introduce a feature flag to enable Namespace Scoped Zone.
- Enhance zone discovery to support Namespace Scoped Zones.
- Filter out zones marked for deletion during the discovery process.

* vspherecluster: rewrite tests for getFailureDomains

* Add Watch for Zone events to update FailureDomains accordingly

Signed-off-by: Gong Zhang <[email protected]>

---------

Signed-off-by: Gong Zhang <[email protected]>
Co-authored-by: Christian Schlotter <[email protected]>
zhanggbj and chrischdi authored Aug 13, 2024
1 parent c6eff61 commit 4f8d638
Showing 23 changed files with 848 additions and 1,141 deletions.
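In CAPI terms, the new discovery path populates the clusterv1.FailureDomains map that the infrastructure cluster reports back to the core controllers, keyed by Zone name. A minimal illustration of the intended result, using hypothetical Zone names zone-a and zone-b (not taken from this change):

package example // illustration only, not part of this commit

import clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"

// Every Zone in the cluster's namespace that is not being deleted becomes a
// control-plane-capable failure domain.
var discovered = clusterv1.FailureDomains{
	"zone-a": {ControlPlane: true},
	"zone-b": {ControlPlane: true},
}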
1 change: 1 addition & 0 deletions .golangci.yml
@@ -5,6 +5,7 @@ run:
- "zz_generated.*\\.go$"
- "_conversion\\.go$"
- "vendored_cluster_api\\.go$"
- "^internal/apis/topology/v1alpha1"
allow-parallel-runners: true

linters:
2 changes: 1 addition & 1 deletion config/manager/manager.yaml
@@ -21,7 +21,7 @@ spec:
- "--diagnostics-address=${CAPI_DIAGNOSTICS_ADDRESS:=:8443}"
- "--insecure-diagnostics=${CAPI_INSECURE_DIAGNOSTICS:=false}"
- --v=4
- "--feature-gates=NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false}"
- "--feature-gates=NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false},NamespaceScopedZones=${EXP_NAMESPACE_SCOPED_ZONES:=false}"
image: controller:latest
imagePullPolicy: IfNotPresent
name: manager
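The new NamespaceScopedZones entry in --feature-gates implies a matching gate definition in the provider's feature package, which is among the changed files but not expanded on this page. A rough sketch of what that definition presumably looks like, following the usual component-base featuregate pattern — the MutableGates name and the Alpha maturity are assumptions; only feature.Gates and feature.NamespaceScopedZones appear in the diffs below:

package feature

import (
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/component-base/featuregate"
)

const (
	// NamespaceScopedZones gates the namespace-scoped Zone discovery and watch added below.
	NamespaceScopedZones featuregate.Feature = "NamespaceScopedZones"
)

var (
	// MutableGates (assumed name) is the writable gate set; Gates is the read-only
	// view referenced as feature.Gates in the reconciler and controller diffs.
	MutableGates featuregate.MutableFeatureGate = featuregate.NewFeatureGate()
	Gates        featuregate.FeatureGate        = MutableGates
)

func init() {
	// Default off, matching EXP_NAMESPACE_SCOPED_ZONES:=false above; Alpha maturity is assumed.
	utilruntime.Must(MutableGates.Add(map[featuregate.Feature]featuregate.FeatureSpec{
		NamespaceScopedZones: {Default: false, PreRelease: featuregate.Alpha},
	}))
}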
8 changes: 8 additions & 0 deletions config/rbac/role.yaml
@@ -376,6 +376,14 @@ rules:
- get
- list
- watch
- apiGroups:
- topology.tanzu.vmware.com
resources:
- zones
verbs:
- get
- list
- watch
- apiGroups:
- vmoperator.vmware.com
resources:
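This generated rule corresponds to the +kubebuilder:rbac marker added in controllers/vspherecluster_controller.go further down; config/rbac/role.yaml is produced from markers of this form (the exact generation target is repository-specific and not shown in this diff):

// +kubebuilder:rbac:groups=topology.tanzu.vmware.com,resources=zones,verbs=get;list;watch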
68 changes: 63 additions & 5 deletions controllers/vmware/vspherecluster_reconciler.go
@@ -22,7 +22,6 @@ import (
"fmt"

"github.com/pkg/errors"
topologyv1 "github.com/vmware-tanzu/vm-operator/external/tanzu-topology/api/v1alpha1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
kerrors "k8s.io/apimachinery/pkg/util/errors"
@@ -40,6 +39,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"

vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
"sigs.k8s.io/cluster-api-provider-vsphere/feature"
topologyv1 "sigs.k8s.io/cluster-api-provider-vsphere/internal/apis/topology/v1alpha1"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/services"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/util"
@@ -160,7 +161,7 @@ func (r *ClusterReconciler) reconcileDelete(clusterCtx *vmware.ClusterContext) {

func (r *ClusterReconciler) reconcileNormal(ctx context.Context, clusterCtx *vmware.ClusterContext) error {
// Get any failure domains to report back to the CAPI core controller.
failureDomains, err := r.getFailureDomains(ctx)
failureDomains, err := r.getFailureDomains(ctx, clusterCtx.VSphereCluster.Namespace)
if err != nil {
return errors.Wrapf(
err,
@@ -369,9 +370,68 @@ func (r *ClusterReconciler) VSphereMachineToCluster(ctx context.Context, o clien
}}
}

// ZoneToVSphereClusters adds reconcile requests for VSphereClusters when a Zone has an event.
func (r *ClusterReconciler) ZoneToVSphereClusters(ctx context.Context, o client.Object) []reconcile.Request {
log := ctrl.LoggerFrom(ctx)

zone, ok := o.(*topologyv1.Zone)
if !ok {
log.Error(nil, fmt.Sprintf("Expected a Zone but got a %T", o))
return nil
}
log = log.WithValues("Zone", klog.KObj(zone))
ctx = ctrl.LoggerInto(ctx, log)

vsphereClusters := &vmwarev1.VSphereClusterList{}
err := r.Client.List(ctx, vsphereClusters, &client.ListOptions{Namespace: zone.Namespace})
if err != nil {
log.V(4).Error(err, "Failed to get VSphereClusters from Zone")
return nil
}

log.V(6).Info("Triggering VSphereCluster reconcile for Zone")
requests := []reconcile.Request{}
for _, c := range vsphereClusters.Items {
r := reconcile.Request{
NamespacedName: types.NamespacedName{
Name: c.Name,
Namespace: c.Namespace,
},
}
requests = append(requests, r)
}

return requests
}

// Returns the failure domain information discovered on the cluster
// hosting this controller.
func (r *ClusterReconciler) getFailureDomains(ctx context.Context) (clusterv1.FailureDomains, error) {
func (r *ClusterReconciler) getFailureDomains(ctx context.Context, namespace string) (clusterv1.FailureDomains, error) {
failureDomains := clusterv1.FailureDomains{}
// Determine the source of failure domains based on the NamespaceScopedZones feature gate.
// If NamespaceScopedZones is enabled, use Zones, which are namespace-scoped; otherwise use
// AvailabilityZones, which are cluster-scoped.
if feature.Gates.Enabled(feature.NamespaceScopedZones) {
zoneList := &topologyv1.ZoneList{}
listOptions := &client.ListOptions{Namespace: namespace}
if err := r.Client.List(ctx, zoneList, listOptions); err != nil {
return nil, errors.Wrapf(err, "failed to list Zones in namespace %s", namespace)
}

for _, zone := range zoneList.Items {
// Skip Zones that are being deleted
if !zone.DeletionTimestamp.IsZero() {
continue
}
failureDomains[zone.Name] = clusterv1.FailureDomainSpec{ControlPlane: true}
}

if len(failureDomains) == 0 {
return nil, nil
}

return failureDomains, nil
}
availabilityZoneList := &topologyv1.AvailabilityZoneList{}
if err := r.Client.List(ctx, availabilityZoneList); err != nil {
return nil, err
@@ -380,8 +440,6 @@ func (r *ClusterReconciler) getFailureDomains(ctx context.Context) (clusterv1.Fa
if len(availabilityZoneList.Items) == 0 {
return nil, nil
}

failureDomains := clusterv1.FailureDomains{}
for _, az := range availabilityZoneList.Items {
failureDomains[az.Name] = clusterv1.FailureDomainSpec{
ControlPlane: true,
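getFailureDomains only reads object metadata from the listed Zones (name and deletion timestamp), so the vendored API in internal/apis/topology/v1alpha1 — added elsewhere in this commit but not expanded on this page — can be pictured roughly as follows. This is an assumed minimal shape for illustration only; the real package also carries Spec/Status fields, deepcopy functions, and AddToScheme registration:

package v1alpha1 // sketch of the assumed vendored topology API

import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

// Zone is the namespace-scoped failure domain object listed when the
// NamespaceScopedZones gate is enabled; only its metadata is consumed above.
type Zone struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`
	// Spec and Status omitted in this sketch.
}

// ZoneList contains a list of Zone objects.
type ZoneList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []Zone `json:"items"`
}

// AvailabilityZone is the pre-existing cluster-scoped failure domain object
// used when the gate is disabled.
type AvailabilityZone struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`
}

// AvailabilityZoneList contains a list of AvailabilityZone objects.
type AvailabilityZoneList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []AvailabilityZone `json:"items"`
}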
179 changes: 151 additions & 28 deletions controllers/vmware/vspherecluster_reconciler_test.go
@@ -17,22 +17,30 @@ limitations under the License.
package vmware

import (
"context"
"os"
"path/filepath"
"reflect"
"testing"

. "github.com/onsi/ginkgo/v2"
"github.com/onsi/ginkgo/v2/types"
. "github.com/onsi/gomega"
topologyv1 "github.com/vmware-tanzu/vm-operator/external/tanzu-topology/api/v1alpha1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
apirecord "k8s.io/client-go/tools/record"
utilfeature "k8s.io/component-base/featuregate/testing"
"k8s.io/utils/ptr"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util/conditions"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
"sigs.k8s.io/cluster-api-provider-vsphere/feature"
topologyv1 "sigs.k8s.io/cluster-api-provider-vsphere/internal/apis/topology/v1alpha1"
capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/services/network"
@@ -128,34 +136,149 @@ var _ = Describe("Cluster Controller Tests", func() {
Expect(c.Reason).NotTo(Equal(clusterv1.DeletingReason))
})
})
})

Context("Test getFailureDomains", func() {
It("should not find FailureDomains", func() {
fds, err := reconciler.getFailureDomains(ctx)
Expect(err).ToNot(HaveOccurred())
Expect(fds).Should(BeEmpty())
})
func TestClusterReconciler_getFailureDomains(t *testing.T) {
g := NewWithT(t)
ctx := context.Background()

It("should find FailureDomains", func() {
zoneNames := []string{"homer", "marge", "bart"}
for _, name := range zoneNames {
zone := &topologyv1.AvailabilityZone{
TypeMeta: metav1.TypeMeta{
APIVersion: topologyv1.GroupVersion.String(),
Kind: "AvailabilityZone",
},
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
}

Expect(controllerManagerContext.Client.Create(ctx, zone)).To(Succeed())
}
scheme := runtime.NewScheme()
g.Expect(corev1.AddToScheme(scheme)).To(Succeed())
g.Expect(topologyv1.AddToScheme(scheme)).To(Succeed())

fds, err := reconciler.getFailureDomains(ctx)
Expect(err).ToNot(HaveOccurred())
Expect(fds).NotTo(BeNil())
Expect(fds).Should(HaveLen(3))
namespace := &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: "test-namespace",
},
}

tests := []struct {
name string
objects []client.Object
want clusterv1.FailureDomains
wantErr bool
featureGate bool
}{
{
name: "Cluster-Wide: should not find any FailureDomains if no exists",
objects: []client.Object{},
want: nil,
wantErr: false,
featureGate: false,
},
{
name: "Namespaced: should not find any FailureDomains if no exists",
objects: []client.Object{},
want: nil,
wantErr: false,
featureGate: true,
},
{
name: "Cluster-Wide: should not find any FailureDomains if only namespaced exist",
objects: []client.Object{zone(namespace.Name, "ns-one", false)},
want: nil,
wantErr: false,
featureGate: false,
},
{
name: "Namespaced: should not find any FailureDomains if only cluster-wide exist",
objects: []client.Object{availabilityZone("c-one")},
want: nil,
wantErr: false,
featureGate: true,
},
{
name: "Cluster-Wide: should find FailureDomains if only cluster-wide exist",
objects: []client.Object{availabilityZone("c-one")},
want: failureDomains("c-one"),
wantErr: false,
featureGate: false,
},
{
name: "Namespaced: should find FailureDomains if only namespaced exist",
objects: []client.Object{zone(namespace.Name, "ns-one", false)},
want: failureDomains("ns-one"),
wantErr: false,
featureGate: true,
},
{
name: "Cluster-Wide: should only find cluster-wide FailureDomains if both types exist",
objects: []client.Object{availabilityZone("c-one"), zone(namespace.Name, "ns-one", false)},
want: failureDomains("c-one"),
wantErr: false,
featureGate: false,
},
{
name: "Namespaced: should only find namespaced FailureDomains if both types exist",
objects: []client.Object{availabilityZone("c-one"), zone(namespace.Name, "ns-one", false)},
want: failureDomains("ns-one"),
wantErr: false,
featureGate: true,
},
{
name: "Namespaced: should only find non-deleting namespaced FailureDomains",
objects: []client.Object{
availabilityZone("c-one"),
zone(namespace.Name, "ns-one", false),
zone(namespace.Name, "ns-two", false),
zone(namespace.Name, "ns-three", false),
zone(namespace.Name, "ns-four", true),
},
want: failureDomains("ns-one", "ns-two", "ns-three"),
wantErr: false,
featureGate: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
r := &ClusterReconciler{
Client: fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(append([]client.Object{namespace}, tt.objects...)...).
Build(),
}
defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.NamespaceScopedZones, tt.featureGate)()
got, err := r.getFailureDomains(ctx, namespace.Name)
if (err != nil) != tt.wantErr {
t.Errorf("ClusterReconciler.getFailureDomains() error = %v, wantErr %v", err, tt.wantErr)
return
}
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("ClusterReconciler.getFailureDomains() = %v, want %v", got, tt.want)
}
})
})
})
}
}

func availabilityZone(name string) *topologyv1.AvailabilityZone {
return &topologyv1.AvailabilityZone{
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
}
}

func zone(namespace, name string, deleting bool) *topologyv1.Zone {
z := &topologyv1.Zone{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Name: name,
},
}

if deleting {
z.ObjectMeta.DeletionTimestamp = ptr.To(metav1.Now())
z.ObjectMeta.Finalizers = []string{"deletion.test.io/protection"}
}
return z
}

func failureDomains(names ...string) clusterv1.FailureDomains {
fds := clusterv1.FailureDomains{}
for _, name := range names {
fds[name] = clusterv1.FailureDomainSpec{
ControlPlane: true,
}
}
return fds
}
17 changes: 14 additions & 3 deletions controllers/vspherecluster_controller.go
@@ -38,6 +38,7 @@ import (
vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
"sigs.k8s.io/cluster-api-provider-vsphere/controllers/vmware"
"sigs.k8s.io/cluster-api-provider-vsphere/feature"
topologyv1 "sigs.k8s.io/cluster-api-provider-vsphere/internal/apis/topology/v1alpha1"
capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context"
inframanager "sigs.k8s.io/cluster-api-provider-vsphere/pkg/manager"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/services"
@@ -52,6 +53,7 @@ import (
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch
// +kubebuilder:rbac:groups=topology.tanzu.vmware.com,resources=availabilityzones,verbs=get;list;watch
// +kubebuilder:rbac:groups=topology.tanzu.vmware.com,resources=availabilityzones/status,verbs=get;list;watch
// +kubebuilder:rbac:groups=topology.tanzu.vmware.com,resources=zones,verbs=get;list;watch

// AddClusterControllerToManager adds the cluster controller to the provided
// manager.
@@ -72,15 +74,24 @@ func AddClusterControllerToManager(ctx context.Context, controllerManagerCtx *ca
},
NetworkProvider: networkProvider,
}
return ctrl.NewControllerManagedBy(mgr).
builder := ctrl.NewControllerManagedBy(mgr).
For(&vmwarev1.VSphereCluster{}).
WithOptions(options).
Watches(
&vmwarev1.VSphereMachine{},
handler.EnqueueRequestsFromMapFunc(reconciler.VSphereMachineToCluster),
).
WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), controllerManagerCtx.WatchFilterValue)).
Complete(reconciler)
WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), controllerManagerCtx.WatchFilterValue))

// Conditionally add a Watch for topologyv1.Zone when the feature gate is enabled
if feature.Gates.Enabled(feature.NamespaceScopedZones) {
builder = builder.Watches(
&topologyv1.Zone{},
handler.EnqueueRequestsFromMapFunc(reconciler.ZoneToVSphereClusters),
)
}

return builder.Complete(reconciler)
}

reconciler := &clusterReconciler{
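For the conditional Watch above to receive Zone events, the Zone API also has to be registered on the manager's scheme; that wiring lives in the manager setup elsewhere in this commit and is not visible here. A hedged sketch of what it could look like — the helper name and package are hypothetical, and whether the real code gates the registration on the feature flag or always registers the types is not shown in this diff:

package controllers // sketch only; the real scheme wiring is in the manager setup

import (
	ctrl "sigs.k8s.io/controller-runtime"

	topologyv1 "sigs.k8s.io/cluster-api-provider-vsphere/internal/apis/topology/v1alpha1"
)

// addTopologyToScheme (hypothetical helper) registers the Zone types so the
// conditional Watch can decode Zone events.
func addTopologyToScheme(mgr ctrl.Manager) error {
	return topologyv1.AddToScheme(mgr.GetScheme())
}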