Skip to content

Commit b1933f7

Browse files
committed
Use a single cache for all dynamic controllers
Crossplane uses a controller engine to dynamically start claim and XR controllers when a new XRD is installed. Before this commit, each controller gets at least one cache. This is because when I built this functionality, you couldn't stop a single informer within a cache (a cache is basically a map of informers by GVK). When realtime composition is enabled, there are even more caches. One per composed resource GVK. A GVK routed cache routes cache lookups to these various delegate caches. Meanwhile, controller-runtime recently made it possible to stop an informer within a cache. It's also been possible to remove an event handler from an informer for some time (since Kubernetes 1.26). kubernetes-sigs/controller-runtime#2285 kubernetes-sigs/controller-runtime#2046 This commit uses a single client, backed by a single cache, across all dynamic controllers (specifically the definition, offered, claim, and XR controllers). Compared to the current implementation, this commit: * Takes fewer global locks when realtime compositions are enabled. Locking is now mostly at the controller scope. * Works with the breaking changes to source.Source introduced in controller-runtime v0.18. :) I think this makes the realtime composition code a little easier to follow by consolodating it into the ControllerEngine, but that's pretty subjective. Signed-off-by: Nic Cope <[email protected]>
1 parent cfe26aa commit b1933f7

File tree

21 files changed

+2340
-2669
lines changed

21 files changed

+2340
-2669
lines changed

cmd/crossplane/core/core.go

Lines changed: 86 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"context"
2222
"crypto/tls"
2323
"fmt"
24+
"io"
2425
"os"
2526
"path/filepath"
2627
"time"
@@ -30,6 +31,7 @@ import (
3031
corev1 "k8s.io/api/core/v1"
3132
"k8s.io/apimachinery/pkg/runtime"
3233
"k8s.io/client-go/rest"
34+
kcache "k8s.io/client-go/tools/cache"
3335
"k8s.io/client-go/tools/leaderelection/resourcelock"
3436
"k8s.io/client-go/tools/record"
3537
ctrl "sigs.k8s.io/controller-runtime"
@@ -47,6 +49,7 @@ import (
4749

4850
"github.com/crossplane/crossplane/internal/controller/apiextensions"
4951
apiextensionscontroller "github.com/crossplane/crossplane/internal/controller/apiextensions/controller"
52+
"github.com/crossplane/crossplane/internal/controller/engine"
5053
"github.com/crossplane/crossplane/internal/controller/pkg"
5154
pkgcontroller "github.com/crossplane/crossplane/internal/controller/pkg/controller"
5255
"github.com/crossplane/crossplane/internal/features"
@@ -134,6 +137,8 @@ func (c *startCommand) Run(s *runtime.Scheme, log logging.Logger) error { //noli
134137
Deduplicate: true,
135138
})
136139

140+
// The claim and XR controllers don't use the manager's cache or client.
141+
// They use their own. They're setup later in this method.
137142
eb := record.NewBroadcaster()
138143
mgr, err := ctrl.NewManager(ratelimiter.LimitRESTConfig(cfg, c.MaxReconcileRate), ctrl.Options{
139144
Scheme: s,
@@ -270,9 +275,88 @@ func (c *startCommand) Run(s *runtime.Scheme, log logging.Logger) error { //noli
270275
log.Info("Alpha feature enabled", "flag", features.EnableAlphaClaimSSA)
271276
}
272277

278+
// Claim and XR controllers are started and stopped dynamically by the
279+
// ControllerEngine below. When realtime compositions are enabled, they also
280+
// start and stop their watches (e.g. of composed resources) dynamically. To
281+
// do this, the ControllerEngine must have exclusive ownership of a cache.
282+
// This allows it to track what controllers are using the cache's informers.
283+
ca, err := cache.New(mgr.GetConfig(), cache.Options{
284+
HTTPClient: mgr.GetHTTPClient(),
285+
Scheme: mgr.GetScheme(),
286+
Mapper: mgr.GetRESTMapper(),
287+
SyncPeriod: &c.SyncInterval,
288+
289+
// When a CRD is deleted, any informers for its GVKs will start trying
290+
// to restart their watches, and fail with scary errors. This should
291+
// only happen when realtime composition is enabled, and we should GC
292+
// the informer within 60 seconds. This handler tries to make the error
293+
// a little more informative, and less scary.
294+
DefaultWatchErrorHandler: func(_ *kcache.Reflector, err error) {
295+
if errors.Is(io.EOF, err) {
296+
// Watch closed normally.
297+
return
298+
}
299+
log.Debug("Watch error - probably due to CRD being uninstalled", "error", err)
300+
},
301+
})
302+
if err != nil {
303+
return errors.Wrap(err, "cannot create cache for API extension controllers")
304+
}
305+
306+
ctx, cancel := context.WithCancel(context.Background())
307+
defer cancel()
308+
go func() {
309+
// Don't start the cache until the manager is elected.
310+
<-mgr.Elected()
311+
312+
if err := ca.Start(ctx); err != nil {
313+
log.Info("API extensions cache returned an error", "error", err)
314+
}
315+
316+
log.Info("API extensions cache stopped")
317+
}()
318+
319+
cl, err := client.New(mgr.GetConfig(), client.Options{
320+
HTTPClient: mgr.GetHTTPClient(),
321+
Scheme: mgr.GetScheme(),
322+
Mapper: mgr.GetRESTMapper(),
323+
Cache: &client.CacheOptions{
324+
Reader: ca,
325+
326+
// Don't cache secrets - there may be a lot of them.
327+
DisableFor: []client.Object{&corev1.Secret{}},
328+
329+
// When this is enabled the client will automatically start a cache
330+
// informer whenever an XR controller Gets a new kind of composed
331+
// resource.
332+
//
333+
// We need to stop the informer when it's not needed anymore. At
334+
// best an unused informer wastes memory and keeps an unneeded watch
335+
// open on the API server. At worst, the CRD the informer listens
336+
// for is uninstalled and the informer continuously tries to
337+
// restart its watch, logging errors the whole time.
338+
//
339+
// When realtime composition is enabled, controllers record what
340+
// GVKs they watch. We can garbage collect an informer when no
341+
// controller watches its types. When it's not enabled we don't know
342+
// when to garbage collect informers.
343+
//
344+
// TODO(negz): GC informers when their CRD is deleted.
345+
// TODO(negz): GC informers when no XR composes their GVK.
346+
Unstructured: o.Features.Enabled(features.EnableAlphaRealtimeCompositions),
347+
},
348+
})
349+
if err != nil {
350+
return errors.Wrap(err, "cannot create client for API extension controllers")
351+
}
352+
353+
ce := engine.New(mgr, engine.TrackInformers(ca, mgr.GetScheme()), engine.WithLogger(log))
273354
ao := apiextensionscontroller.Options{
274-
Options: o,
275-
FunctionRunner: functionRunner,
355+
Options: o,
356+
ControllerClient: cl,
357+
ControllerFieldIndexer: ca,
358+
ControllerEngine: ce,
359+
FunctionRunner: functionRunner,
276360
}
277361

278362
if err := apiextensions.Setup(mgr, ao); err != nil {

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ require (
142142
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
143143
github.com/fatih/color v1.16.0 // indirect
144144
github.com/fsnotify/fsnotify v1.7.0 // indirect
145-
github.com/go-logr/logr v1.4.1
145+
github.com/go-logr/logr v1.4.1 // indirect
146146
github.com/go-logr/stdr v1.2.2 // indirect
147147
github.com/go-logr/zapr v1.3.0 // indirect
148148
github.com/go-openapi/jsonpointer v0.19.6 // indirect
@@ -154,7 +154,7 @@ require (
154154
github.com/golang/protobuf v1.5.4 // indirect
155155
github.com/google/go-containerregistry/pkg/authn/kubernetes v0.0.0-20230516205744-dbecb1de8cfa // indirect
156156
github.com/google/gofuzz v1.2.0 // indirect
157-
github.com/google/uuid v1.6.0
157+
github.com/google/uuid v1.6.0 // indirect
158158
github.com/imdario/mergo v0.3.16 // indirect
159159
github.com/inconshreveable/mousetrap v1.1.0 // indirect
160160
github.com/jmespath/go-jmespath v0.4.0 // indirect

internal/controller/apiextensions/claim/reconciler.go

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ import (
2727
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2828
"k8s.io/apimachinery/pkg/runtime/schema"
2929
"sigs.k8s.io/controller-runtime/pkg/client"
30-
"sigs.k8s.io/controller-runtime/pkg/manager"
3130
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3231

3332
xpv1 "github.com/crossplane/crossplane-runtime/apis/common/v1"
@@ -37,7 +36,6 @@ import (
3736
"github.com/crossplane/crossplane-runtime/pkg/meta"
3837
"github.com/crossplane/crossplane-runtime/pkg/reconciler/managed"
3938
"github.com/crossplane/crossplane-runtime/pkg/resource"
40-
"github.com/crossplane/crossplane-runtime/pkg/resource/unstructured"
4139
"github.com/crossplane/crossplane-runtime/pkg/resource/unstructured/claim"
4240
"github.com/crossplane/crossplane-runtime/pkg/resource/unstructured/composite"
4341

@@ -221,14 +219,6 @@ func defaultCRClaim(c client.Client) crClaim {
221219
// A ReconcilerOption configures a Reconciler.
222220
type ReconcilerOption func(*Reconciler)
223221

224-
// WithClient specifies how the Reconciler should interact with the Kubernetes
225-
// API.
226-
func WithClient(c client.Client) ReconcilerOption {
227-
return func(r *Reconciler) {
228-
r.client = c
229-
}
230-
}
231-
232222
// WithManagedFieldsUpgrader specifies how the Reconciler should upgrade claim
233223
// and composite resource (XR) managed fields from client-side apply to
234224
// server-side apply.
@@ -300,8 +290,7 @@ func WithPollInterval(after time.Duration) ReconcilerOption {
300290
// The returned Reconciler will apply only the ObjectMetaConfigurator by
301291
// default; most callers should supply one or more CompositeConfigurators to
302292
// configure their composite resources.
303-
func NewReconciler(m manager.Manager, of resource.CompositeClaimKind, with resource.CompositeKind, o ...ReconcilerOption) *Reconciler {
304-
c := unstructured.NewClient(m.GetClient())
293+
func NewReconciler(c client.Client, of resource.CompositeClaimKind, with resource.CompositeKind, o ...ReconcilerOption) *Reconciler {
305294
r := &Reconciler{
306295
client: c,
307296
gvkClaim: schema.GroupVersionKind(of),

0 commit comments

Comments
 (0)