Update for latest CSI driver image post issues/167
Since the [fix](kubernetes-sigs/aws-efs-csi-driver#185) for [issue #167](kubernetes-sigs/aws-efs-csi-driver#167) merged, the AWS EFS CSI driver's [`latest` image tag](sha256-962619a5deb34e1c4257f2120dd941ab026fc96adde003e27f70b65023af5a07?context=explore) has been rebuilt to include it.

For starters, this means we can update this operator to use the [new method of specifying access points](https://github.com/kubernetes-sigs/aws-efs-csi-driver/tree/0ae998c5a95fe6dbee7f43c182997e64872695e6/examples/kubernetes/access_points#edit-persistent-volume-spec) via a colon-delimited `volumeHandle`, rather than via `mountOptions`.
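
As a minimal sketch of the new format (not part of this commit's diff; the IDs are the same placeholder values used in the testdata below, and the middle field — a subpath, per the upstream examples — is assumed to be left empty when only an access point is used):

```go
package main

import "fmt"

func main() {
    // New style: "<filesystem-id>:<subpath>:<access-point-id>"; the middle
    // (subpath) field is left empty, so only the FS and AP IDs appear.
    fsid := "fs-0123cdef"           // example EFS file system ID
    apid := "fsap-0123456789abcdef" // example EFS access point ID
    volumeHandle := fmt.Sprintf("%s::%s", fsid, apid)
    fmt.Println(volumeHandle) // fs-0123cdef::fsap-0123456789abcdef
}
```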

However, the same update to `latest` also pulled in a [commit](kubernetes-sigs/aws-efs-csi-driver@b3baff8) that requires an additional mount in the `efs-plugin` container of the DaemonSet, so we need to update our YAML for that resource at the same time or everything breaks (this may be upstream [issue #192](kubernetes-sigs/aws-efs-csi-driver#192)). This update to the DaemonSet YAML also syncs with [upstream](https://github.com/kubernetes-sigs/aws-efs-csi-driver/blob/0ae998c5a95fe6dbee7f43c182997e64872695e6/deploy/kubernetes/base/node.yaml) by bumping the image versions for the other two containers (csi-node-driver-registrar: v1.1.0 => v1.3.0; livenessprobe: v1.1.0 => v2.0.0).
2uasimojo committed Jun 23, 2020
1 parent eec341c commit bfcfcda
Showing 7 changed files with 113 additions and 31 deletions.
6 changes: 4 additions & 2 deletions pkg/controller/sharedvolume/ensurables_test.go
@@ -130,12 +130,14 @@ func TestCache(t *testing.T) {
 
     // PVs
     pv1 := pvEnsurable(&sharedVolume).(*util.EnsurableImpl).Definition.(*corev1.PersistentVolume)
-    if pv1.Spec.CSI.VolumeHandle != fakeFSID {
+    expVolumeHandle1 := fakeFSID + "::" + fakeAPID
+    if pv1.Spec.CSI.VolumeHandle != expVolumeHandle1 {
         t.Fatalf("Expected PV ensurable to correspond to\nSharedVolume %v\nbut got\nPV %v",
             format(sharedVolume), format(pv1))
     }
+    expVolumeHandle2 := fsid2 + "::" + apid2
     pv2 := pvEnsurable(&sv2).(*util.EnsurableImpl).Definition.(*corev1.PersistentVolume)
-    if pv2.Spec.CSI.VolumeHandle != fsid2 {
+    if pv2.Spec.CSI.VolumeHandle != expVolumeHandle2 {
         t.Fatalf("Expected PV ensurable to correspond to\nSharedVolume %v\nbut got\nPV %v",
             format(sv2), format(pv2))
     }
7 changes: 2 additions & 5 deletions pkg/controller/sharedvolume/pv_ensurable.go
@@ -56,6 +56,7 @@ func pvEqual(local, server runtime.Object) bool {
 
 func pvDefinition(sharedVolume *awsefsv1alpha1.SharedVolume) *corev1.PersistentVolume {
     filesystem := corev1.PersistentVolumeFilesystem
+    volumeHandle := fmt.Sprintf("%s::%s", sharedVolume.Spec.FileSystemID, sharedVolume.Spec.AccessPointID)
     pv := &corev1.PersistentVolume{
         ObjectMeta: metav1.ObjectMeta{
             Name: pvNameForSharedVolume(sharedVolume),
@@ -70,14 +71,10 @@ func pvDefinition(sharedVolume *awsefsv1alpha1.SharedVolume) *corev1.PersistentV
             AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteMany},
             PersistentVolumeReclaimPolicy: corev1.PersistentVolumeReclaimRetain,
             StorageClassName: statics.StorageClassName,
-            MountOptions: []string{
-                "tls",
-                fmt.Sprintf("accesspoint=%s", sharedVolume.Spec.AccessPointID),
-            },
             PersistentVolumeSource: corev1.PersistentVolumeSource{
                 CSI: &corev1.CSIPersistentVolumeSource{
                     Driver: statics.CSIDriverName,
-                    VolumeHandle: sharedVolume.Spec.FileSystemID,
+                    VolumeHandle: volumeHandle,
                 },
             },
         },
32 changes: 25 additions & 7 deletions pkg/controller/sharedvolume/sharedvolume_controller.go
@@ -170,6 +170,12 @@ func (r *ReconcileSharedVolume) Reconcile(request reconcile.Request) (reconcile.
     pve := pvEnsurable(sharedVolume)
     pvce := pvcEnsurable(sharedVolume)
 
+    // TODO: If we just upgraded past 0.0.2, this will update the PV from old style (access point
+    // in MountOptions) to new style (access point in VolumeHandle). Which would be fine, except
+    // - I think the Update will bounce because PVs are immutable after creation. That could get
+    //   us in a hard loop.
+    // - If it doesn't, we've still got https://github.com/openshift/origin/issues/2434 which may
+    //   cause the update to wedge.
     reqLogger.Info("Reconciling PersistentVolume", "Name", pve.GetNamespacedName().Name)
     if err := pve.Ensure(reqLogger, r.client); err != nil {
         // Mark Error status. This is best-effort (ignore any errors), since it's happening within
@@ -359,22 +365,34 @@ func (r *ReconcileSharedVolume) uneditSharedVolume(
     svname := fmt.Sprintf("%s/%s", sharedVolume.Namespace, sharedVolume.Name)
 
     // We found the corresponding PV. Peel the FS and AP IDs out of it.
-    fsid := pv.Spec.PersistentVolumeSource.CSI.VolumeHandle
-    if fsid == "" {
+    volHandle := pv.Spec.PersistentVolumeSource.CSI.VolumeHandle
+    if volHandle == "" {
         // Let's funnel this into our recover() since it's the same class of error as e.g. nil
         // pointer dereference. This will make it easier to handle those errors differently if
         // we decide to do that in the future.
         panic(fmt.Sprintf("PersistentVolume %s for SharedVolume %s has no VolumeHandle", pvname, svname))
     }
     var apid string
-    for _, opt := range pv.Spec.MountOptions {
-        tokens := strings.SplitN(opt, "=", 2)
-        if len(tokens) == 2 && tokens[0] == "accesspoint" {
-            apid = tokens[1]
+    // Discover the access point. We'll tolerate either the old style with the access point in the
+    // MountOptions (0.0.2 and earlier), or the new style where the VolumeHandle is colon-delimited
+    // and includes the access point as the third field.
+    tokens := strings.SplitN(volHandle, ":", 3)
+    fsid := tokens[0]
+    if len(tokens) == 1 {
+        // Access point is in MountOptions
+        for _, opt := range pv.Spec.MountOptions {
+            tokens := strings.SplitN(opt, "=", 2)
+            if len(tokens) == 2 && tokens[0] == "accesspoint" {
+                apid = tokens[1]
+            }
         }
+    } else if len(tokens) == 3 {
+        apid = tokens[2]
+    } else {
+        panic(fmt.Sprintf("Couldn't parse VolumeHandle %q", volHandle))
     }
     if apid == "" {
-        // Ditto
+        // Ditto above
         panic(fmt.Sprintf("Couldn't find Access Point ID in PersistentVolume %s for SharedVolume %s", pvname, svname))
     }

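For clarity, here is a self-contained sketch of the tolerant parsing the hunk above introduces. The `parseHandle` helper is hypothetical (the real code reads a PersistentVolume spec and panics on a malformed handle; this version takes plain arguments and returns an error instead):

```go
package main

import (
    "fmt"
    "strings"
)

// parseHandle mirrors the logic in uneditSharedVolume above: a bare
// VolumeHandle is the old style (the access point lives in MountOptions);
// a colon-delimited three-field handle is the new style, with the access
// point in the third field. Anything else is malformed. If no access point
// is found, apid comes back empty and the caller must decide what to do.
func parseHandle(volHandle string, mountOptions []string) (fsid, apid string, err error) {
    tokens := strings.SplitN(volHandle, ":", 3)
    fsid = tokens[0]
    switch len(tokens) {
    case 1: // old style: scan MountOptions for "accesspoint=..."
        for _, opt := range mountOptions {
            kv := strings.SplitN(opt, "=", 2)
            if len(kv) == 2 && kv[0] == "accesspoint" {
                apid = kv[1]
            }
        }
    case 3: // new style: "<fsid>:<subpath>:<apid>"
        apid = tokens[2]
    default: // exactly two fields: can't tell what this is
        err = fmt.Errorf("couldn't parse VolumeHandle %q", volHandle)
    }
    return fsid, apid, err
}

func main() {
    fmt.Println(parseHandle("fs-0123cdef::fsap-0123456789abcdef", nil))
    fmt.Println(parseHandle("fs-0123cdef", []string{"tls", "accesspoint=fsap-0123456789abcdef"}))
    fmt.Println(parseHandle("fs-0123cdef:fsap-0123456789abcdef", nil))
}
```
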
69 changes: 61 additions & 8 deletions pkg/controller/sharedvolume/sharedvolume_controller_test.go
@@ -304,6 +304,38 @@ func TestReconcile(t *testing.T) {
         t.Fatalf("Expected file system ID to be reverted to %s, but got %s", fs1, sv2.Spec.FileSystemID)
     }
     // And we should be back to gold
+    _, pvMap, _ = validateResources(t, r.client, 2)
+
+    // Let's do that again with a "legacy" PV -- one with the access point in the MountOptions.
+    sv2.Spec.AccessPointID = ape
+    sv2.Spec.FileSystemID = fs2
+    if err = r.client.Update(ctx, sv2); err != nil {
+        t.Fatal(err)
+    }
+    pvname := fmt.Sprintf("/%s", pvNameForSharedVolume(sv2))
+    pv := pvMap[pvname]
+    pv.Spec.CSI.VolumeHandle = fs1
+    pv.Spec.MountOptions = []string{
+        "tls",
+        "accesspoint=" + apd,
+    }
+    if err = r.client.Update(ctx, pv); err != nil {
+        t.Fatal(err)
+    }
+    // This should ask to requeue so the next run through can take a greener path
+    if res, err = r.Reconcile(req); res != test.RequeueResult || err != nil {
+        t.Fatalf("Expected requeue, no error; got\nresult: %v\nerr: %v", res, err)
+    }
+    // There should (still) be two of each resource, but let's check the SV by hand
+    svMap, _, _ = validateResources(t, r.client, 2)
+    sv2 = svMap[fmt.Sprintf("%s/%s", nsy, svb)]
+    if sv2.Spec.AccessPointID != apd {
+        t.Fatalf("Expected access point ID to be reverted to %s, but got %s", apd, sv2.Spec.AccessPointID)
+    }
+    if sv2.Spec.FileSystemID != fs1 {
+        t.Fatalf("Expected file system ID to be reverted to %s, but got %s", fs1, sv2.Spec.FileSystemID)
+    }
+    // And we should be back to gold
     _, pvMap, pvcMap = validateResources(t, r.client, 2)
 
     // Now make sure changes to our managed resources are reverted.
@@ -313,8 +345,7 @@
         t.Fatal(err)
     }
     // And mung the PV
-    pvname := fmt.Sprintf("/%s", pvNameForSharedVolume(sv2))
-    pv := pvMap[pvname]
+    pv = pvMap[pvname]
     pv.Spec.CSI = nil
     if err = r.client.Update(ctx, pv); err != nil {
         t.Fatal(err)
@@ -343,11 +374,29 @@
     }
     _, pvMap, _ = validateResources(t, r.client, 2)
     pv = pvMap[pvname]
-    if pv.Spec.CSI.VolumeHandle != fs1 {
+    expVolHandle := fs1 + "::" + apd
+    if pv.Spec.CSI.VolumeHandle != expVolHandle {
         t.Fatalf("Expected PV's VolumeHandle to be restored, but got %v", format(pv))
     }
 
-    // And again, covering the case where APID is missing
+    // And again, covering the case where the VolumeHandle is downright malformed
+    pv.Spec.CSI.VolumeHandle = fs1 + ":" + apd
+    if err = r.client.Update(ctx, pv); err != nil {
+        t.Fatal(err)
+    }
+    if res, err = r.Reconcile(req); res != test.NullResult || err != nil {
+        t.Fatalf("Expected no requeue, no error; got\nresult: %v\nerr: %v", res, err)
+    }
+    _, pvMap, _ = validateResources(t, r.client, 2)
+    pv = pvMap[pvname]
+    expVolHandle = fs1 + "::" + apd
+    if pv.Spec.CSI.VolumeHandle != expVolHandle {
+        t.Fatalf("Expected PV's VolumeHandle to be restored, but got %v", format(pv))
+    }
+
+    // And again, covering the case where APID is missing from the MountOptions. To trigger this,
+    // we have to force the old style VolumeHandle.
+    pv.Spec.CSI.VolumeHandle = fs1
     pv.Spec.MountOptions = []string{}
     if err = r.client.Update(ctx, pv); err != nil {
         t.Fatal(err)
@@ -357,9 +406,13 @@
     }
     svMap, pvMap, _ = validateResources(t, r.client, 2)
     pv = pvMap[pvname]
-    expectedMountOpt := fmt.Sprintf("accesspoint=%s", apd)
-    if len(pv.Spec.MountOptions) != 2 || pv.Spec.MountOptions[1] != expectedMountOpt {
-        t.Fatalf("Expected PV's MountOptions to be restored, but got %v", format(pv))
+    // Since we're always using the new style for access points, the MountOptions should stay
+    // empty, and the access point should go into the VolumeHandle.
+    if pv.Spec.CSI.VolumeHandle != expVolHandle {
+        t.Fatalf("Expected PV's VolumeHandle to be restored, but got %v", format(pv))
+    }
+    if len(pv.Spec.MountOptions) != 0 {
+        t.Fatalf("Expected no MountOptions, but got %v", format(pv))
     }
 
     // Test the delete path. Note that this doesn't happen by deleting the SharedVolume (yet). We
@@ -519,7 +572,7 @@ func TestUneditUpdateError(t *testing.T) {
     pve := pvEnsurable(sv)
     pv := pve.(*util.EnsurableImpl).Definition.(*corev1.PersistentVolume)
     // Make this trigger the unedit path
-    pv.Spec.CSI.VolumeHandle = "abc123"
+    pv.Spec.CSI.VolumeHandle = "abc123::ap"
 
     // The version of SharedVolume we expect to be passed to Update() will have that changed FSID
     svUpdate := sv.DeepCopy()
6 changes: 1 addition & 5 deletions pkg/controller/sharedvolume/testdata/persistentvolume.yaml
@@ -11,11 +11,7 @@ spec:
   - ReadWriteMany
   persistentVolumeReclaimPolicy: Retain
   storageClassName: efs-sc
-  mountOptions:
-  - tls
-  # File system access point ID
-  - accesspoint=fsap-0123456789abcdef
   csi:
     driver: efs.csi.aws.com
     # EFS volume ID
-    volumeHandle: fs-0123cdef
+    volumeHandle: fs-0123cdef::fsap-0123456789abcdef
12 changes: 10 additions & 2 deletions pkg/controller/statics/defs/daemonset.yaml
@@ -20,6 +20,8 @@ spec:
       # DELTA: Removed
       # priorityClassName: system-node-critical
       nodeSelector:
+        kubernetes.io/os: linux
+        kubernetes.io/arch: amd64
         # DELTA: only deploy this on worker nodes
         # NOTE: This will hit infra nodes as well.
         node-role.kubernetes.io/worker: ''
@@ -51,6 +53,8 @@
             mountPath: /csi
           - name: efs-state-dir
             mountPath: /var/run/efs
+          - name: efs-utils-config
+            mountPath: /etc/amazon/efs
           ports:
           - containerPort: 9809
             hostPort: 9809
@@ -65,7 +69,7 @@
             periodSeconds: 2
             failureThreshold: 5
         - name: csi-driver-registrar
-          image: quay.io/k8scsi/csi-node-driver-registrar:v1.1.0
+          image: quay.io/k8scsi/csi-node-driver-registrar:v1.3.0
           # DELTA: Always pull
           imagePullPolicy: Always
           args:
@@ -88,7 +92,7 @@
             mountPath: /registration
         - name: liveness-probe
           imagePullPolicy: Always
-          image: quay.io/k8scsi/livenessprobe:v1.1.0
+          image: quay.io/k8scsi/livenessprobe:v2.0.0
           args:
           - --csi-address=/csi/csi.sock
           - --health-port=9809
@@ -112,3 +116,7 @@
         hostPath:
           path: /var/run/efs
           type: DirectoryOrCreate
+      - name: efs-utils-config
+        hostPath:
+          path: /etc/amazon/efs
+          type: DirectoryOrCreate
12 changes: 10 additions & 2 deletions pkg/controller/statics/zz_generated_defs.go

Some generated files are not rendered by default.
