From ad1abfa2da902bcf1b153a37ab9288ede4da97c9 Mon Sep 17 00:00:00 2001 From: carsonmh Date: Mon, 31 Jul 2023 12:29:59 -0700 Subject: [PATCH 01/10] add: cli status function --- src/codeflare_sdk/cli/commands/status.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 src/codeflare_sdk/cli/commands/status.py diff --git a/src/codeflare_sdk/cli/commands/status.py b/src/codeflare_sdk/cli/commands/status.py new file mode 100644 index 0000000..5d61fa9 --- /dev/null +++ b/src/codeflare_sdk/cli/commands/status.py @@ -0,0 +1,20 @@ +import click + +from codeflare_sdk.cluster.cluster import get_cluster + + +@click.group() +def cli(): + """Get the status of a specified resource""" + pass + + +@cli.command() +@click.argument("name", type=str) +@click.option("--namespace", type=str) +@click.pass_context +def raycluster(ctx, name, namespace): + """Get the status of a specified RayCluster""" + namespace = namespace or "default" + cluster = get_cluster(name, namespace) + cluster.status() From c3ff10cee3e8b46a05c202a2440daa3fe76cf341 Mon Sep 17 00:00:00 2001 From: carsonmh Date: Mon, 31 Jul 2023 12:34:13 -0700 Subject: [PATCH 02/10] add: details cli function --- jobtest.yaml | 173 ++++++++++++++++++++++ src/codeflare_sdk/cli/commands/details.py | 20 +++ 2 files changed, 193 insertions(+) create mode 100644 jobtest.yaml create mode 100644 src/codeflare_sdk/cli/commands/details.py diff --git a/jobtest.yaml b/jobtest.yaml new file mode 100644 index 0000000..92cd39b --- /dev/null +++ b/jobtest.yaml @@ -0,0 +1,173 @@ +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + name: jobtest + namespace: default +spec: + priority: 9 + resources: + GenericItems: + - custompodresources: + - limits: + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 + replicas: 1 + requests: + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 + - limits: + cpu: 1 + memory: 1G + nvidia.com/gpu: 0 + replicas: 2 + requests: + cpu: 1 + memory: 1G + nvidia.com/gpu: 0 + generictemplate: + apiVersion: ray.io/v1alpha1 + kind: RayCluster + metadata: + labels: + appwrapper.mcad.ibm.com: jobtest + controller-tools.k8s.io: '1.0' + name: jobtest + namespace: default + spec: + autoscalerOptions: + idleTimeoutSeconds: 60 + imagePullPolicy: Always + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + upscalingMode: Default + enableInTreeAutoscaling: false + headGroupSpec: + rayStartParams: + block: 'true' + dashboard-host: 0.0.0.0 + num-gpus: '0' + serviceType: ClusterIP + template: + spec: + containers: + - env: + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: ray-head + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 + requests: + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 + imagePullSecrets: [] + rayVersion: 2.1.0 + workerGroupSpecs: + - groupName: small-group-jobtest + maxReplicas: 2 + minReplicas: 2 + rayStartParams: + block: 'true' + num-gpus: '0' + replicas: 2 + template: + metadata: + annotations: + key: value + labels: + key: value + spec: + containers: + - env: + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: machine-learning + resources: + limits: + cpu: 1 + memory: 1G + nvidia.com/gpu: 0 + requests: + cpu: 1 + memory: 1G + nvidia.com/gpu: 0 + imagePullSecrets: [] + initContainers: + - command: + - sh + - -c + - until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; + do echo waiting for myservice; sleep 2; done + image: busybox:1.28 + name: init-myservice + replicas: 1 + - generictemplate: + apiVersion: route.openshift.io/v1 + kind: Route + metadata: + labels: + odh-ray-cluster-service: jobtest-head-svc + name: ray-dashboard-jobtest + namespace: default + spec: + port: + targetPort: dashboard + to: + kind: Service + name: jobtest-head-svc + replica: 1 + Items: [] diff --git a/src/codeflare_sdk/cli/commands/details.py b/src/codeflare_sdk/cli/commands/details.py new file mode 100644 index 0000000..b12edd8 --- /dev/null +++ b/src/codeflare_sdk/cli/commands/details.py @@ -0,0 +1,20 @@ +import click + +from codeflare_sdk.cluster.cluster import get_cluster + + +@click.group() +def cli(): + """Get the details of a specified resource""" + pass + + +@cli.command() +@click.argument("name", type=str) +@click.option("--namespace", type=str) +@click.pass_context +def raycluster(ctx, name, namespace): + """Get the details of a specified RayCluster""" + namespace = namespace or "default" + cluster = get_cluster(name, namespace) + cluster.details() From 568bde6494516a640d2ff6b9a5eb36c5b0b27d38 Mon Sep 17 00:00:00 2001 From: carsonmh Date: Thu, 27 Jul 2023 13:24:05 -0700 Subject: [PATCH 03/10] create: function to list rayclusters in all namespaces --- src/codeflare_sdk/cluster/cluster.py | 31 ++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py index b0075df..a25dd1b 100644 --- a/src/codeflare_sdk/cluster/cluster.py +++ b/src/codeflare_sdk/cluster/cluster.py @@ -412,7 +412,17 @@ def list_all_clusters(namespace: str, print_to_console: bool = True): """ Returns (and prints by default) a list of all clusters in a given namespace. """ - clusters = _get_ray_clusters(namespace) + clusters = _get_ray_clusters_in_namespace(namespace) + if print_to_console: + pretty_print.print_clusters(clusters) + return clusters + + +def list_clusters_all_namespaces(print_to_console: bool = True): + """ + Returns (and prints by default) a list of all clusters in the Kubernetes cluster. + """ + clusters = _get_all_ray_clusters() if print_to_console: pretty_print.print_clusters(clusters) return clusters @@ -529,7 +539,7 @@ def _ray_cluster_status(name, namespace="default") -> Optional[RayCluster]: return None -def _get_ray_clusters(namespace="default") -> List[RayCluster]: +def _get_ray_clusters_in_namespace(namespace="default") -> List[RayCluster]: list_of_clusters = [] try: config_check() @@ -548,6 +558,23 @@ def _get_ray_clusters(namespace="default") -> List[RayCluster]: return list_of_clusters +def _get_all_ray_clusters() -> List[RayCluster]: + list_of_clusters = [] + try: + config_check() + api_instance = client.CustomObjectsApi(api_config_handler()) + rcs = api_instance.list_cluster_custom_object( + group="ray.io", + version="v1alpha1", + plural="rayclusters", + ) + except Exception as e: + return _kube_api_error_handling(e) + for rc in rcs["items"]: + list_of_clusters.append(_map_to_ray_cluster(rc)) + return list_of_clusters + + def _get_app_wrappers( namespace="default", filter=List[AppWrapperStatus] ) -> List[AppWrapper]: From d4d11c67adb0266767101fabc001fc9a3df54b76 Mon Sep 17 00:00:00 2001 From: carsonmh Date: Mon, 31 Jul 2023 12:36:18 -0700 Subject: [PATCH 04/10] add: list raycluster function cli --- src/codeflare_sdk/cli/commands/list.py | 31 ++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 src/codeflare_sdk/cli/commands/list.py diff --git a/src/codeflare_sdk/cli/commands/list.py b/src/codeflare_sdk/cli/commands/list.py new file mode 100644 index 0000000..4479ae3 --- /dev/null +++ b/src/codeflare_sdk/cli/commands/list.py @@ -0,0 +1,31 @@ +import click +from kubernetes import client, config + +from codeflare_sdk.cluster.cluster import ( + list_clusters_all_namespaces, + list_all_clusters, + get_current_namespace, +) +from codeflare_sdk.cli.cli_utils import load_auth + + +@click.group() +def cli(): + """List a specified resource""" + pass + + +@cli.command() +@click.option("--namespace") +@click.option("--all", is_flag=True) +@click.pass_context +def rayclusters(ctx, namespace, all): + """List all rayclusters in a specified namespace""" + if all and namespace: + click.echo("--all and --namespace are mutually exclusive") + return + namespace = namespace or "default" + if not all: + list_all_clusters(namespace) + return + list_clusters_all_namespaces() From d5fb3253e08fea9bfb6130f3c5983a0d5b84fbf9 Mon Sep 17 00:00:00 2001 From: carsonmh Date: Mon, 31 Jul 2023 13:46:21 -0700 Subject: [PATCH 05/10] test: add unit test for list_clusters_all_namespaces --- tests/unit_test.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tests/unit_test.py b/tests/unit_test.py index 783ec92..4dedc3d 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -34,6 +34,7 @@ get_cluster, _app_wrapper_status, _ray_cluster_status, + list_clusters_all_namespaces, ) from codeflare_sdk.cluster.auth import ( TokenAuthentication, @@ -206,6 +207,33 @@ def test_cluster_deletion_cli(mocker): assert result.exit_code == 0 assert "Cluster deleted successfully" in result.output +def test_list_clusters_all_namespaces(mocker, capsys): + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_cluster_custom_object", + side_effect=get_ray_obj_no_namespace, + ) + list_clusters_all_namespaces() + captured = capsys.readouterr() + assert captured.out == ( + " ๐Ÿš€ CodeFlare Cluster Details ๐Ÿš€ \n" + " \n" + " โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ \n" + " โ”‚ Name โ”‚ \n" + " โ”‚ quicktest Active โœ… โ”‚ \n" + " โ”‚ โ”‚ \n" + " โ”‚ URI: ray://quicktest-head-svc.ns.svc:10001 โ”‚ \n" + " โ”‚ โ”‚ \n" + " โ”‚ Dashboard๐Ÿ”— โ”‚ \n" + " โ”‚ โ”‚ \n" + " โ”‚ Cluster Resources โ”‚ \n" + " โ”‚ โ•ญโ”€ Workers โ”€โ”€โ•ฎ โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Worker specs(each) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ โ”‚ \n" + " โ”‚ โ”‚ Min Max โ”‚ โ”‚ Memory CPU GPU โ”‚ โ”‚ \n" + " โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \n" + " โ”‚ โ”‚ 1 1 โ”‚ โ”‚ 2G~2G 1 0 โ”‚ โ”‚ \n" + " โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \n" + " โ”‚ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”‚ \n" + " โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ \n" + ) # For mocking openshift client results @@ -992,6 +1020,10 @@ def get_ray_obj(group, version, namespace, plural, cls=None): return api_obj +def get_ray_obj_no_namespace(group, version, plural, cls=None): + return get_ray_obj(group, version, "ns", plural, cls) + + def get_aw_obj(group, version, namespace, plural): api_obj1 = { "items": [ @@ -2360,4 +2392,3 @@ def test_cleanup(): os.remove("tls-cluster-namespace/tls.key") os.rmdir("tls-cluster-namespace") os.remove("cli-test-cluster.yaml") - os.removedirs(os.path.expanduser("~/.codeflare")) From 171392ea705b924e0982830d7f087f54ad5078e0 Mon Sep 17 00:00:00 2001 From: carsonmh Date: Mon, 31 Jul 2023 14:17:12 -0700 Subject: [PATCH 06/10] test: add unit tests for status, details, and list CLI commands --- tests/unit_test.py | 129 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/tests/unit_test.py b/tests/unit_test.py index 4dedc3d..1247264 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -236,6 +236,135 @@ def test_list_clusters_all_namespaces(mocker, capsys): ) +def test_raycluster_details_cli(mocker): + runner = CliRunner() + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + side_effect=get_ray_obj, + ) + mocker.patch( + "codeflare_sdk.cluster.cluster.get_current_namespace", + return_value="ns", + ) + mocker.patch( + "codeflare_sdk.cluster.cluster.Cluster.status", + return_value=(False, CodeFlareClusterStatus.UNKNOWN), + ) + mocker.patch( + "codeflare_sdk.cluster.cluster.Cluster.cluster_dashboard_uri", + return_value="", + ) + mocker.patch.object(client, "ApiClient") + raycluster_details_command = """ + details raycluster quicktest + """ + result = runner.invoke(cli, raycluster_details_command) + quicktest_details = ( + " โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ \n" + + " โ”‚ Name โ”‚ \n" + + " โ”‚ quicktest Inactive โŒ โ”‚ \n" + + " โ”‚ โ”‚ \n" + + " โ”‚ URI: ray://quicktest-head-svc.ns.svc:10001 โ”‚ \n" + + " โ”‚ โ”‚ \n" + + " โ”‚ Dashboard๐Ÿ”— โ”‚ \n" + + " โ”‚ โ”‚ \n" + + " โ”‚ Cluster Resources โ”‚ \n" + + " โ”‚ โ•ญโ”€ Workers โ”€โ”€โ•ฎ โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Worker specs(each) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ โ”‚ \n" + + " โ”‚ โ”‚ Min Max โ”‚ โ”‚ Memory CPU GPU โ”‚ โ”‚ \n" + + " โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \n" + + " โ”‚ โ”‚ 1 1 โ”‚ โ”‚ 2~2 1 0 โ”‚ โ”‚ \n" + + " โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \n" + + " โ”‚ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”‚ \n" + + " โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ " + ) + assert quicktest_details in result.output + + +def test_raycluster_status_cli(mocker): + runner = CliRunner() + test_raycluster = RayCluster( + "quicktest", + RayClusterStatus.READY, + 1, + 1, + "1", + "1", + 1, + 1, + "default", + "dashboard-url", + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + side_effect=get_ray_obj, + ) + mocker.patch( + "codeflare_sdk.cluster.cluster.get_current_namespace", + return_value="ns", + ) + mocker.patch( + "codeflare_sdk.cluster.cluster.Cluster.cluster_dashboard_uri", + return_value="", + ) + mocker.patch( + "codeflare_sdk.cluster.cluster._app_wrapper_status", + return_value=test_raycluster, + ) + mocker.patch( + "codeflare_sdk.cluster.cluster._ray_cluster_status", + return_value=test_raycluster, + ) + mocker.patch.object(client, "ApiClient") + raycluster_status_command = """ + status raycluster quicktest + """ + result = runner.invoke(cli, raycluster_status_command) + assert "Active" in result.output + + +def test_raycluster_list_cli(mocker): + runner = CliRunner() + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + side_effect=get_ray_obj, + ) + mocker.patch( + "codeflare_sdk.cluster.cluster.get_current_namespace", + return_value="ns", + ) + mocker.patch( + "codeflare_sdk.cluster.cluster.Cluster.status", + return_value=(False, CodeFlareClusterStatus.UNKNOWN), + ) + mocker.patch( + "codeflare_sdk.cluster.cluster.Cluster.cluster_dashboard_uri", + return_value="", + ) + mocker.patch.object(client, "ApiClient") + list_rayclusters_command = """ + list rayclusters --namespace=ns + """ + result = runner.invoke(cli, list_rayclusters_command) + assert ( + " โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ \n" + + " โ”‚ Name โ”‚ \n" + + " โ”‚ quicktest Active โœ… โ”‚ \n" + + " โ”‚ โ”‚ \n" + + " โ”‚ URI: ray://quicktest-head-svc.ns.svc:10001 โ”‚ \n" + + " โ”‚ โ”‚ \n" + + " โ”‚ Dashboard๐Ÿ”— โ”‚ \n" + + " โ”‚ โ”‚ \n" + + " โ”‚ Cluster Resources โ”‚ \n" + + " โ”‚ โ•ญโ”€ Workers โ”€โ”€โ•ฎ โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Worker specs(each) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ โ”‚ \n" + + " โ”‚ โ”‚ Min Max โ”‚ โ”‚ Memory CPU GPU โ”‚ โ”‚ \n" + + " โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \n" + + " โ”‚ โ”‚ 1 1 โ”‚ โ”‚ 2G~2G 1 0 โ”‚ โ”‚ \n" + + " โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \n" + + " โ”‚ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”‚ \n" + + " โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ " + ) in result.output + + # For mocking openshift client results fake_res = openshift.Result("fake") From e3619dec85bb50a7c11be1493fb0ba9fe84971b6 Mon Sep 17 00:00:00 2001 From: carsonmh Date: Mon, 31 Jul 2023 14:21:50 -0700 Subject: [PATCH 07/10] cleanup --- jobtest.yaml | 173 --------------------------------------------------- 1 file changed, 173 deletions(-) delete mode 100644 jobtest.yaml diff --git a/jobtest.yaml b/jobtest.yaml deleted file mode 100644 index 92cd39b..0000000 --- a/jobtest.yaml +++ /dev/null @@ -1,173 +0,0 @@ -apiVersion: mcad.ibm.com/v1beta1 -kind: AppWrapper -metadata: - name: jobtest - namespace: default -spec: - priority: 9 - resources: - GenericItems: - - custompodresources: - - limits: - cpu: 2 - memory: 8G - nvidia.com/gpu: 0 - replicas: 1 - requests: - cpu: 2 - memory: 8G - nvidia.com/gpu: 0 - - limits: - cpu: 1 - memory: 1G - nvidia.com/gpu: 0 - replicas: 2 - requests: - cpu: 1 - memory: 1G - nvidia.com/gpu: 0 - generictemplate: - apiVersion: ray.io/v1alpha1 - kind: RayCluster - metadata: - labels: - appwrapper.mcad.ibm.com: jobtest - controller-tools.k8s.io: '1.0' - name: jobtest - namespace: default - spec: - autoscalerOptions: - idleTimeoutSeconds: 60 - imagePullPolicy: Always - resources: - limits: - cpu: 500m - memory: 512Mi - requests: - cpu: 500m - memory: 512Mi - upscalingMode: Default - enableInTreeAutoscaling: false - headGroupSpec: - rayStartParams: - block: 'true' - dashboard-host: 0.0.0.0 - num-gpus: '0' - serviceType: ClusterIP - template: - spec: - containers: - - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: RAY_USE_TLS - value: '0' - - name: RAY_TLS_SERVER_CERT - value: /home/ray/workspace/tls/server.crt - - name: RAY_TLS_SERVER_KEY - value: /home/ray/workspace/tls/server.key - - name: RAY_TLS_CA_CERT - value: /home/ray/workspace/tls/ca.crt - image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 - imagePullPolicy: Always - lifecycle: - preStop: - exec: - command: - - /bin/sh - - -c - - ray stop - name: ray-head - ports: - - containerPort: 6379 - name: gcs - - containerPort: 8265 - name: dashboard - - containerPort: 10001 - name: client - resources: - limits: - cpu: 2 - memory: 8G - nvidia.com/gpu: 0 - requests: - cpu: 2 - memory: 8G - nvidia.com/gpu: 0 - imagePullSecrets: [] - rayVersion: 2.1.0 - workerGroupSpecs: - - groupName: small-group-jobtest - maxReplicas: 2 - minReplicas: 2 - rayStartParams: - block: 'true' - num-gpus: '0' - replicas: 2 - template: - metadata: - annotations: - key: value - labels: - key: value - spec: - containers: - - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: RAY_USE_TLS - value: '0' - - name: RAY_TLS_SERVER_CERT - value: /home/ray/workspace/tls/server.crt - - name: RAY_TLS_SERVER_KEY - value: /home/ray/workspace/tls/server.key - - name: RAY_TLS_CA_CERT - value: /home/ray/workspace/tls/ca.crt - image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 - lifecycle: - preStop: - exec: - command: - - /bin/sh - - -c - - ray stop - name: machine-learning - resources: - limits: - cpu: 1 - memory: 1G - nvidia.com/gpu: 0 - requests: - cpu: 1 - memory: 1G - nvidia.com/gpu: 0 - imagePullSecrets: [] - initContainers: - - command: - - sh - - -c - - until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; - do echo waiting for myservice; sleep 2; done - image: busybox:1.28 - name: init-myservice - replicas: 1 - - generictemplate: - apiVersion: route.openshift.io/v1 - kind: Route - metadata: - labels: - odh-ray-cluster-service: jobtest-head-svc - name: ray-dashboard-jobtest - namespace: default - spec: - port: - targetPort: dashboard - to: - kind: Service - name: jobtest-head-svc - replica: 1 - Items: [] From 71b70c0abe61ae4cc277c1c6e2d48a4183fde8da Mon Sep 17 00:00:00 2001 From: carsonmh Date: Mon, 31 Jul 2023 15:03:43 -0700 Subject: [PATCH 08/10] fix: unit tests --- tests/unit_test.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/tests/unit_test.py b/tests/unit_test.py index 1247264..73ae92e 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -242,10 +242,6 @@ def test_raycluster_details_cli(mocker): "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", side_effect=get_ray_obj, ) - mocker.patch( - "codeflare_sdk.cluster.cluster.get_current_namespace", - return_value="ns", - ) mocker.patch( "codeflare_sdk.cluster.cluster.Cluster.status", return_value=(False, CodeFlareClusterStatus.UNKNOWN), @@ -282,18 +278,6 @@ def test_raycluster_details_cli(mocker): def test_raycluster_status_cli(mocker): runner = CliRunner() - test_raycluster = RayCluster( - "quicktest", - RayClusterStatus.READY, - 1, - 1, - "1", - "1", - 1, - 1, - "default", - "dashboard-url", - ) mocker.patch( "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", side_effect=get_ray_obj, @@ -306,6 +290,19 @@ def test_raycluster_status_cli(mocker): "codeflare_sdk.cluster.cluster.Cluster.cluster_dashboard_uri", return_value="", ) + mocker.patch.object(client, "ApiClient") + test_raycluster = RayCluster( + "quicktest", + RayClusterStatus.READY, + 1, + 1, + "1", + "1", + 1, + 1, + "default", + "dashboard-url", + ) mocker.patch( "codeflare_sdk.cluster.cluster._app_wrapper_status", return_value=test_raycluster, @@ -314,7 +311,6 @@ def test_raycluster_status_cli(mocker): "codeflare_sdk.cluster.cluster._ray_cluster_status", return_value=test_raycluster, ) - mocker.patch.object(client, "ApiClient") raycluster_status_command = """ status raycluster quicktest """ From 4d76f968f67335ac005a8c9617a6f5e41cbb4594 Mon Sep 17 00:00:00 2001 From: carsonmh Date: Wed, 2 Aug 2023 13:40:58 -0700 Subject: [PATCH 09/10] change: make namespace required for functions --- src/codeflare_sdk/cli/commands/define.py | 2 +- src/codeflare_sdk/cli/commands/delete.py | 2 +- src/codeflare_sdk/cli/commands/details.py | 2 +- src/codeflare_sdk/cli/commands/list.py | 6 ++++-- src/codeflare_sdk/cli/commands/status.py | 2 +- tests/unit_test.py | 8 +++++--- 6 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/codeflare_sdk/cli/commands/define.py b/src/codeflare_sdk/cli/commands/define.py index 16b6fa4..09cfd1f 100644 --- a/src/codeflare_sdk/cli/commands/define.py +++ b/src/codeflare_sdk/cli/commands/define.py @@ -13,7 +13,7 @@ def cli(): @cli.command() @click.option("--name", type=str, required=True) -@click.option("--namespace", "-n", type=str) +@click.option("--namespace", "-n", type=str, required=True) @click.option("--head_info", cls=PythonLiteralOption, type=list) @click.option("--machine_types", cls=PythonLiteralOption, type=list) @click.option("--min_cpus", type=int) diff --git a/src/codeflare_sdk/cli/commands/delete.py b/src/codeflare_sdk/cli/commands/delete.py index c1ec124..f828457 100644 --- a/src/codeflare_sdk/cli/commands/delete.py +++ b/src/codeflare_sdk/cli/commands/delete.py @@ -13,7 +13,7 @@ def cli(): @cli.command() @click.argument("name", type=str) -@click.option("--namespace", type=str, default="default") +@click.option("--namespace", type=str, required=True) def raycluster(name, namespace): """ Delete a specified RayCluster from the Kubernetes cluster diff --git a/src/codeflare_sdk/cli/commands/details.py b/src/codeflare_sdk/cli/commands/details.py index b12edd8..3f749f7 100644 --- a/src/codeflare_sdk/cli/commands/details.py +++ b/src/codeflare_sdk/cli/commands/details.py @@ -11,7 +11,7 @@ def cli(): @cli.command() @click.argument("name", type=str) -@click.option("--namespace", type=str) +@click.option("--namespace", type=str, required=True) @click.pass_context def raycluster(ctx, name, namespace): """Get the details of a specified RayCluster""" diff --git a/src/codeflare_sdk/cli/commands/list.py b/src/codeflare_sdk/cli/commands/list.py index 4479ae3..dd3ad4e 100644 --- a/src/codeflare_sdk/cli/commands/list.py +++ b/src/codeflare_sdk/cli/commands/list.py @@ -16,7 +16,7 @@ def cli(): @cli.command() -@click.option("--namespace") +@click.option("--namespace", type=str) @click.option("--all", is_flag=True) @click.pass_context def rayclusters(ctx, namespace, all): @@ -24,7 +24,9 @@ def rayclusters(ctx, namespace, all): if all and namespace: click.echo("--all and --namespace are mutually exclusive") return - namespace = namespace or "default" + if not all and not namespace: + click.echo("You must specify either --namespace or --all") + return if not all: list_all_clusters(namespace) return diff --git a/src/codeflare_sdk/cli/commands/status.py b/src/codeflare_sdk/cli/commands/status.py index 5d61fa9..be83634 100644 --- a/src/codeflare_sdk/cli/commands/status.py +++ b/src/codeflare_sdk/cli/commands/status.py @@ -11,7 +11,7 @@ def cli(): @cli.command() @click.argument("name", type=str) -@click.option("--namespace", type=str) +@click.option("--namespace", type=str, required=True) @click.pass_context def raycluster(ctx, name, namespace): """Get the status of a specified RayCluster""" diff --git a/tests/unit_test.py b/tests/unit_test.py index 73ae92e..45b7038 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -201,12 +201,14 @@ def test_cluster_deletion_cli(mocker): runner = CliRunner() delete_cluster_command = """ delete raycluster - quicktest + quicktest --namespace=default """ result = runner.invoke(cli, delete_cluster_command) assert result.exit_code == 0 assert "Cluster deleted successfully" in result.output + + def test_list_clusters_all_namespaces(mocker, capsys): mocker.patch( "kubernetes.client.CustomObjectsApi.list_cluster_custom_object", @@ -252,7 +254,7 @@ def test_raycluster_details_cli(mocker): ) mocker.patch.object(client, "ApiClient") raycluster_details_command = """ - details raycluster quicktest + details raycluster quicktest --namespace=default """ result = runner.invoke(cli, raycluster_details_command) quicktest_details = ( @@ -312,7 +314,7 @@ def test_raycluster_status_cli(mocker): return_value=test_raycluster, ) raycluster_status_command = """ - status raycluster quicktest + status raycluster quicktest --namespace=default """ result = runner.invoke(cli, raycluster_status_command) assert "Active" in result.output From a2168b508ce209ff44d5b0b505a6fa5d510e6aa9 Mon Sep 17 00:00:00 2001 From: carsonmh Date: Wed, 2 Aug 2023 13:44:23 -0700 Subject: [PATCH 10/10] add: error handling for cluster not found --- src/codeflare_sdk/cli/commands/delete.py | 6 +++++- src/codeflare_sdk/cli/commands/details.py | 7 +++++-- src/codeflare_sdk/cli/commands/status.py | 7 +++++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/codeflare_sdk/cli/commands/delete.py b/src/codeflare_sdk/cli/commands/delete.py index f828457..7ce9744 100644 --- a/src/codeflare_sdk/cli/commands/delete.py +++ b/src/codeflare_sdk/cli/commands/delete.py @@ -18,6 +18,10 @@ def raycluster(name, namespace): """ Delete a specified RayCluster from the Kubernetes cluster """ - cluster = get_cluster(name, namespace) + try: + cluster = get_cluster(name, namespace) + except FileNotFoundError: + click.echo(f"Cluster {name} not found in {namespace} namespace") + return cluster.down() click.echo(f"Cluster deleted successfully") diff --git a/src/codeflare_sdk/cli/commands/details.py b/src/codeflare_sdk/cli/commands/details.py index 3f749f7..b865caa 100644 --- a/src/codeflare_sdk/cli/commands/details.py +++ b/src/codeflare_sdk/cli/commands/details.py @@ -15,6 +15,9 @@ def cli(): @click.pass_context def raycluster(ctx, name, namespace): """Get the details of a specified RayCluster""" - namespace = namespace or "default" - cluster = get_cluster(name, namespace) + try: + cluster = get_cluster(name, namespace) + except FileNotFoundError: + click.echo(f"Cluster {name} not found in {namespace} namespace") + return cluster.details() diff --git a/src/codeflare_sdk/cli/commands/status.py b/src/codeflare_sdk/cli/commands/status.py index be83634..fc76ffc 100644 --- a/src/codeflare_sdk/cli/commands/status.py +++ b/src/codeflare_sdk/cli/commands/status.py @@ -15,6 +15,9 @@ def cli(): @click.pass_context def raycluster(ctx, name, namespace): """Get the status of a specified RayCluster""" - namespace = namespace or "default" - cluster = get_cluster(name, namespace) + try: + cluster = get_cluster(name, namespace) + except FileNotFoundError: + click.echo(f"Cluster {name} not found in {namespace} namespace") + return cluster.status()