From 54af9ff3fe63ee6d3253f6ecba4f86d6bef11276 Mon Sep 17 00:00:00 2001 From: brettstewart Date: Wed, 18 Jun 2025 16:25:50 -0500 Subject: [PATCH] feat(runtime): add kubernetes support (#8814) Co-authored-by: Corey White Co-authored-by: luke_schulz --- Makefile | 36 + config.template.toml | 44 + dev_config/python/.pre-commit-config.yaml | 1 + kind/cluster.yaml | 9 + kind/manifests/deployment.yaml | 19 + kind/manifests/nginx.yaml | 678 ++++++++++++++++ kind/manifests/role.yaml | 14 + kind/manifests/roleBinding.yaml | 14 + kind/manifests/service.yaml | 12 + openhands/core/config/kubernetes_config.py | 86 ++ openhands/core/config/openhands_config.py | 2 + openhands/core/config/utils.py | 15 + openhands/runtime/__init__.py | 3 + openhands/runtime/impl/kubernetes/README.md | 141 ++++ .../impl/kubernetes/kubernetes_runtime.py | 752 ++++++++++++++++++ poetry.lock | 64 +- pyproject.toml | 2 + tests/unit/test_kubernetes_config.py | 62 ++ 18 files changed, 1941 insertions(+), 13 deletions(-) create mode 100644 kind/cluster.yaml create mode 100644 kind/manifests/deployment.yaml create mode 100644 kind/manifests/nginx.yaml create mode 100644 kind/manifests/role.yaml create mode 100644 kind/manifests/roleBinding.yaml create mode 100644 kind/manifests/service.yaml create mode 100644 openhands/core/config/kubernetes_config.py create mode 100644 openhands/runtime/impl/kubernetes/README.md create mode 100644 openhands/runtime/impl/kubernetes/kubernetes_runtime.py create mode 100644 tests/unit/test_kubernetes_config.py diff --git a/Makefile b/Makefile index 6d5c0761e4..f98d8b3408 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ DEFAULT_MODEL = "gpt-4o" CONFIG_FILE = config.toml PRE_COMMIT_CONFIG_PATH = "./dev_config/python/.pre-commit-config.yaml" PYTHON_VERSION = 3.12 +KIND_CLUSTER_NAME = "local-hands" # ANSI color codes GREEN=$(shell tput -Txterm setaf 2) @@ -199,6 +200,40 @@ lint: @$(MAKE) -s lint-frontend @$(MAKE) -s lint-backend +kind: + @echo "$(YELLOW)Checking if kind is 
installed...$(RESET)" + @if ! command -v kind > /dev/null; then \ + echo "$(RED)kind is not installed. Please install kind with 'brew install kind' to continue$(RESET)"; \ + exit 1; \ + else \ + echo "$(BLUE)kind $(shell kind version) is already installed.$(RESET)"; \ + fi + @echo "$(YELLOW)Checking if kind cluster '$(KIND_CLUSTER_NAME)' already exists...$(RESET)" + @if kind get clusters | grep -q "^$(KIND_CLUSTER_NAME)$$"; then \ + echo "$(BLUE)Kind cluster '$(KIND_CLUSTER_NAME)' already exists.$(RESET)"; \ + kubectl config use-context kind-$(KIND_CLUSTER_NAME); \ + else \ + echo "$(YELLOW)Creating kind cluster '$(KIND_CLUSTER_NAME)'...$(RESET)"; \ + kind create cluster --name $(KIND_CLUSTER_NAME) --config kind/cluster.yaml; \ + fi + @echo "$(YELLOW)Checking if mirrord is installed...$(RESET)" + @if ! command -v mirrord > /dev/null; then \ + echo "$(RED)mirrord is not installed. Please install mirrord with 'brew install metalbear-co/mirrord/mirrord' to continue$(RESET)"; \ + exit 1; \ + else \ + echo "$(BLUE)mirrord $(shell mirrord --version) is already installed.$(RESET)"; \ + fi + @echo "$(YELLOW)Installing k8s mirrord resources...$(RESET)" + @kubectl apply -f kind/manifests + @echo "$(GREEN)Mirrord resources installed successfully.$(RESET)" + @echo "$(YELLOW)Waiting for Mirrord pod to be ready.$(RESET)" + @sleep 5 + @kubectl wait --for=condition=Available deployment/ubuntu-dev + @echo "$(YELLOW)Waiting for Nginx to be ready.$(RESET)" + @kubectl -n ingress-nginx wait --for=condition=Available deployment/ingress-nginx-controller + @echo "$(YELLOW)Running make run inside of mirrord.$(RESET)" + @mirrord exec --target deployment/ubuntu-dev -- make run + test-frontend: @echo "$(YELLOW)Running tests for frontend...$(RESET)" @cd frontend && npm run test @@ -333,3 +368,4 @@ help: # Phony targets .PHONY: build check-dependencies check-system check-python check-npm check-nodejs check-docker check-poetry install-python-dependencies install-frontend-dependencies 
install-pre-commit-hooks lint-backend lint-frontend lint test-frontend test build-frontend start-backend start-frontend _run_setup run run-wsl setup-config setup-config-prompts setup-config-basic openhands-cloud-run docker-dev docker-run clean help +.PHONY: kind diff --git a/config.template.toml b/config.template.toml index 844fb7e634..e40bfc4d4d 100644 --- a/config.template.toml +++ b/config.template.toml @@ -415,3 +415,47 @@ type = "noop" # Configuration for the evaluation, please refer to the specific evaluation # plugin for the available options ############################################################################## + + +########################### Kubernetes ####################################### +# Kubernetes configuration when using the Kubernetes runtime +############################################################################## +[kubernetes] +# The Kubernetes namespace to use for OpenHands resources +#namespace = "default" + +# Domain for ingress resources +#ingress_domain = "localhost" + +# Size of the persistent volume claim +#pvc_storage_size = "2Gi" + +# Storage class for persistent volume claims +#pvc_storage_class = "standard" + +# CPU request for runtime pods +#resource_cpu_request = "1" + +# Memory request for runtime pods +#resource_memory_request = "1Gi" + +# Memory limit for runtime pods +#resource_memory_limit = "2Gi" + +# Optional name of image pull secret for private registries +#image_pull_secret = "" + +# Optional name of TLS secret for ingress +#ingress_tls_secret = "" + +# Optional node selector key for pod scheduling +#node_selector_key = "" + +# Optional node selector value for pod scheduling +#node_selector_val = "" + +# Optional YAML string defining pod tolerations +#tolerations_yaml = "" + +# Run the runtime sandbox container in privileged mode for use with docker-in-docker +#privileged = false diff --git a/dev_config/python/.pre-commit-config.yaml b/dev_config/python/.pre-commit-config.yaml index fbb425eec0..e12f358fac 
100644 --- a/dev_config/python/.pre-commit-config.yaml +++ b/dev_config/python/.pre-commit-config.yaml @@ -7,6 +7,7 @@ repos: - id: end-of-file-fixer exclude: docs/modules/python - id: check-yaml + args: ["--allow-multiple-documents"] - id: debug-statements - repo: https://github.com/tox-dev/pyproject-fmt diff --git a/kind/cluster.yaml b/kind/cluster.yaml new file mode 100644 index 0000000000..3402b5fe29 --- /dev/null +++ b/kind/cluster.yaml @@ -0,0 +1,9 @@ +--- +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +name: local-hands +nodes: +- role: control-plane + extraPortMappings: + - containerPort: 80 # node port on the cluster for nginx. + hostPort: 80 # local port for nginx http. diff --git a/kind/manifests/deployment.yaml b/kind/manifests/deployment.yaml new file mode 100644 index 0000000000..dd58cfce9a --- /dev/null +++ b/kind/manifests/deployment.yaml @@ -0,0 +1,19 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ubuntu-dev +spec: + replicas: 1 + selector: + matchLabels: + app: ubuntu-dev + template: + metadata: + labels: + app: ubuntu-dev + spec: + containers: + - name: ubuntu + image: ubuntu:22.04 + command: ["sleep", "infinity"] diff --git a/kind/manifests/nginx.yaml b/kind/manifests/nginx.yaml new file mode 100644 index 0000000000..f94c6a9089 --- /dev/null +++ b/kind/manifests/nginx.yaml @@ -0,0 +1,678 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + name: ingress-nginx +--- +apiVersion: v1 +automountServiceAccountToken: true +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx + namespace: ingress-nginx +--- +apiVersion: v1 +automountServiceAccountToken: true +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: 
admission-webhook + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx-admission + namespace: ingress-nginx +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx + namespace: ingress-nginx +rules: +- apiGroups: + - "" + resources: + - namespaces + verbs: + - get +- apiGroups: + - "" + resources: + - configmaps + - pods + - secrets + - endpoints + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - services + verbs: + - get + - list + - watch +- apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - watch +- apiGroups: + - networking.k8s.io + resources: + - ingresses/status + verbs: + - update +- apiGroups: + - networking.k8s.io + resources: + - ingressclasses + verbs: + - get + - list + - watch +- apiGroups: + - coordination.k8s.io + resourceNames: + - ingress-nginx-leader + resources: + - leases + verbs: + - get + - update +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - list + - watch + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/component: admission-webhook + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx-admission + namespace: ingress-nginx +rules: +- apiGroups: + - "" + resources: + - secrets + verbs: + - get + - create +--- +apiVersion: 
rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx +rules: +- apiGroups: + - "" + resources: + - configmaps + - endpoints + - nodes + - pods + - secrets + - namespaces + verbs: + - list + - watch +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - list + - watch +- apiGroups: + - "" + resources: + - nodes + verbs: + - get +- apiGroups: + - "" + resources: + - services + verbs: + - get + - list + - watch +- apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +- apiGroups: + - networking.k8s.io + resources: + - ingresses/status + verbs: + - update +- apiGroups: + - networking.k8s.io + resources: + - ingressclasses + verbs: + - get + - list + - watch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - list + - watch + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: admission-webhook + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx-admission +rules: +- apiGroups: + - admissionregistration.k8s.io + resources: + - validatingwebhookconfigurations + verbs: + - get + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx + namespace: ingress-nginx +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: ingress-nginx 
+subjects: +- kind: ServiceAccount + name: ingress-nginx + namespace: ingress-nginx +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/component: admission-webhook + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx-admission + namespace: ingress-nginx +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: ingress-nginx-admission +subjects: +- kind: ServiceAccount + name: ingress-nginx-admission + namespace: ingress-nginx +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: ingress-nginx +subjects: +- kind: ServiceAccount + name: ingress-nginx + namespace: ingress-nginx +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: admission-webhook + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx-admission +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: ingress-nginx-admission +subjects: +- kind: ServiceAccount + name: ingress-nginx-admission + namespace: ingress-nginx +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx-controller + namespace: ingress-nginx +data: + worker-processes: "2" # Set to a lower number than 
default + max-worker-connections: "1024" +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx-controller + namespace: ingress-nginx +spec: + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - appProtocol: http + name: http + port: 80 + protocol: TCP + targetPort: http + - appProtocol: https + name: https + port: 443 + protocol: TCP + targetPort: https + selector: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + type: LoadBalancer +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx-controller-admission + namespace: ingress-nginx +spec: + ports: + - appProtocol: https + name: https-webhook + port: 443 + targetPort: webhook + selector: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx-controller + namespace: ingress-nginx +spec: + minReadySeconds: 0 + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + strategy: + rollingUpdate: + maxUnavailable: 1 + type: RollingUpdate + template: + metadata: + labels: + 
app.kubernetes.io/component: controller + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + spec: + containers: + - args: + - /nginx-ingress-controller + - --election-id=ingress-nginx-leader + - --controller-class=k8s.io/ingress-nginx + - --ingress-class=nginx + - --configmap=$(POD_NAMESPACE)/ingress-nginx-controller + - --validating-webhook=:8443 + - --validating-webhook-certificate=/usr/local/certificates/cert + - --validating-webhook-key=/usr/local/certificates/key + - --watch-ingress-without-class=true + - --publish-status-address=localhost + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: LD_PRELOAD + value: /usr/local/lib/libmimalloc.so + image: registry.k8s.io/ingress-nginx/controller:v1.12.1@sha256:9724476b928967173d501040631b23ba07f47073999e80e34b120e8db5f234d5 + imagePullPolicy: IfNotPresent + lifecycle: + preStop: + exec: + command: + - /wait-shutdown + livenessProbe: + failureThreshold: 5 + httpGet: + path: /healthz + port: 10254 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + name: controller + ports: + - containerPort: 80 + hostPort: 80 + name: http + protocol: TCP + - containerPort: 443 + hostPort: 443 + name: https + protocol: TCP + - containerPort: 8443 + name: webhook + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: 10254 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + resources: + requests: + cpu: 300m + memory: 256Mi + limits: + memory: 512Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + add: + - NET_BIND_SERVICE + drop: + - ALL + readOnlyRootFilesystem: false + runAsGroup: 82 + runAsNonRoot: true + runAsUser: 101 + seccompProfile: + type: 
RuntimeDefault + volumeMounts: + - mountPath: /usr/local/certificates/ + name: webhook-cert + readOnly: true + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: ingress-nginx + terminationGracePeriodSeconds: 0 + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/master + operator: Equal + - effect: NoSchedule + key: node-role.kubernetes.io/control-plane + operator: Equal + volumes: + - name: webhook-cert + secret: + secretName: ingress-nginx-admission +--- +apiVersion: batch/v1 +kind: Job +metadata: + labels: + app.kubernetes.io/component: admission-webhook + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx-admission-create + namespace: ingress-nginx +spec: + template: + metadata: + labels: + app.kubernetes.io/component: admission-webhook + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx-admission-create + spec: + containers: + - args: + - create + - --host=ingress-nginx-controller-admission,ingress-nginx-controller-admission.$(POD_NAMESPACE).svc + - --namespace=$(POD_NAMESPACE) + - --secret-name=ingress-nginx-admission + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: registry.k8s.io/ingress-nginx/kube-webhook-certgen:v1.4.4@sha256:a9f03b34a3cbfbb26d103a14046ab2c5130a80c3d69d526ff8063d2b37b9fd3f + imagePullPolicy: IfNotPresent + name: create + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 65532 + runAsNonRoot: true + runAsUser: 65532 + seccompProfile: + type: RuntimeDefault + nodeSelector: + kubernetes.io/os: linux + restartPolicy: OnFailure + serviceAccountName: ingress-nginx-admission +--- +apiVersion: batch/v1 +kind: 
Job +metadata: + labels: + app.kubernetes.io/component: admission-webhook + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx-admission-patch + namespace: ingress-nginx +spec: + template: + metadata: + labels: + app.kubernetes.io/component: admission-webhook + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx-admission-patch + spec: + containers: + - args: + - patch + - --webhook-name=ingress-nginx-admission + - --namespace=$(POD_NAMESPACE) + - --patch-mutating=false + - --secret-name=ingress-nginx-admission + - --patch-failure-policy=Fail + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: registry.k8s.io/ingress-nginx/kube-webhook-certgen:v1.4.4@sha256:a9f03b34a3cbfbb26d103a14046ab2c5130a80c3d69d526ff8063d2b37b9fd3f + imagePullPolicy: IfNotPresent + name: patch + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 65532 + runAsNonRoot: true + runAsUser: 65532 + seccompProfile: + type: RuntimeDefault + nodeSelector: + kubernetes.io/os: linux + restartPolicy: OnFailure + serviceAccountName: ingress-nginx-admission +--- +apiVersion: networking.k8s.io/v1 +kind: IngressClass +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: nginx +spec: + controller: k8s.io/ingress-nginx +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + labels: + app.kubernetes.io/component: admission-webhook + app.kubernetes.io/instance: ingress-nginx + app.kubernetes.io/name: ingress-nginx 
+ app.kubernetes.io/part-of: ingress-nginx + app.kubernetes.io/version: 1.12.1 + name: ingress-nginx-admission +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: ingress-nginx-controller-admission + namespace: ingress-nginx + path: /networking/v1/ingresses + port: 443 + failurePolicy: Fail + matchPolicy: Equivalent + name: validate.nginx.ingress.kubernetes.io + rules: + - apiGroups: + - networking.k8s.io + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - ingresses + sideEffects: None diff --git a/kind/manifests/role.yaml b/kind/manifests/role.yaml new file mode 100644 index 0000000000..1c5485dbc3 --- /dev/null +++ b/kind/manifests/role.yaml @@ -0,0 +1,14 @@ +--- +# mirrord-rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: mirrord-role + namespace: default +rules: + - apiGroups: [""] + resources: ["pods", "pods/exec", "pods/portforward", "services", "persistentvolumeclaims"] + verbs: ["get", "list", "create", "delete", "watch", "update"] + - apiGroups: ["networking.k8s.io"] # Networking API group (for ingress, networkpolicies, etc.) 
+ resources: ["ingresses", "networkpolicies"] + verbs: ["get", "list", "create", "delete", "watch", "update"] diff --git a/kind/manifests/roleBinding.yaml b/kind/manifests/roleBinding.yaml new file mode 100644 index 0000000000..5db395f74e --- /dev/null +++ b/kind/manifests/roleBinding.yaml @@ -0,0 +1,14 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: mirrord-binding + namespace: default +subjects: + - kind: ServiceAccount + name: default + namespace: default +roleRef: + kind: Role + name: mirrord-role + apiGroup: rbac.authorization.k8s.io diff --git a/kind/manifests/service.yaml b/kind/manifests/service.yaml new file mode 100644 index 0000000000..a04e315884 --- /dev/null +++ b/kind/manifests/service.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: ubuntu-dev +spec: + selector: + app: ubuntu-dev + ports: + - protocol: TCP + port: 8099 + targetPort: 3000 diff --git a/openhands/core/config/kubernetes_config.py b/openhands/core/config/kubernetes_config.py new file mode 100644 index 0000000000..1ba554825c --- /dev/null +++ b/openhands/core/config/kubernetes_config.py @@ -0,0 +1,86 @@ +from pydantic import BaseModel, Field, ValidationError + + +class KubernetesConfig(BaseModel): + """Configuration for Kubernetes runtime. + + Attributes: + namespace: The Kubernetes namespace to use for OpenHands resources + ingress_domain: Domain for ingress resources + pvc_storage_size: Size of the persistent volume claim (e.g. 
"2Gi") + pvc_storage_class: Storage class for persistent volume claims + resource_cpu_request: CPU request for runtime pods + resource_memory_request: Memory request for runtime pods + resource_memory_limit: Memory limit for runtime pods + image_pull_secret: Optional name of image pull secret for private registries + ingress_tls_secret: Optional name of TLS secret for ingress + node_selector_key: Optional node selector key for pod scheduling + node_selector_val: Optional node selector value for pod scheduling + tolerations_yaml: Optional YAML string defining pod tolerations + """ + + namespace: str = Field( + default='default', + description='The Kubernetes namespace to use for OpenHands resources', + ) + ingress_domain: str = Field( + default='localhost', description='Domain for ingress resources' + ) + pvc_storage_size: str = Field( + default='2Gi', description='Size of the persistent volume claim' + ) + pvc_storage_class: str | None = Field( + default=None, description='Storage class for persistent volume claims' + ) + resource_cpu_request: str = Field( + default='1', description='CPU request for runtime pods' + ) + resource_memory_request: str = Field( + default='1Gi', description='Memory request for runtime pods' + ) + resource_memory_limit: str = Field( + default='2Gi', description='Memory limit for runtime pods' + ) + image_pull_secret: str | None = Field( + default=None, + description='Optional name of image pull secret for private registries', + ) + ingress_tls_secret: str | None = Field( + default=None, description='Optional name of TLS secret for ingress' + ) + node_selector_key: str | None = Field( + default=None, description='Optional node selector key for pod scheduling' + ) + node_selector_val: str | None = Field( + default=None, description='Optional node selector value for pod scheduling' + ) + tolerations_yaml: str | None = Field( + default=None, description='Optional YAML string defining pod tolerations' + ) + privileged: bool = Field( + 
default=False, + description='Run the runtime sandbox container in privileged mode for use with docker-in-docker', + ) + + model_config = {'extra': 'forbid'} + + @classmethod + def from_toml_section(cls, data: dict) -> dict[str, 'KubernetesConfig']: + """ + Create a mapping of KubernetesConfig instances from a toml dictionary representing the [kubernetes] section. + + The configuration is built from all keys in data; a ValueError is raised if validation fails (unknown keys are rejected). + + Returns: + dict[str, KubernetesConfig]: A mapping where the key "kubernetes" corresponds to the [kubernetes] configuration + """ + # Initialize the result mapping + kubernetes_mapping: dict[str, KubernetesConfig] = {} + + # Validate the section; chain the pydantic error so the original cause is preserved + try: + kubernetes_mapping['kubernetes'] = cls.model_validate(data) + except ValidationError as e: + raise ValueError(f'Invalid kubernetes configuration: {e}') from e + + return kubernetes_mapping diff --git a/openhands/core/config/openhands_config.py b/openhands/core/config/openhands_config.py index a387426eac..83b8af1464 100644 --- a/openhands/core/config/openhands_config.py +++ b/openhands/core/config/openhands_config.py @@ -11,6 +11,7 @@ from openhands.core.config.config_utils import ( model_defaults_to_dict, ) from openhands.core.config.extended_config import ExtendedConfig +from openhands.core.config.kubernetes_config import KubernetesConfig from openhands.core.config.llm_config import LLMConfig from openhands.core.config.mcp_config import MCPConfig from openhands.core.config.sandbox_config import SandboxConfig @@ -107,6 +108,7 @@ class OpenHandsConfig(BaseModel): ) # Maximum number of concurrent agent loops allowed per user mcp_host: str = Field(default=f'localhost:{os.getenv("port", 3000)}') mcp: MCPConfig = Field(default_factory=MCPConfig) + kubernetes: KubernetesConfig = Field(default_factory=KubernetesConfig) defaults_dict: ClassVar[dict] = {} diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py index b63f9063a6..9507226b19 100644 --- 
a/openhands/core/config/utils.py +++ b/openhands/core/config/utils.py @@ -25,6 +25,7 @@ from openhands.core.config.config_utils import ( OH_MAX_ITERATIONS, ) from openhands.core.config.extended_config import ExtendedConfig +from openhands.core.config.kubernetes_config import KubernetesConfig from openhands.core.config.llm_config import LLMConfig from openhands.core.config.mcp_config import MCPConfig from openhands.core.config.openhands_config import OpenHandsConfig @@ -228,6 +229,19 @@ def load_from_toml(cfg: OpenHandsConfig, toml_file: str = 'config.toml') -> None # Re-raise ValueError from MCPConfig.from_toml_section raise ValueError('Error in MCP sections in config.toml') + # Process kubernetes section if present + if 'kubernetes' in toml_config: + try: + kubernetes_mapping = KubernetesConfig.from_toml_section( + toml_config['kubernetes'] + ) + if 'kubernetes' in kubernetes_mapping: + cfg.kubernetes = kubernetes_mapping['kubernetes'] + except (TypeError, KeyError, ValidationError) as e: + logger.openhands_logger.warning( + f'Cannot parse [kubernetes] config from toml, values have not been applied.\nError: {e}' + ) + # Process condenser section if present if 'condenser' in toml_config: try: @@ -286,6 +300,7 @@ def load_from_toml(cfg: OpenHandsConfig, toml_file: str = 'config.toml') -> None 'sandbox', 'condenser', 'mcp', + 'kubernetes', } for key in toml_config: if key.lower() not in known_sections: diff --git a/openhands/runtime/__init__.py b/openhands/runtime/__init__.py index 5324ca3322..45f539ef2c 100644 --- a/openhands/runtime/__init__.py +++ b/openhands/runtime/__init__.py @@ -5,6 +5,7 @@ from openhands.runtime.impl.docker.docker_runtime import ( DockerRuntime, ) from openhands.runtime.impl.e2b.e2b_runtime import E2BRuntime +from openhands.runtime.impl.kubernetes.kubernetes_runtime import KubernetesRuntime from openhands.runtime.impl.local.local_runtime import LocalRuntime from openhands.runtime.impl.modal.modal_runtime import ModalRuntime from 
openhands.runtime.impl.remote.remote_runtime import RemoteRuntime @@ -21,6 +22,7 @@ _DEFAULT_RUNTIME_CLASSES: dict[str, type[Runtime]] = { 'runloop': RunloopRuntime, 'local': LocalRuntime, 'daytona': DaytonaRuntime, + 'kubernetes': KubernetesRuntime, 'cli': CLIRuntime, } @@ -50,6 +52,7 @@ __all__ = [ 'RunloopRuntime', 'DockerRuntime', 'DaytonaRuntime', + 'KubernetesRuntime', 'CLIRuntime', 'get_runtime_cls', ] diff --git a/openhands/runtime/impl/kubernetes/README.md b/openhands/runtime/impl/kubernetes/README.md new file mode 100644 index 0000000000..d58e090435 --- /dev/null +++ b/openhands/runtime/impl/kubernetes/README.md @@ -0,0 +1,141 @@ +# OpenHands Kubernetes Runtime + +This directory contains the Kubernetes runtime implementation for OpenHands, which allows the software to run on Kubernetes clusters for scalable and isolated execution environments. + +## Local Development with KIND + +For local development and testing, OpenHands provides a convenient setup using KIND (Kubernetes IN Docker) that creates a local Kubernetes cluster. + +### Prerequisites + +Before setting up the local Kubernetes environment, ensure you have the following tools installed: + +1. **KIND (Kubernetes IN Docker)** - [Installation Guide](https://kind.sigs.k8s.io/docs/user/quick-start/) + +2. **kubectl** - [Installation Guide](https://kubernetes.io/docs/tasks/tools/#kubectl) + +3. **mirrord** - [Installation Guide](https://metalbear.co/mirrord/docs/overview/quick-start/#installation) + + mirrord is used for network mirroring, allowing the locally running process to interact with the Kubernetes cluster as if it were running inside Kubernetes. + +4. **Docker or Podman** - Required for KIND to work + - Docker: Follow the official Docker installation guide for your platform + - Podman: [Installation Guide](https://podman.io/docs/installation) + +### Configuration + +To use the Kubernetes runtime, you need to configure OpenHands properly. 
The configuration is done through a TOML configuration file. + +#### Required Configuration + +Two configuration options are required to use the Kubernetes runtime: + +1. **Runtime Type**: Set the runtime to use Kubernetes + + ```toml + [core] + runtime = "kubernetes" + ``` + +2. **Runtime Container Image**: Specify the container image to use for the runtime environment + ```toml + [sandbox] + runtime_container_image = "docker.all-hands.dev/all-hands-ai/runtime:0.44-nikolaik" + ``` + +#### Additional Kubernetes Options + +OpenHands provides extensive configuration options for Kubernetes deployments under the `[kubernetes]` section. These options allow you to customize: + +- Kubernetes namespace +- Persistent volume configuration +- Ingress and networking settings +- Runtime Pod Security settings +- Resource limits and requests + +For a complete list of available Kubernetes configuration options, refer to the `[kubernetes]` section in the `config.template.toml` file in the repository root. + +## Local Development Setup + +### Quick Start + +To set up and run OpenHands with the Kubernetes runtime locally: + +First build the application with + +```bash +make build +``` + +Then + +```bash +make kind # target is stateless and will check for an existing kind cluster or make a new one if not present. +``` + +This command will: + +1. **Check Dependencies**: Verify that `kind`, `kubectl`, and `mirrord` are installed +2. **Create KIND Cluster**: Create a local Kubernetes cluster named "local-hands" using the configuration in `kind/cluster.yaml` +3. **Deploy Infrastructure**: Apply Kubernetes manifests including: + - Ubuntu development pod for runtime execution + - Nginx ingress controller for HTTP routing + - RBAC configurations for proper permissions +4. **Setup Mirrord**: Install mirrord resources for development workflow +5. 
**Run Application**: Execute `make run` inside the mirrord environment + +### Cluster Configuration + +The KIND cluster is configured with: + +- **Cluster Name**: `local-hands` +- **Node Configuration**: Single control-plane node +- **Port Mapping**: Host port 80 maps to container port 80 for nginx ingress +- **Base Image**: Ubuntu 22.04 for the development environment + +### Infrastructure Components + +The local setup includes several Kubernetes resources: + +#### Development Environment + +- **Deployment**: `ubuntu-dev` - Ubuntu 22.04 container for code execution +- **Service**: Exposes the development environment within the cluster + +#### Ingress Controller (Nginx) + +- **Namespace**: `ingress-nginx` - Dedicated namespace for ingress resources +- **Deployment**: `ingress-nginx-controller` - Handles HTTP routing and load balancing +- **Service**: LoadBalancer service for external access +- **ConfigMap**: Custom configuration for nginx controller +- **RBAC**: Roles and bindings for proper cluster permissions + +#### Development Workflow + +- **Mirrord Integration**: Allows running local development server while connecting to cluster resources +- **Port Forwarding**: Direct access to cluster services from localhost + +### Usage + +Once the environment is set up with `make kind`, the system will: + +1. Wait for all deployments to be ready +2. Automatically start the OpenHands application using mirrord +3. Provide access to the application at http://127.0.0.1:3000/ + +The mirrord integration allows you to develop locally while your application has access to the Kubernetes cluster resources, providing a seamless development experience that mirrors production behavior. + +### Troubleshooting + +If you encounter issues: + +1. **Check cluster status**: `kubectl get nodes` +2. **Verify deployments**: `kubectl get deployments --all-namespaces` +3. **Check ingress**: `kubectl get ingress --all-namespaces` +4. 
**View logs**: `kubectl logs -l app=ubuntu-dev` + +To clean up the environment: + +```bash +kind delete cluster --name local-hands +``` diff --git a/openhands/runtime/impl/kubernetes/kubernetes_runtime.py b/openhands/runtime/impl/kubernetes/kubernetes_runtime.py new file mode 100644 index 0000000000..fb11631b3f --- /dev/null +++ b/openhands/runtime/impl/kubernetes/kubernetes_runtime.py @@ -0,0 +1,752 @@ +from functools import lru_cache +from typing import Callable +from uuid import UUID + +import tenacity +import yaml +from kubernetes import client, config +from kubernetes.client.models import ( + V1Container, + V1ContainerPort, + V1EnvVar, + V1HTTPIngressPath, + V1HTTPIngressRuleValue, + V1Ingress, + V1IngressBackend, + V1IngressRule, + V1IngressServiceBackend, + V1IngressSpec, + V1IngressTLS, + V1ObjectMeta, + V1PersistentVolumeClaim, + V1PersistentVolumeClaimSpec, + V1PersistentVolumeClaimVolumeSource, + V1Pod, + V1PodSpec, + V1ResourceRequirements, + V1SecurityContext, + V1Service, + V1ServiceBackendPort, + V1ServicePort, + V1ServiceSpec, + V1Toleration, + V1Volume, + V1VolumeMount, +) + +from openhands.core.config import OpenHandsConfig +from openhands.core.exceptions import ( + AgentRuntimeDisconnectedError, + AgentRuntimeNotFoundError, +) +from openhands.core.logger import DEBUG +from openhands.core.logger import openhands_logger as logger +from openhands.events import EventStream +from openhands.integrations.provider import PROVIDER_TOKEN_TYPE +from openhands.runtime.impl.action_execution.action_execution_client import ( + ActionExecutionClient, +) +from openhands.runtime.plugins import PluginRequirement +from openhands.runtime.runtime_status import RuntimeStatus +from openhands.runtime.utils.command import get_action_execution_server_startup_command +from openhands.utils.async_utils import call_sync_from_async +from openhands.utils.shutdown_listener import add_shutdown_listener +from openhands.utils.tenacity_stop import stop_if_should_exit + 
+POD_NAME_PREFIX = 'openhands-runtime-'
+POD_LABEL = 'openhands-runtime'
+
+
+class KubernetesRuntime(ActionExecutionClient):
+    """
+    A Kubernetes runtime for OpenHands that works with Kind.
+
+    This runtime creates pods in a Kubernetes cluster to run the agent code.
+    It uses the Kubernetes Python client to create and manage the pods.
+
+    For every session the runtime manages a pod, a workspace PVC, two ClusterIP
+    services (action execution server and VSCode) and a VSCode ingress. All of
+    them are named after ``POD_NAME_PREFIX + sid``.
+
+    Args:
+        config (OpenHandsConfig): The application configuration.
+        event_stream (EventStream): The event stream to subscribe to.
+        sid (str, optional): The session ID. Defaults to 'default'.
+        plugins (list[PluginRequirement] | None, optional): List of plugin requirements. Defaults to None.
+        env_vars (dict[str, str] | None, optional): Environment variables to set. Defaults to None.
+        status_callback (Callable | None, optional): Callback for status updates. Defaults to None.
+        attach_to_existing (bool, optional): Whether to attach to an existing pod. Defaults to False.
+        headless_mode (bool, optional): Whether to run in headless mode. Defaults to True.
+        user_id (str | None, optional): Forwarded unchanged to ActionExecutionClient. Defaults to None.
+        git_provider_tokens (PROVIDER_TOKEN_TYPE | None, optional): Forwarded unchanged to ActionExecutionClient. Defaults to None.
+
+    Raises:
+        ValueError: If the configuration has no kubernetes section.
+    """
+
+    # Id of the shutdown listener. It is registered once per process, by the
+    # first instance that is constructed successfully.
+    _shutdown_listener_id: UUID | None = None
+    # Namespace of the most recently constructed runtime; read by `delete`.
+    _namespace: str = ''
+
+    def __init__(
+        self,
+        config: OpenHandsConfig,
+        event_stream: EventStream,
+        sid: str = 'default',
+        plugins: list[PluginRequirement] | None = None,
+        env_vars: dict[str, str] | None = None,
+        status_callback: Callable | None = None,
+        attach_to_existing: bool = False,
+        headless_mode: bool = True,
+        user_id: str | None = None,
+        git_provider_tokens: PROVIDER_TOKEN_TYPE | None = None,
+    ):
+        self.config = config
+        self._runtime_initialized: bool = False
+        self.status_callback = status_callback
+
+        # Load and validate Kubernetes configuration
+        if self.config.kubernetes is None:
+            raise ValueError(
+                'Kubernetes configuration is required when using KubernetesRuntime. '
+                'Please add a [kubernetes] section to your configuration.'
+            )
+
+        self._k8s_config = self.config.kubernetes
+        self._k8s_namespace = self._k8s_config.namespace
+        KubernetesRuntime._namespace = self._k8s_namespace
+
+        # Register the shutdown hook only after the configuration has been
+        # validated, and bind plain values instead of `self`, so the hook can
+        # never fail on an attribute that a half-constructed instance lacks.
+        if not KubernetesRuntime._shutdown_listener_id:
+            shutdown_namespace = self._k8s_namespace
+            KubernetesRuntime._shutdown_listener_id = add_shutdown_listener(
+                lambda: KubernetesRuntime._cleanup_k8s_resources(
+                    namespace=shutdown_namespace,
+                    remove_pvc=True,
+                    conversation_id=sid,
+                )  # this is when you ctrl+c.
+            )
+
+        # Initialize ports with default values in the required range
+        self._container_port = 8080  # Default internal container port
+        self._vscode_port = 8081  # Default VSCode port.
+        self._app_ports: list[int] = [
+            30082,
+            30083,
+        ]  # Default app ports in valid range # The agent prefers these when exposing an application.
+
+        self.k8s_client, self.k8s_networking_client = self._init_kubernetes_client()
+
+        self.pod_image = self.config.sandbox.runtime_container_image
+        if not self.pod_image:
+            # If runtime_container_image isn't set, use the base_container_image as a fallback
+            self.pod_image = self.config.sandbox.base_container_image
+
+        self.pod_name = self._get_pod_name(sid)
+
+        # Initialize the API URL with the initial port value
+        self.k8s_local_url = f'http://{self._get_svc_name(self.pod_name)}.{self._k8s_namespace}.svc.cluster.local'
+        self.api_url = f'{self.k8s_local_url}:{self._container_port}'
+
+        super().__init__(
+            config,
+            event_stream,
+            sid,
+            plugins,
+            env_vars,
+            status_callback,
+            attach_to_existing,
+            headless_mode,
+            user_id,
+            git_provider_tokens,
+        )
+
+    @staticmethod
+    def _get_svc_name(pod_name: str) -> str:
+        """Get the service name for the pod."""
+        return f'{pod_name}-svc'
+
+    @staticmethod
+    def _get_vscode_svc_name(pod_name: str) -> str:
+        """Get the VSCode service name for the pod."""
+        return f'{pod_name}-svc-code'
+
+    @staticmethod
+    def _get_vscode_ingress_name(pod_name: str) -> str:
+        """Get the VSCode ingress name for the pod."""
+        return f'{pod_name}-ingress-code'
+
+    @staticmethod
+    def _get_vscode_tls_secret_name(pod_name: str) -> str:
+        """Get the TLS secret name for the VSCode ingress."""
+        return f'{pod_name}-tls-secret'
+
+    @staticmethod
+    def _get_pvc_name(pod_name: str) -> str:
+        """Get the PVC name for the pod."""
+        return f'{pod_name}-pvc'
+
+    @staticmethod
+    def _get_pod_name(sid: str) -> str:
+        """Get the pod name for the session."""
+        return POD_NAME_PREFIX + sid
+
+    @property
+    def action_execution_server_url(self):
+        """URL of the action execution server (the in-cluster service URL)."""
+        return self.api_url
+
+    @property
+    def node_selector(self) -> dict[str, str] | None:
+        """Node selector for the pod, or None unless both key and value are configured."""
+        if (
+            not self._k8s_config.node_selector_key
+            or not self._k8s_config.node_selector_val
+        ):
+            return None
+        return {self._k8s_config.node_selector_key: self._k8s_config.node_selector_val}
+
+    @property
+    def tolerations(self) -> list[V1Toleration] | None:
+        """Tolerations parsed from the configured YAML string.
+
+        Returns:
+            The parsed tolerations, or None when nothing is configured or the
+            configured value is invalid. Invalid values are logged, not raised.
+        """
+        if not self._k8s_config.tolerations_yaml:
+            return None
+        tolerations_yaml_str = self._k8s_config.tolerations_yaml
+        try:
+            tolerations_data = yaml.safe_load(tolerations_yaml_str)
+        except yaml.YAMLError as e:
+            logger.error(
+                f'Error parsing tolerations YAML: {tolerations_yaml_str}. Error: {e}'
+            )
+            return None
+
+        if not isinstance(tolerations_data, list):
+            logger.error(
+                f'Invalid tolerations format. Should be type list: {tolerations_yaml_str}. Expected a list.'
+            )
+            return None
+
+        try:
+            return [V1Toleration(**toleration) for toleration in tolerations_data]
+        except TypeError as e:
+            # Raised when an entry is not a mapping or uses a key that
+            # V1Toleration does not accept as a keyword argument.
+            logger.error(
+                f'Invalid toleration entry in: {tolerations_yaml_str}. Error: {e}'
+            )
+            return None
+
+    async def connect(self):
+        """Connect to the runtime by creating or attaching to a pod.
+
+        Raises:
+            AgentRuntimeDisconnectedError: If attaching to an existing pod was
+                requested but failed, or if the pod never became ready.
+            AgentRuntimeNotFoundError: If the Kubernetes resources could not be
+                created.
+        """
+        self.log('info', f'Connecting to runtime with conversation ID: {self.sid}')
+        self.log('info', f'self._attach_to_existing: {self.attach_to_existing}')
+        self.set_runtime_status(RuntimeStatus.STARTING_RUNTIME)
+        self.log('info', f'Using API URL {self.api_url}')
+
+        try:
+            await call_sync_from_async(self._attach_to_pod)
+        except client.rest.ApiException as e:
+            # we are not set to attach to existing, ignore error and init k8s resources.
+            if self.attach_to_existing:
+                self.log(
+                    'error',
+                    f'Pod {self.pod_name} not found or cannot connect to it.',
+                )
+                raise AgentRuntimeDisconnectedError from e
+
+            self.log('info', f'Starting runtime with image: {self.pod_image}')
+            try:
+                await call_sync_from_async(self._init_k8s_resources)
+                self.log(
+                    'info',
+                    f'Pod started: {self.pod_name}. VSCode URL: {self.vscode_url}',
+                )
+            except Exception as init_error:
+                self.log('error', f'Failed to initialize k8s resources: {init_error}')
+                raise AgentRuntimeNotFoundError(
+                    f'Failed to initialize kubernetes resources: {init_error}'
+                ) from init_error
+
+        if not self.attach_to_existing:
+            self.log('info', 'Waiting for pod to become ready ...')
+            self.set_runtime_status(RuntimeStatus.STARTING_RUNTIME)
+        try:
+            await call_sync_from_async(self._wait_until_ready)
+        except Exception as alive_error:
+            self.log('error', f'Failed to connect to runtime: {alive_error}')
+            self.send_error_message(
+                'ERROR$RUNTIME_CONNECTION',
+                f'Failed to connect to runtime: {alive_error}',
+            )
+            raise AgentRuntimeDisconnectedError(
+                f'Failed to connect to runtime: {alive_error}'
+            ) from alive_error
+
+        if not self.attach_to_existing:
+            self.log('info', 'Runtime is ready.')
+            await call_sync_from_async(self.setup_initial_env)
+
+        self.log(
+            'info',
+            f'Pod initialized with plugins: {[plugin.name for plugin in self.plugins]}. VSCode URL: {self.vscode_url}',
+        )
+        if not self.attach_to_existing:
+            self.set_runtime_status(RuntimeStatus.READY)
+        self._runtime_initialized = True
+
+    def _attach_to_pod(self):
+        """Attach to an existing pod.
+
+        Returns:
+            True once the pod exists and is running/ready.
+
+        Raises:
+            client.rest.ApiException: If the pod cannot be read (e.g. it does not exist).
+            AgentRuntimeDisconnectedError: If the pod exists but does not become ready.
+        """
+        try:
+            pod = self.k8s_client.read_namespaced_pod(
+                name=self.pod_name, namespace=self._k8s_namespace
+            )
+
+            if pod.status.phase != 'Running':
+                try:
+                    self._wait_until_ready()
+                except TimeoutError as timeout_error:
+                    raise AgentRuntimeDisconnectedError(
+                        f'Pod {self.pod_name} exists but failed to become ready.'
+                    ) from timeout_error
+
+            self.log('info', f'Successfully attached to pod {self.pod_name}')
+            return True
+
+        except client.rest.ApiException as e:
+            self.log('error', f'Failed to attach to pod: {e}')
+            raise
+
+    @tenacity.retry(
+        stop=tenacity.stop_after_delay(300) | stop_if_should_exit(),
+        retry=tenacity.retry_if_exception_type(TimeoutError),
+        reraise=True,
+        wait=tenacity.wait_fixed(2),
+    )
+    def _wait_until_ready(self):
+        """Wait until the runtime server is alive by checking the pod status in Kubernetes.
+
+        Each attempt raises TimeoutError while the pod is not ready, which makes
+        tenacity retry every 2 seconds for up to 300 seconds; the last
+        TimeoutError is re-raised when the retries are exhausted.
+        """
+        self.log('info', f'Checking if pod {self.pod_name} is ready in Kubernetes')
+        pod = self.k8s_client.read_namespaced_pod(
+            name=self.pod_name, namespace=self._k8s_namespace
+        )
+        if pod.status.phase == 'Running' and pod.status.conditions:
+            for condition in pod.status.conditions:
+                if condition.type == 'Ready' and condition.status == 'True':
+                    self.log('info', f'Pod {self.pod_name} is ready!')
+                    return True  # Exit the function if the pod is ready
+
+        self.log(
+            'info',
+            f'Pod {self.pod_name} is not ready yet. Current phase: {pod.status.phase}',
+        )
+        raise TimeoutError(f'Pod {self.pod_name} is not in Running state yet.')
+
+    @staticmethod
+    @lru_cache(maxsize=1)
+    def _init_kubernetes_client() -> tuple[client.CoreV1Api, client.NetworkingV1Api]:
+        """Initialize the Kubernetes client.
+
+        The result is cached, so all runtimes in the process share one pair of
+        API clients.
+        """
+        try:
+            config.load_incluster_config()  # Even local usage with mirrord technically uses an incluster config.
+            return client.CoreV1Api(), client.NetworkingV1Api()
+        except Exception:
+            logger.error(
+                'Failed to initialize Kubernetes client. Make sure you have kubectl configured correctly or are running in a Kubernetes cluster.',
+            )
+            raise
+
+    @staticmethod
+    def _cleanup_k8s_resources(
+        namespace: str, remove_pvc: bool = False, conversation_id: str = ''
+    ):
+        """Clean up the Kubernetes resources of one conversation.
+
+        Every resource is deleted independently, so a resource that is already
+        gone does not prevent the remaining ones from being removed. This
+        function is best-effort and never raises.
+
+        :param namespace: Namespace the resources live in.
+        :param remove_pvc: If True, also remove persistent volume claims (defaults to False).
+        :param conversation_id: Session id the resource names are derived from.
+        """
+        try:
+            k8s_api, k8s_networking_api = KubernetesRuntime._init_kubernetes_client()
+
+            pod_name = KubernetesRuntime._get_pod_name(conversation_id)
+            service_name = KubernetesRuntime._get_svc_name(pod_name)
+            vscode_service_name = KubernetesRuntime._get_vscode_svc_name(pod_name)
+            ingress_name = KubernetesRuntime._get_vscode_ingress_name(pod_name)
+            pvc_name = KubernetesRuntime._get_pvc_name(pod_name)
+
+            # (kind, name, delete call, extra keyword arguments), in deletion order.
+            targets = []
+            if remove_pvc:
+                # Delete PVC if requested
+                targets.append(
+                    (
+                        'PVC',
+                        pvc_name,
+                        k8s_api.delete_namespaced_persistent_volume_claim,
+                        {'body': client.V1DeleteOptions()},
+                    )
+                )
+            targets.extend(
+                [
+                    (
+                        'pod',
+                        pod_name,
+                        k8s_api.delete_namespaced_pod,
+                        {'body': client.V1DeleteOptions()},
+                    ),
+                    ('service', service_name, k8s_api.delete_namespaced_service, {}),
+                    # Delete the vs code service
+                    (
+                        'service',
+                        vscode_service_name,
+                        k8s_api.delete_namespaced_service,
+                        {},
+                    ),
+                    (
+                        'ingress',
+                        ingress_name,
+                        k8s_networking_api.delete_namespaced_ingress,
+                        {},
+                    ),
+                ]
+            )
+
+            for kind, name, delete_resource, extra_kwargs in targets:
+                try:
+                    delete_resource(name=name, namespace=namespace, **extra_kwargs)
+                    logger.info(f'Deleted {kind} {name}')
+                except client.rest.ApiException as e:
+                    # A missing resource is expected (it may never have been
+                    # created); anything else is reported but must not stop
+                    # the cleanup of the remaining resources.
+                    if e.status != 404:
+                        logger.warning(f'Failed to delete {kind} {name}: {e}')
+            logger.info('Cleaned up Kubernetes resources')
+        except Exception as e:
+            logger.error(f'Error cleaning up k8s resources: {e}')
+
+    def _get_pvc_manifest(self):
+        """Create a PVC manifest for the runtime pod."""
+        # Create PVC
+        pvc = V1PersistentVolumeClaim(
+            api_version='v1',
+            kind='PersistentVolumeClaim',
+            metadata=V1ObjectMeta(
+                name=self._get_pvc_name(self.pod_name), namespace=self._k8s_namespace
+            ),
+            spec=V1PersistentVolumeClaimSpec(
+                access_modes=['ReadWriteOnce'],
+                resources=client.V1ResourceRequirements(
+                    requests={'storage': self._k8s_config.pvc_storage_size}
+                ),
+                storage_class_name=self._k8s_config.pvc_storage_class,
+            ),
+        )
+
+        return pvc
+
+    def _get_vscode_service_manifest(self):
+        """Create a service manifest for the VSCode server."""
+
+        vscode_service_spec = V1ServiceSpec(
+            selector={'app': POD_LABEL, 'session': self.sid},
+            type='ClusterIP',
+            ports=[
+                V1ServicePort(
+                    port=self._vscode_port,
+                    # Refers to the container port named 'vscode' in the pod manifest.
+                    target_port='vscode',
+                    name='code',
+                )
+            ],
+        )
+
+        vscode_service = V1Service(
+            metadata=V1ObjectMeta(name=self._get_vscode_svc_name(self.pod_name)),
+            spec=vscode_service_spec,
+        )
+        return vscode_service
+
+    def _get_runtime_service_manifest(self):
+        """Create a service manifest for the runtime pod execution-server."""
+        service_spec = V1ServiceSpec(
+            selector={'app': POD_LABEL, 'session': self.sid},
+            type='ClusterIP',
+            ports=[
+                V1ServicePort(
+                    port=self._container_port,
+                    # Refers to the container port named 'http' in the pod manifest.
+                    target_port='http',
+                    name='execution-server',
+                )
+            ],
+        )
+
+        service = V1Service(
+            metadata=V1ObjectMeta(name=self._get_svc_name(self.pod_name)),
+            spec=service_spec,
+        )
+        return service
+
+    def _get_runtime_pod_manifest(self):
+        """Create a pod manifest for the runtime sandbox."""
+        # Prepare environment variables
+        environment = [
+            V1EnvVar(name='port', value=str(self._container_port)),
+            V1EnvVar(name='PYTHONUNBUFFERED', value='1'),
+            V1EnvVar(name='VSCODE_PORT', value=str(self._vscode_port)),
+        ]
+
+        if self.config.debug or DEBUG:
+            environment.append(V1EnvVar(name='DEBUG', value='true'))
+
+        # Add runtime startup env vars
+        for key, value in self.config.sandbox.runtime_startup_env_vars.items():
+            environment.append(V1EnvVar(name=key, value=value))
+
+        # The workspace is always backed by the session PVC.
+        volume_mounts = [
+            V1VolumeMount(
+                name='workspace-volume',
+                mount_path=self.config.workspace_mount_path_in_sandbox,
+            ),
+        ]
+        volumes = [
+            V1Volume(
+                name='workspace-volume',
+                persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
+                    claim_name=self._get_pvc_name(self.pod_name)
+                ),
+            )
+        ]
+
+        # Prepare container ports
+        container_ports = [
+            V1ContainerPort(container_port=self._container_port, name='http'),
+        ]
+
+        if self.vscode_enabled:
+            container_ports.append(
+                V1ContainerPort(container_port=self._vscode_port, name='vscode')
+            )
+
+        for port in self._app_ports:
+            container_ports.append(V1ContainerPort(container_port=port))
+
+        # Define the readiness probe
+        health_check = client.V1Probe(
+            http_get=client.V1HTTPGetAction(
+                path='/alive',
+                port=self._container_port,  # Or the port your application listens on
+            ),
+            initial_delay_seconds=5,  # Adjust as needed
+            period_seconds=10,  # Adjust as needed
+            timeout_seconds=5,  # Adjust as needed
+            success_threshold=1,
+            failure_threshold=3,
+        )
+        # Prepare command
+        # Entry point command for generated sandbox runtime pod.
+        command = get_action_execution_server_startup_command(
+            server_port=self._container_port,
+            plugins=self.plugins,
+            app_config=self.config,
+            override_user_id=0,  # if we use the default of app_config.run_as_openhands then we cant edit files in vscode due to file perms.
+            override_username='root',
+        )
+
+        # Prepare resource requirements based on config
+        resources = V1ResourceRequirements(
+            limits={'memory': self._k8s_config.resource_memory_limit},
+            requests={
+                'cpu': self._k8s_config.resource_cpu_request,
+                'memory': self._k8s_config.resource_memory_request,
+            },
+        )
+
+        # Set security context for the container
+        security_context = V1SecurityContext(privileged=self._k8s_config.privileged)
+
+        # Create the container definition
+        container = V1Container(
+            name='runtime',
+            image=self.pod_image,
+            command=command,
+            env=environment,
+            ports=container_ports,
+            volume_mounts=volume_mounts,
+            working_dir='/openhands/code/',
+            resources=resources,
+            readiness_probe=health_check,
+            security_context=security_context,
+        )
+
+        # Create the pod definition
+        image_pull_secrets = None
+        if self._k8s_config.image_pull_secret:
+            image_pull_secrets = [
+                client.V1LocalObjectReference(name=self._k8s_config.image_pull_secret)
+            ]
+        pod = V1Pod(
+            metadata=V1ObjectMeta(
+                name=self.pod_name, labels={'app': POD_LABEL, 'session': self.sid}
+            ),
+            spec=V1PodSpec(
+                containers=[container],
+                volumes=volumes,
+                restart_policy='Never',
+                image_pull_secrets=image_pull_secrets,
+                node_selector=self.node_selector,
+                tolerations=self.tolerations,
+            ),
+        )
+
+        return pod
+
+    def _get_vscode_ingress_manifest(self):
+        """Create an ingress manifest for the VSCode server."""
+
+        tls = []
+        if self._k8s_config.ingress_tls_secret:
+            runtime_tls = V1IngressTLS(
+                hosts=[self.ingress_domain],
+                secret_name=self._k8s_config.ingress_tls_secret,
+            )
+            tls = [runtime_tls]
+
+        rules = [
+            V1IngressRule(
+                host=self.ingress_domain,
+                http=V1HTTPIngressRuleValue(
+                    paths=[
+                        V1HTTPIngressPath(
+                            path='/',
+                            path_type='Prefix',
+                            backend=V1IngressBackend(
+                                service=V1IngressServiceBackend(
+                                    port=V1ServiceBackendPort(
+                                        number=self._vscode_port,
+                                    ),
+                                    name=self._get_vscode_svc_name(self.pod_name),
+                                )
+                            ),
+                        )
+                    ]
+                ),
+            )
+        ]
+        ingress_spec = V1IngressSpec(rules=rules, tls=tls)
+
+        ingress = V1Ingress(
+            api_version='networking.k8s.io/v1',
+            metadata=V1ObjectMeta(
+                name=self._get_vscode_ingress_name(self.pod_name),
+                annotations={
+                    'external-dns.alpha.kubernetes.io/hostname': self.ingress_domain
+                },
+            ),
+            spec=ingress_spec,
+        )
+
+        return ingress
+
+    def _pvc_exists(self) -> bool:
+        """Check if the PVC already exists.
+
+        Returns:
+            True if the PVC can be read, False if the API answers 404.
+
+        Raises:
+            client.rest.ApiException: For any API error other than 404, so that
+                an unreadable PVC is never mistaken for a missing one.
+        """
+        try:
+            pvc = self.k8s_client.read_namespaced_persistent_volume_claim(
+                name=self._get_pvc_name(self.pod_name), namespace=self._k8s_namespace
+            )
+            return pvc is not None
+        except client.rest.ApiException as e:
+            if e.status == 404:
+                return False
+            self.log('error', f'Error checking PVC existence: {e}')
+            raise
+
+    def _init_k8s_resources(self):
+        """Initialize the Kubernetes resources.
+
+        Creates the PVC (if missing), the pod, both services and the VSCode
+        ingress, then blocks until the pod reports ready.
+        """
+        self.log('info', 'Preparing to start pod...')
+        self.set_runtime_status(RuntimeStatus.STARTING_RUNTIME)
+
+        self.log('info', f'Runtime will be accessible at {self.api_url}')
+
+        pod = self._get_runtime_pod_manifest()
+        service = self._get_runtime_service_manifest()
+        vscode_service = self._get_vscode_service_manifest()
+        pvc_manifest = self._get_pvc_manifest()
+        ingress = self._get_vscode_ingress_manifest()
+
+        # Create the pod in Kubernetes
+        try:
+            if not self._pvc_exists():
+                # Create PVC if it doesn't exist
+                self.k8s_client.create_namespaced_persistent_volume_claim(
+                    namespace=self._k8s_namespace, body=pvc_manifest
+                )
+                self.log('info', f'Created PVC {self._get_pvc_name(self.pod_name)}')
+            self.k8s_client.create_namespaced_pod(
+                namespace=self._k8s_namespace, body=pod
+            )
+            self.log('info', f'Created pod {self.pod_name}.')
+            # Create a service to expose the pod for external access
+            self.k8s_client.create_namespaced_service(
+                namespace=self._k8s_namespace, body=service
+            )
+            self.log('info', f'Created service {self._get_svc_name(self.pod_name)}')
+
+            # Create a second service for the vscode server.
+            self.k8s_client.create_namespaced_service(
+                namespace=self._k8s_namespace, body=vscode_service
+            )
+            self.log(
+                'info', f'Created service {self._get_vscode_svc_name(self.pod_name)}'
+            )
+
+            # create the vscode ingress.
+            self.k8s_networking_client.create_namespaced_ingress(
+                namespace=self._k8s_namespace, body=ingress
+            )
+            self.log(
+                'info',
+                f'Created ingress {self._get_vscode_ingress_name(self.pod_name)}',
+            )
+
+            # Wait for the pod to be running
+            self._wait_until_ready()
+
+        except client.rest.ApiException as e:
+            self.log('error', f'Failed to create pod and services: {e}')
+            raise
+        except RuntimeError as e:
+            self.log('error', f'Port forwarding failed: {e}')
+            raise
+
+    def close(self):
+        """Close the runtime and clean up resources.
+
+        The PVC is kept so the workspace survives; it is only removed by
+        `delete` or by the shutdown listener.
+        """
+        # this is called when a single conversation question is answered or a tab is closed.
+        self.log(
+            'info',
+            f'Closing runtime and cleaning up resources for conversation ID: {self.sid}',
+        )
+        # Call parent class close method first
+        super().close()
+
+        # Return early if we should keep the runtime alive or if we're attaching to existing
+        if self.config.sandbox.keep_runtime_alive or self.attach_to_existing:
+            self.log(
+                'info', 'Keeping runtime alive due to configuration or attach mode'
+            )
+            return
+
+        try:
+            self._cleanup_k8s_resources(
+                namespace=self._k8s_namespace,
+                remove_pvc=False,
+                conversation_id=self.sid,
+            )
+        except Exception as e:
+            self.log('error', f'Error closing runtime: {e}')
+
+    @property
+    def ingress_domain(self) -> str:
+        """Get the ingress domain for the runtime."""
+        return f'{self.sid}.{self._k8s_config.ingress_domain}'
+
+    @property
+    def vscode_url(self) -> str | None:
+        """Get the URL for VSCode server if enabled."""
+        if not self.vscode_enabled:
+            return None
+        token = super().get_vscode_token()
+        if not token:
+            return None
+
+        protocol = 'https' if self._k8s_config.ingress_tls_secret else 'http'
+        vscode_url = f'{protocol}://{self.ingress_domain}/?tkn={token}&folder={self.config.workspace_mount_path_in_sandbox}'
+        # NOTE(review): the logged URL contains the VSCode connection token.
+        self.log('info', f'VSCode URL: {vscode_url}')
+        return vscode_url
+
+    @property
+    def web_hosts(self) -> dict[str, int]:
+        """Get web hosts dict mapping for browser access."""
+        return {f'{self.k8s_local_url}:{port}': port for port in self._app_ports}
+
+    @classmethod
+    async def delete(cls, conversation_id: str):
+        """Delete resources associated with a conversation.
+
+        Uses the namespace recorded by the most recently constructed runtime;
+        when no runtime has been constructed in this process there is no
+        namespace to clean up, so nothing is deleted.
+        """
+        # This is triggered when you actually do the delete in the UI on the convo.
+        if not cls._namespace:
+            logger.warning(
+                f'No Kubernetes namespace is known; skipping cleanup for conversation {conversation_id}'
+            )
+            return
+
+        try:
+            cls._cleanup_k8s_resources(
+                namespace=cls._namespace,
+                remove_pvc=True,
+                conversation_id=conversation_id,
+            )
+
+        except Exception as e:
+            logger.error(
+                f'Error deleting resources for conversation {conversation_id}: {e}'
+            )
diff --git a/poetry.lock b/poetry.lock index 447235a7f2..6d48e97e7a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. [[package]] name = "aioboto3" @@ -462,7 +462,7 @@ description = "LTS Port of Python audioop" optional = false python-versions = ">=3.13" groups = ["main"] -markers = "python_version == \"3.13\"" +markers = "python_version >= \"3.13\"" files = [ {file = "audioop_lts-0.2.1-cp313-abi3-macosx_10_13_universal2.whl", hash = "sha256:fd1345ae99e17e6910f47ce7d52673c6a1a70820d78b67de1b7abb3af29c426a"}, {file = "audioop_lts-0.2.1-cp313-abi3-macosx_10_13_x86_64.whl", hash = "sha256:e175350da05d2087e12cea8e72a70a1a8b14a17e92ed2022952a4419689ede5e"}, @@ -2299,6 +2299,18 @@ https = ["urllib3 (>=1.24.1)"] paramiko = ["paramiko"] pgp = ["gpg"] +[[package]] +name = "durationpy" +version = "0.10" +description = "Module for converting between datetime.timedelta and Go's Duration strings."
+optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "durationpy-0.10-py3-none-any.whl", hash = "sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286"}, + {file = "durationpy-0.10.tar.gz", hash = "sha256:1fa6893409a6e739c9c72334fc65cca1f355dbdd93405d30f726deb5bde42fba"}, +] + [[package]] name = "e2b" version = "1.5.1" @@ -3039,8 +3051,8 @@ files = [ google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" proto-plus = [ - {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, {version = ">=1.22.3,<2.0.0dev"}, + {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, ] protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" @@ -3062,8 +3074,8 @@ googleapis-common-protos = ">=1.56.2,<2.0.0" grpcio = {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""} grpcio-status = {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""} proto-plus = [ - {version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""}, {version = ">=1.22.3,<2.0.0"}, + {version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0" requests = ">=2.18.0,<3.0.0" @@ -3281,8 +3293,8 @@ google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0", extras google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0" grpc-google-iam-v1 = ">=0.14.0,<1.0.0" proto-plus = [ - {version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""}, {version = ">=1.22.3,<2.0.0"}, + {version = ">=1.25.0,<2.0.0", markers 
= "python_version >= \"3.13\""}, ] protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0" @@ -4788,6 +4800,34 @@ files = [ {file = "kiwisolver-1.4.8.tar.gz", hash = "sha256:23d5f023bdc8c7e54eb65f03ca5d5bb25b601eac4d7f1a042888a1f45237987e"}, ] +[[package]] +name = "kubernetes" +version = "33.1.0" +description = "Kubernetes python client" +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "kubernetes-33.1.0-py2.py3-none-any.whl", hash = "sha256:544de42b24b64287f7e0aa9513c93cb503f7f40eea39b20f66810011a86eabc5"}, + {file = "kubernetes-33.1.0.tar.gz", hash = "sha256:f64d829843a54c251061a8e7a14523b521f2dc5c896cf6d65ccf348648a88993"}, +] + +[package.dependencies] +certifi = ">=14.05.14" +durationpy = ">=0.7" +google-auth = ">=1.0.1" +oauthlib = ">=3.2.2" +python-dateutil = ">=2.5.3" +pyyaml = ">=5.4.1" +requests = "*" +requests-oauthlib = "*" +six = ">=1.9.0" +urllib3 = ">=1.24.2" +websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.dev0 || >=0.43.dev0" + +[package.extras] +adal = ["adal (>=1.0.2)"] + [[package]] name = "lazy-loader" version = "0.4" @@ -5146,11 +5186,8 @@ files = [ {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"}, {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"}, - {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"}, - {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"}, - {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"}, {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"}, {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"}, {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"}, @@ -6544,8 +6581,8 @@ files = [ [package.dependencies] googleapis-common-protos = ">=1.52,<2.0" grpcio = [ - {version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""}, {version = ">=1.63.2,<2.0.0", markers = "python_version < \"3.13\""}, + {version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""}, ] opentelemetry-api = ">=1.15,<2.0" opentelemetry-exporter-otlp-proto-common = "1.34.1" @@ -9308,6 +9345,7 @@ files = [ {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"}, {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"}, ] +markers = {evaluation = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""] @@ -9550,7 +9588,7 @@ description = "Standard library aifc redistribution. \"dead battery\"." 
optional = false python-versions = "*" groups = ["main"] -markers = "python_version == \"3.13\"" +markers = "python_version >= \"3.13\"" files = [ {file = "standard_aifc-3.13.0-py3-none-any.whl", hash = "sha256:f7ae09cc57de1224a0dd8e3eb8f73830be7c3d0bc485de4c1f82b4a7f645ac66"}, {file = "standard_aifc-3.13.0.tar.gz", hash = "sha256:64e249c7cb4b3daf2fdba4e95721f811bde8bdfc43ad9f936589b7bb2fae2e43"}, @@ -9567,7 +9605,7 @@ description = "Standard library chunk redistribution. \"dead battery\"." optional = false python-versions = "*" groups = ["main"] -markers = "python_version == \"3.13\"" +markers = "python_version >= \"3.13\"" files = [ {file = "standard_chunk-3.13.0-py3-none-any.whl", hash = "sha256:17880a26c285189c644bd5bd8f8ed2bdb795d216e3293e6dbe55bbd848e2982c"}, {file = "standard_chunk-3.13.0.tar.gz", hash = "sha256:4ac345d37d7e686d2755e01836b8d98eda0d1a3ee90375e597ae43aaf064d654"}, @@ -10899,7 +10937,7 @@ version = "1.8.0" description = "WebSocket client for Python with low level API options" optional = false python-versions = ">=3.8" -groups = ["runtime"] +groups = ["main", "runtime"] files = [ {file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"}, {file = "websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"}, @@ -11729,4 +11767,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "^3.12,<3.14" -content-hash = "df8217d9808a5a1f5886e0328cbeb5032b20c28a677154888bd010f7bc945cb2" +content-hash = "cce67d8303f93acbf92f3a3603ad07ff82fea4163fc8c38614b3ecb172c34052" diff --git a/pyproject.toml b/pyproject.toml index 38f41495ee..c877aa2e56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,6 +85,8 @@ stripe = ">=11.5,<13.0" google-cloud-aiplatform = "*" anthropic = { extras = [ "vertex" ], version = "*" } boto3 = "*" +kubernetes = "^33.1.0" +pyyaml = "^6.0.2" [tool.poetry.group.dev] optional = true 
diff --git a/tests/unit/test_kubernetes_config.py b/tests/unit/test_kubernetes_config.py
new file mode 100644
index 0000000000..3bac845395
--- /dev/null
+++ b/tests/unit/test_kubernetes_config.py
@@ -0,0 +1,62 @@
+import pytest
+from pydantic import ValidationError
+
+from openhands.core.config.kubernetes_config import KubernetesConfig
+
+
+def test_kubernetes_config_defaults():
+    """A KubernetesConfig built with no arguments exposes the expected defaults."""
+    cfg = KubernetesConfig()
+    expected_defaults = {
+        'namespace': 'default',
+        'ingress_domain': 'localhost',
+        'pvc_storage_size': '2Gi',
+        'resource_cpu_request': '1',
+        'resource_memory_request': '1Gi',
+        'resource_memory_limit': '2Gi',
+    }
+    for field, expected in expected_defaults.items():
+        assert getattr(cfg, field) == expected
+    # These fields are expected to be None when nothing is passed in.
+    unset_fields = (
+        'pvc_storage_class',
+        'image_pull_secret',
+        'ingress_tls_secret',
+        'node_selector_key',
+        'node_selector_val',
+        'tolerations_yaml',
+    )
+    for field in unset_fields:
+        assert getattr(cfg, field) is None
+    assert cfg.privileged is False
+
+
+def test_kubernetes_config_custom_values():
+    """Every value handed to the constructor is readable back from the config."""
+    overrides = {
+        'namespace': 'test-ns',
+        'ingress_domain': 'test.example.com',
+        'pvc_storage_size': '5Gi',
+        'pvc_storage_class': 'fast',
+        'resource_cpu_request': '2',
+        'resource_memory_request': '2Gi',
+        'resource_memory_limit': '4Gi',
+        'image_pull_secret': 'pull-secret',
+        'ingress_tls_secret': 'tls-secret',
+        'node_selector_key': 'zone',
+        'node_selector_val': 'us-east-1',
+        'tolerations_yaml': '- key: special\n value: true',
+    }
+    cfg = KubernetesConfig(**overrides, privileged=True)
+    for field, expected in overrides.items():
+        assert getattr(cfg, field) == expected
+    # The boolean flag is checked by identity rather than equality.
+    assert cfg.privileged is True
+
+
+def test_kubernetes_config_validation():
+    """An unknown keyword argument makes construction raise ValidationError."""
+    # Extra fields are not allowed on the config model.
+    unknown_kwargs = {'extra_field': 'not allowed'}
+    with pytest.raises(ValidationError):
+        KubernetesConfig(**unknown_kwargs)