From 94c98c77d12c14b49003fdda796debd30d6f10f4 Mon Sep 17 00:00:00 2001
From: Philip Haupt <der.mad.mob@gmail.com>
Date: Tue, 10 Feb 2026 01:00:05 +0100
Subject: [PATCH] added ollama

---
Review notes (ignored by git am; everything up to the first diff is free text):

* ollama/main.yaml is the rendered output of ollama/src (Helm chart ollama
  1.41.0 inflated through kustomize helmCharts). The render used to contain
  the chart's test hook Pod (ollama-test-connection, annotation
  helm.sh/hook=test, untagged busybox image, restartPolicy Never). Applied
  as a plain manifest that Pod is created like any other workload, so it is
  removed here and tests.enabled is set to false in src/values.yaml to keep
  it out of future renders.
* ollama/kustomization.yaml now ends with a newline.
* The From: address no longer carries typographic quotes.
* src/values.yaml: loadBalancerIP is an explicit null; comment typos fixed.
* NOTE(review): the ollama container still has empty resource requests and
  limits (resources.requests / resources.limits are {}); pick values that
  fit the target nodes.
* NOTE(review): the "index" lines of the three edited files still show the
  blob ids of the original content; regenerate the patch with
  git format-patch after applying if exact ids matter.

 ollama/kustomization.yaml     |   6 +
 ollama/main.yaml              | 127 +++++++++
 ollama/src/kustomization.yaml |  12 +
 ollama/src/values.yaml        | 452 ++++++++++++++++++++++++++++++++++
 4 files changed, 597 insertions(+)
 create mode 100644 ollama/kustomization.yaml
 create mode 100644 ollama/main.yaml
 create mode 100644 ollama/src/kustomization.yaml
 create mode 100644 ollama/src/values.yaml

diff --git a/ollama/kustomization.yaml b/ollama/kustomization.yaml
new file mode 100644
index 0000000..4ae436c
--- /dev/null
+++ b/ollama/kustomization.yaml
@@ -0,0 +1,6 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - main.yaml
diff --git a/ollama/main.yaml b/ollama/main.yaml
new file mode 100644
index 0000000..13a0f75
--- /dev/null
+++ b/ollama/main.yaml
@@ -0,0 +1,127 @@
+apiVersion: v1
+automountServiceAccountToken: true
+kind: ServiceAccount
+metadata:
+  labels:
+    app.kubernetes.io/instance: ollama
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: ollama
+    app.kubernetes.io/version: 0.15.4
+    helm.sh/chart: ollama-1.41.0
+  name: ollama
+  namespace: ollama
+---
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app.kubernetes.io/instance: ollama
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: ollama
+    app.kubernetes.io/version: 0.15.4
+    helm.sh/chart: ollama-1.41.0
+  name: ollama
+  namespace: ollama
+spec:
+  ports:
+  - name: http
+    port: 11434
+    protocol: TCP
+    targetPort: http
+  selector:
+    app.kubernetes.io/instance: ollama
+    app.kubernetes.io/name: ollama
+  type: ClusterIP
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  labels:
+    app.kubernetes.io/instance: ollama
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: ollama
+    app.kubernetes.io/version: 0.15.4
+    helm.sh/chart: ollama-1.41.0
+  name: ollama
+  namespace: ollama
+spec:
+  accessModes:
+  - ReadWriteOnce
+  resources:
+    requests:
+      storage: 30Gi
+  storageClassName: openebs-3-replicas
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  labels:
+    app.kubernetes.io/instance: ollama
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: ollama
+    app.kubernetes.io/version: 0.15.4
+    helm.sh/chart: ollama-1.41.0
+  name: ollama
+  namespace: ollama
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/instance: ollama
+      app.kubernetes.io/name: ollama
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/instance: ollama
+        app.kubernetes.io/managed-by: Helm
+        app.kubernetes.io/name: ollama
+        app.kubernetes.io/version: 0.15.4
+        helm.sh/chart: ollama-1.41.0
+    spec:
+      containers:
+      - args: null
+        env:
+        - name: OLLAMA_HOST
+          value: 0.0.0.0:11434
+        envFrom: null
+        image: ollama/ollama:0.15.4
+        imagePullPolicy: IfNotPresent
+        livenessProbe:
+          failureThreshold: 6
+          httpGet:
+            path: /
+            port: http
+          initialDelaySeconds: 60
+          periodSeconds: 10
+          successThreshold: 1
+          timeoutSeconds: 5
+        name: ollama
+        ports:
+        - containerPort: 11434
+          name: http
+          protocol: TCP
+        readinessProbe:
+          failureThreshold: 6
+          httpGet:
+            path: /
+            port: http
+          initialDelaySeconds: 30
+          periodSeconds: 5
+          successThreshold: 1
+          timeoutSeconds: 3
+        resources:
+          limits: {}
+          requests: {}
+        securityContext: {}
+        volumeMounts:
+        - mountPath: /root/.ollama
+          name: ollama-data
+      securityContext: {}
+      serviceAccountName: ollama
+      terminationGracePeriodSeconds: 120
+      volumes:
+      - name: ollama-data
+        persistentVolumeClaim:
+          claimName: ollama
diff --git a/ollama/src/kustomization.yaml b/ollama/src/kustomization.yaml
new file mode 100644
index 0000000..03cdc96
--- /dev/null
+++ b/ollama/src/kustomization.yaml
@@ -0,0 +1,12 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+helmCharts:
+  - name: ollama
+    repo: https://otwld.github.io/ollama-helm/
+    version: 1.41.0
+    releaseName: ollama
+    includeCRDs: true
+    namespace: ollama
+    valuesFile: values.yaml
diff --git a/ollama/src/values.yaml b/ollama/src/values.yaml
new file mode 100644
index 0000000..a39c37b
--- /dev/null
+++ b/ollama/src/values.yaml
@@ -0,0 +1,452 @@
+# Default values for ollama-helm.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+# -- Number of replicas
+replicaCount: 1
+
+# Knative configuration
+knative:
+  # -- Enable Knative integration
+  enabled: false
+  # -- Knative service container concurrency
+  containerConcurrency: 0
+  # -- Knative service timeout seconds
+  timeoutSeconds: 300
+  # -- Knative service response start timeout seconds
+  responseStartTimeoutSeconds: 300
+  # -- Knative service idle timeout seconds
+  idleTimeoutSeconds: 300
+  # -- Knative service annotations
+  annotations: {}
+
+# Docker image
+image:
+  # -- Docker image registry
+  repository: ollama/ollama
+
+  # -- Docker pull policy
+  pullPolicy: IfNotPresent
+
+  # -- Docker image tag, overrides the image tag whose default is the chart appVersion.
+  tag: ""
+
+# -- Docker registry secret names as an array
+imagePullSecrets: []
+
+# -- String to partially override template (will maintain the release name)
+nameOverride: ""
+
+# -- String to fully override template
+fullnameOverride: ""
+
+# -- String to fully override namespace
+namespaceOverride: ""
+
+# Ollama parameters
+ollama:
+  # Port Ollama is listening on
+  port: 11434
+
+  gpu:
+    # -- Enable GPU integration
+    enabled: false
+
+    # -- Enable DRA GPU integration
+    # If enabled, it will use DRA instead of Device Driver Plugin and create a ResourceClaim and GpuClaimParameters
+    draEnabled: false
+
+    # -- DRA GPU DriverClass
+    draDriverClass: "gpu.nvidia.com"
+
+    # -- Existing DRA GPU ResourceClaim Template
+    draExistingClaimTemplate: ""
+
+    # -- GPU type: 'nvidia' or 'amd'
+    # If 'ollama.gpu.enabled', default value is nvidia
+    # If set to 'amd', this will add 'rocm' suffix to image tag if 'image.tag' is not overridden
+    # This is because AMD and CPU/CUDA are different images
+    type: 'nvidia'
+
+    # -- Specify the number of GPU
+    # If you use MIG section below then this parameter is ignored
+    number: 1
+
+    # -- only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice
+    nvidiaResource: "nvidia.com/gpu"
+    # nvidiaResource: "nvidia.com/mig-1g.10gb" # example
+    # If you want to use more than one NVIDIA MIG you can use the following syntax (then nvidiaResource is ignored and only the configuration in the following MIG section is used)
+
+    mig:
+      # -- Enable multiple mig devices
+      # If enabled you will have to specify the mig devices
+      # If enabled is set to false this section is ignored
+      enabled: false
+
+      # -- Specify the mig devices and the corresponding number
+      devices: {}
+      #   1g.10gb: 1
+      #   3g.40gb: 1
+
+  models:
+    # -- List of models to pull at container startup
+    # The more you add, the longer the container will take to start if models are not present
+    # pull:
+    #   - llama2
+    #   - mistral
+    pull: []
+
+    # -- List of models to load in memory at container startup
+    # run:
+    #   - llama2
+    #   - mistral
+    run: []
+
+    # -- List of models to create at container startup, there are two options
+    # 1. Create a raw model
+    # 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory.
+    # create:
+    #   - name: llama3.1-ctx32768
+    #     configMapRef: my-configmap
+    #     configMapKeyRef: configmap-key
+    #   - name: llama3.1-ctx32768
+    #     template: |
+    #       FROM llama3.1
+    #       PARAMETER num_ctx 32768
+    create: []
+
+    # -- Automatically remove models present on the disk but not specified in the values file
+    clean: false
+
+  # -- Add insecure flag for pulling at container startup
+  insecure: false
+
+  # -- Override ollama-data volume mount path, default: "/root/.ollama"
+  mountPath: ""
+
+# Service account
+# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
+serviceAccount:
+  # -- Specifies whether a service account should be created
+  create: true
+
+  # -- Automatically mount a ServiceAccount's API credentials?
+  automount: true
+
+  # -- Annotations to add to the service account
+  annotations: {}
+
+  # -- The name of the service account to use.
+  # If not set and create is true, a name is generated using the fullname template
+  name: ""
+
+# -- Map of annotations to add to the pods
+podAnnotations: {}
+
+# -- Map of labels to add to the pods
+podLabels: {}
+
+# -- Pod Security Context
+podSecurityContext: {}
+  # fsGroup: 2000
+
+# -- Priority Class Name
+priorityClassName: ""
+
+# -- Container Security Context
+securityContext: {}
+  # capabilities:
+  #   drop:
+  #     - ALL
+  # readOnlyRootFilesystem: true
+  # runAsNonRoot: true
+  # runAsUser: 1000
+
+# -- Specify runtime class
+runtimeClassName: ""
+
+# Configure Service
+service:
+
+  # -- Service type
+  type: ClusterIP
+
+  # -- Service port
+  port: 11434
+
+  # -- Service node port when service type is 'NodePort'
+  nodePort: 31434
+
+  # -- Load Balancer IP address
+  loadBalancerIP: null
+
+  # -- Annotations to add to the service
+  annotations: {}
+
+  # -- Labels to add to the service
+  labels: {}
+
+  # -- IP Families for the service
+  ipFamilies: []
+  # - IPv4
+  # - IPv6
+
+  # -- IP Family Policy for the service
+  ipFamilyPolicy: ""
+  # SingleStack
+  # PreferDualStack
+  # RequireDualStack
+
+# Configure Deployment
+deployment:
+
+  # -- Labels to add to the deployment
+  labels: {}
+
+# Configure the ingress resource that allows you to access the Ollama service
+ingress:
+  # -- Enable ingress controller resource
+  enabled: false
+
+  # -- IngressClass that will be used to implement the Ingress (Kubernetes 1.18+)
+  className: ""
+
+  # -- Additional annotations for the Ingress resource.
+  annotations: {}
+    # kubernetes.io/ingress.class: traefik
+    # kubernetes.io/ingress.class: nginx
+    # kubernetes.io/tls-acme: "true"
+
+  # The list of hostnames to be covered with this ingress record.
+  hosts:
+    - host: ollama.local
+      paths:
+        - path: /
+          pathType: Prefix
+
+  # -- The tls configuration for hostnames to be covered with this ingress record.
+  tls: []
+  # - secretName: chart-example-tls
+  #   hosts:
+  #     - chart-example.local
+
+# Configure resource requests and limits
+# ref: http://kubernetes.io/docs/user-guide/compute-resources/
+resources:
+  # -- Pod requests
+  requests: {}
+    # Memory request
+    # memory: 4096Mi
+
+    # CPU request
+    # cpu: 2000m
+
+  # -- Pod limit
+  limits: {}
+    # Memory limit
+    # memory: 8192Mi
+
+    # CPU limit
+    # cpu: 4000m
+
+# Configure extra options for liveness probe
+# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
+livenessProbe:
+  # -- Enable livenessProbe
+  enabled: true
+
+  # -- Request path for livenessProbe
+  path: /
+
+  # -- Initial delay seconds for livenessProbe
+  initialDelaySeconds: 60
+
+  # -- Period seconds for livenessProbe
+  periodSeconds: 10
+
+  # -- Timeout seconds for livenessProbe
+  timeoutSeconds: 5
+
+  # -- Failure threshold for livenessProbe
+  failureThreshold: 6
+
+  # -- Success threshold for livenessProbe
+  successThreshold: 1
+
+# Configure extra options for readiness probe
+# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
+readinessProbe:
+  # -- Enable readinessProbe
+  enabled: true
+
+  # -- Request path for readinessProbe
+  path: /
+
+  # -- Initial delay seconds for readinessProbe
+  initialDelaySeconds: 30
+
+  # -- Period seconds for readinessProbe
+  periodSeconds: 5
+
+  # -- Timeout seconds for readinessProbe
+  timeoutSeconds: 3
+
+  # -- Failure threshold for readinessProbe
+  failureThreshold: 6
+
+  # -- Success threshold for readinessProbe
+  successThreshold: 1
+
+# Configure autoscaling
+autoscaling:
+  # -- Enable autoscaling
+  enabled: false
+
+  # -- Number of minimum replicas
+  minReplicas: 1
+
+  # -- Number of maximum replicas
+  maxReplicas: 100
+
+  # -- CPU usage to target replica
+  targetCPUUtilizationPercentage: 80
+
+  # -- targetMemoryUtilizationPercentage: 80
+
+# -- Additional volumes on the output Deployment definition.
+volumes: []
+# -- - name: foo
+#   secret:
+#     secretName: mysecret
+#     optional: false
+
+# -- Additional volumeMounts on the output Deployment definition.
+volumeMounts: []
+# -- - name: foo
+#   mountPath: "/etc/foo"
+#   readOnly: true
+
+# -- Additional arguments on the output Deployment definition.
+extraArgs: []
+
+# -- Additional environment variables on the output Deployment definition.
+# For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go
+extraEnv: []
+#  - name: OLLAMA_DEBUG
+#    value: "1"
+
+# -- Additional environment variables from external sources (like ConfigMap)
+extraEnvFrom: []
+#  - configMapRef:
+#      name: my-env-configmap
+
+# Enable persistence using Persistent Volume Claims
+# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/
+persistentVolume:
+  # -- Enable persistence using PVC
+  enabled: true
+
+  # -- Ollama server data Persistent Volume access modes
+  # Must match those of existing PV or dynamic provisioner
+  # Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
+  accessModes:
+    - ReadWriteOnce
+
+  # -- Ollama server data Persistent Volume annotations
+  annotations: {}
+
+  # -- If you'd like to bring your own PVC for persisting Ollama state, pass the name of the
+  # created + ready PVC here. If set, this Chart will not create the default PVC.
+  # Requires server.persistentVolume.enabled: true
+  existingClaim: ""
+
+  # -- Ollama server data Persistent Volume size
+  size: 30Gi
+
+  # -- Ollama server data Persistent Volume Storage Class
+  # If defined, storageClassName:
+  # If set to "-", storageClassName: "", which disables dynamic provisioning
+  # If undefined (the default) or set to null, no storageClassName spec is
+  # set, choosing the default provisioner. (gp2 on AWS, standard on
+  # GKE, AWS & OpenStack)
+  storageClass: openebs-3-replicas
+
+  # -- Ollama server data Persistent Volume Binding Mode
+  # If defined, volumeMode:
+  # If empty (the default) or set to null, no volumeBindingMode spec is
+  # set, choosing the default mode.
+  volumeMode: ""
+
+  # -- Subdirectory of Ollama server data Persistent Volume to mount
+  # Useful if the volume's root directory is not empty
+  subPath: ""
+
+  # -- Pre-existing PV to attach this claim to
+  # Useful if a CSI auto-provisions a PV for you and you want to always
+  # reference the PV moving forward
+  volumeName: ""
+
+# -- Node labels for pod assignment.
+nodeSelector: {}
+
+# -- Tolerations for pod assignment
+tolerations: []
+
+# -- Affinity for pod assignment
+affinity: {}
+
+# -- Lifecycle for pod assignment (override ollama.models startup pull/run)
+lifecycle: {}
+
+# How to replace existing pods
+updateStrategy:
+  # -- Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate
+  type: "Recreate"
+
+# -- Topology Spread Constraints for pod assignment
+topologySpreadConstraints: {}
+
+# -- Wait for a grace period
+terminationGracePeriodSeconds: 120
+
+# -- Init containers to add to the pod
+initContainers: []
+# - name: startup-tool
+#   image: alpine:3
+#   command: [sh, -c]
+#   args:
+#     - echo init
+
+# -- Use the host's ipc namespace.
+hostIPC: false
+
+# -- Use the host's pid namespace
+hostPID: false
+
+# -- Use the host's network namespace.
+hostNetwork: false
+
+# -- Extra K8s manifests to deploy
+extraObjects: []
+# - apiVersion: v1
+#   kind: PersistentVolume
+#   metadata:
+#     name: aws-efs
+#   data:
+#     key: "value"
+# - apiVersion: scheduling.k8s.io/v1
+#   kind: PriorityClass
+#   metadata:
+#     name: high-priority
+#   value: 1000000
+#   globalDefault: false
+#   description: "This priority class should be used for XYZ service pods only."
+
+# Test connection pods (disabled: the kustomize/Helm render would emit the test hook as a regular Pod)
+tests:
+  enabled: false
+  # -- Labels to add to the tests
+  labels: {}
+  # -- Annotations to add to the tests
+  annotations: {}
+