fix(GPU): persist GPU type to KV store for reliable passthrough

chriscrosstalk · claude · jakeaturner · commit fe08fc0e28fa · 2026-03-20T10:13:54.000-07:00
GPU detection results were only applied at container creation time and
never persisted. If live detection failed transiently (Docker daemon
hiccup, runtime temporarily unavailable), Ollama would silently fall
back to CPU-only mode with no way to recover short of force-reinstall.

Now _detectGPUType() persists successful detections to the KV store
(gpu.type = 'nvidia' | 'amd') and uses the saved value as a fallback
when live detection returns nothing. This ensures GPU config survives
across container recreations regardless of transient detection failures.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/admin/app/services/docker_service.ts b/admin/app/services/docker_service.ts
@@ -691,6 +691,7 @@ export class DockerService {
         const runtimes = dockerInfo.Runtimes || {}
         if ('nvidia' in runtimes) {
           logger.info('[DockerService] NVIDIA container runtime detected via Docker API')
+          await this._persistGPUType('nvidia')
           return { type: 'nvidia' }
         }
       } catch (error) {
@@ -722,12 +723,26 @@ export class DockerService {
         )
         if (amdCheck.trim()) {
           logger.info('[DockerService] AMD GPU detected via lspci')
+          await this._persistGPUType('amd')
           return { type: 'amd' }
         }
       } catch (error) {
         // lspci not available, continue
       }
 
+      // Last resort: check if we previously detected a GPU and it's likely still present.
+      // This handles cases where live detection fails transiently (e.g., Docker daemon
+      // hiccup, runtime temporarily unavailable) but the hardware hasn't changed.
+      try {
+        const savedType = await KVStore.getValue('gpu.type')
+        if (savedType === 'nvidia' || savedType === 'amd') {
+          logger.info(`[DockerService] No GPU detected live, but KV store has '${savedType}' from previous detection. Using saved value.`)
+          return { type: savedType as 'nvidia' | 'amd' }
+        }
+      } catch {
+        // KV store not available, continue
+      }
+
       logger.info('[DockerService] No GPU detected')
       return { type: 'none' }
     } catch (error) {
@@ -736,6 +751,15 @@ export class DockerService {
     }
   }
 
+  private async _persistGPUType(type: 'nvidia' | 'amd'): Promise<void> {
+    try {
+      await KVStore.setValue('gpu.type', type)
+      logger.info(`[DockerService] Persisted GPU type '${type}' to KV store`)
+    } catch (error) {
+      logger.warn(`[DockerService] Failed to persist GPU type: ${error.message}`)
+    }
+  }
+
   /**
    * Discover AMD GPU DRI devices dynamically.
    * Returns an array of device configurations for Docker.
diff --git a/admin/types/kv_store.ts b/admin/types/kv_store.ts
@@ -9,6 +9,7 @@ export const KV_STORE_SCHEMA = {
   'ui.hasVisitedEasySetup':     'boolean',
   'ui.theme':                   'string',
   'ai.assistantCustomName':     'string',
+  'gpu.type':                   'string',
 } as const
 
 type KVTagToType<T extends string> = T extends 'boolean' ? boolean : string