Skip to content

Commit e31f956

Browse files
chriscrosstalkclaude
authored and committed
fix(benchmark): Fix AI benchmark connectivity and improve error handling
- Add OLLAMA_API_URL environment variable for Docker networking
- Use host.docker.internal to reach Ollama from NOMAD container
- Add extra_hosts config in compose for Linux compatibility
- Add downloading_ai_model status with clear progress indicator
- Show model download progress on first AI benchmark run
- Fail AI-only benchmarks with clear error if AI unavailable
- Display benchmark errors to users via Alert component
- Improve error messages with error codes for debugging

Fixes issue where AI benchmark silently failed due to NOMAD container being unable to reach Ollama at localhost:11434.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 565abca commit e31f956

File tree

4 files changed

+47
-15
lines changed

4 files changed

+47
-15
lines changed

admin/app/services/benchmark_service.ts

Lines changed: 30 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,10 @@ const BENCHMARK_CHANNEL = 'benchmark-progress'
4444
const AI_BENCHMARK_MODEL = 'llama3.2:1b'
4545
const AI_BENCHMARK_PROMPT = 'Explain recursion in programming in exactly 100 words.'
4646

47+
// Ollama API URL - configurable for Docker environments where localhost doesn't reach the host
48+
// In Docker, use host.docker.internal (Docker Desktop) or the host gateway IP (Linux)
49+
const OLLAMA_API_URL = process.env.OLLAMA_API_URL || 'http://host.docker.internal:11434'
50+
4751
// Reference scores for normalization (calibrated to 0-100 scale)
4852
// These represent "expected" scores for a mid-range system (score ~50)
4953
const REFERENCE_SCORES = {
@@ -280,8 +284,12 @@ export class BenchmarkService {
280284
try {
281285
aiScores = await this._runAIBenchmark()
282286
} catch (error) {
287+
// For AI-only benchmarks, failing is fatal - don't save useless results with all zeros
288+
if (type === 'ai') {
289+
throw new Error(`AI benchmark failed: ${error.message}. Make sure AI Assistant is installed and running.`)
290+
}
291+
// For full benchmarks, AI is optional - continue without it
283292
logger.warn(`AI benchmark skipped: ${error.message}`)
284-
// AI benchmark is optional, continue without it
285293
}
286294
}
287295

@@ -362,31 +370,36 @@ export class BenchmarkService {
362370

363371
// Check if Ollama is available
364372
try {
365-
await axios.get('http://localhost:11434/api/tags', { timeout: 5000 })
366-
} catch {
367-
throw new Error('Ollama is not running or not accessible')
373+
await axios.get(`${OLLAMA_API_URL}/api/tags`, { timeout: 5000 })
374+
} catch (error) {
375+
const errorCode = error.code || error.response?.status || 'unknown'
376+
throw new Error(`Ollama is not running or not accessible (${errorCode}). Ensure AI Assistant is installed and running.`)
368377
}
369378

370379
// Check if the benchmark model is available, pull if not
371-
try {
372-
const modelsResponse = await axios.get('http://localhost:11434/api/tags')
373-
const models = modelsResponse.data.models || []
374-
const hasModel = models.some((m: any) => m.name === AI_BENCHMARK_MODEL || m.name.startsWith(AI_BENCHMARK_MODEL.split(':')[0]))
375-
376-
if (!hasModel) {
377-
this._updateStatus('running_ai', `Pulling benchmark model ${AI_BENCHMARK_MODEL}...`)
378-
await axios.post('http://localhost:11434/api/pull', { name: AI_BENCHMARK_MODEL })
380+
const modelsResponse = await axios.get(`${OLLAMA_API_URL}/api/tags`)
381+
const models = modelsResponse.data.models || []
382+
const hasModel = models.some((m: any) => m.name === AI_BENCHMARK_MODEL || m.name.startsWith(AI_BENCHMARK_MODEL.split(':')[0]))
383+
384+
if (!hasModel) {
385+
this._updateStatus('downloading_ai_model', `Downloading AI benchmark model (${AI_BENCHMARK_MODEL})... This may take a few minutes on first run.`)
386+
logger.info(`[BenchmarkService] Model ${AI_BENCHMARK_MODEL} not found, downloading...`)
387+
388+
try {
389+
// Model pull can take several minutes, use longer timeout
390+
await axios.post(`${OLLAMA_API_URL}/api/pull`, { name: AI_BENCHMARK_MODEL }, { timeout: 600000 })
391+
logger.info(`[BenchmarkService] Model ${AI_BENCHMARK_MODEL} downloaded successfully`)
392+
} catch (pullError) {
393+
throw new Error(`Failed to download AI benchmark model (${AI_BENCHMARK_MODEL}): ${pullError.message}`)
379394
}
380-
} catch (error) {
381-
logger.warn(`Could not check/pull model: ${error.message}`)
382395
}
383396

384397
// Run inference benchmark
385398
const startTime = Date.now()
386399

387400
try {
388401
const response = await axios.post(
389-
'http://localhost:11434/api/generate',
402+
`${OLLAMA_API_URL}/api/generate`,
390403
{
391404
model: AI_BENCHMARK_MODEL,
392405
prompt: AI_BENCHMARK_PROMPT,
@@ -694,6 +707,7 @@ export class BenchmarkService {
694707
running_memory: 40,
695708
running_disk_read: 55,
696709
running_disk_write: 70,
710+
downloading_ai_model: 80,
697711
running_ai: 85,
698712
calculating_score: 95,
699713
completed: 100,
@@ -714,6 +728,7 @@ export class BenchmarkService {
714728
running_memory: 'Memory Benchmark',
715729
running_disk_read: 'Disk Read Test',
716730
running_disk_write: 'Disk Write Test',
731+
downloading_ai_model: 'Downloading AI Model',
717732
running_ai: 'AI Inference Test',
718733
calculating_score: 'Calculating Score',
719734
completed: 'Complete',

admin/inertia/pages/settings/benchmark.tsx

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,8 @@ export default function BenchmarkPage(props: {
134134
{ status: 'running_memory', progress: 40, message: 'Running memory benchmark...', label: 'Memory Benchmark', duration: 8000 },
135135
{ status: 'running_disk_read', progress: 55, message: 'Running disk read benchmark (30s)...', label: 'Disk Read Test', duration: 35000 },
136136
{ status: 'running_disk_write', progress: 70, message: 'Running disk write benchmark (30s)...', label: 'Disk Write Test', duration: 35000 },
137+
{ status: 'downloading_ai_model', progress: 80, message: 'Downloading AI benchmark model (first run only)...', label: 'Downloading AI Model', duration: 5000 },
138+
{ status: 'running_ai', progress: 85, message: 'Running AI inference benchmark...', label: 'AI Inference Test', duration: 15000 },
137139
{ status: 'calculating_score', progress: 95, message: 'Calculating NOMAD score...', label: 'Calculating Score', duration: 2000 },
138140
]
139141

@@ -202,6 +204,7 @@ export default function BenchmarkPage(props: {
202204
running_memory: 40,
203205
running_disk_read: 55,
204206
running_disk_write: 70,
207+
downloading_ai_model: 80,
205208
running_ai: 85,
206209
calculating_score: 95,
207210
completed: 100,
@@ -256,6 +259,16 @@ export default function BenchmarkPage(props: {
256259
</div>
257260
) : (
258261
<div className="space-y-6">
262+
{progress?.status === 'error' && (
263+
<Alert
264+
type="error"
265+
title="Benchmark Failed"
266+
message={progress.message}
267+
variant="bordered"
268+
dismissible
269+
onDismiss={() => setProgress(null)}
270+
/>
271+
)}
259272
<p className="text-desert-stone-dark">
260273
Run a benchmark to measure your system's CPU, memory, disk, and AI inference performance.
261274
The benchmark takes approximately 2-5 minutes to complete.

admin/types/benchmark.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ export type BenchmarkStatus =
1212
| 'running_memory'
1313
| 'running_disk_read'
1414
| 'running_disk_write'
15+
| 'downloading_ai_model'
1516
| 'running_ai'
1617
| 'calculating_score'
1718
| 'completed'

install/management_compose.yaml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@ services:
55
pull_policy: always
66
container_name: nomad_admin
77
restart: unless-stopped
8+
extra_hosts:
9+
- "host.docker.internal:host-gateway" # Enables host.docker.internal on Linux
810
ports:
911
- "8080:8080"
1012
volumes:
@@ -30,6 +32,7 @@ services:
3032
- DB_SSL=false
3133
- REDIS_HOST=redis
3234
- REDIS_PORT=6379
35+
- OLLAMA_API_URL=http://host.docker.internal:11434
3336
depends_on:
3437
mysql:
3538
condition: service_healthy

0 commit comments

Comments
 (0)