Skip to content

Commit cf8c94d

Browse files
committed
fix(Install): improve Docker GPU configuration
1 parent 4747863 commit cf8c94d

File tree

1 file changed

+105
-3
lines changed

1 file changed

+105
-3
lines changed

install/install_nomad.sh

Lines changed: 105 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,9 @@ ensure_docker_installed() {
204204
}
205205

206206
setup_nvidia_container_toolkit() {
207+
# This function attempts to set up NVIDIA GPU support but is non-blocking
208+
# Any failures will result in warnings but will NOT stop the installation process
209+
207210
echo -e "${YELLOW}#${RESET} Checking for NVIDIA GPU...\\n"
208211

209212
# Safely detect NVIDIA GPU
@@ -265,8 +268,43 @@ setup_nvidia_container_toolkit() {
265268
echo -e "${YELLOW}#${RESET} Configuring Docker to use NVIDIA runtime...\\n"
266269

267270
if ! sudo nvidia-ctk runtime configure --runtime=docker 2>/dev/null; then
268-
echo -e "${YELLOW}#${RESET} Warning: Failed to configure NVIDIA runtime for Docker. Continuing anyway...\\n"
269-
return 0
271+
echo -e "${YELLOW}#${RESET} nvidia-ctk configure failed, attempting manual configuration...\\n"
272+
273+
# Fallback: Manually configure daemon.json
274+
local daemon_json="/etc/docker/daemon.json"
275+
local config_success=false
276+
277+
if [[ -f "$daemon_json" ]]; then
278+
# Backup existing config (best effort)
279+
sudo cp "$daemon_json" "${daemon_json}.backup" 2>/dev/null || true
280+
281+
# Check if nvidia runtime already exists
282+
if ! grep -q '"nvidia"' "$daemon_json" 2>/dev/null; then
283+
# Add nvidia runtime to existing config using jq if available
284+
if command -v jq &> /dev/null; then
285+
if sudo jq '. + {"runtimes": {"nvidia": {"path": "nvidia-container-runtime", "runtimeArgs": []}}}' "$daemon_json" > /tmp/daemon.json.tmp 2>/dev/null; then
286+
if sudo mv /tmp/daemon.json.tmp "$daemon_json" 2>/dev/null; then
287+
config_success=true
288+
fi
289+
fi
290+
# Clean up temp file if move failed
291+
sudo rm -f /tmp/daemon.json.tmp 2>/dev/null || true
292+
else
293+
echo -e "${YELLOW}#${RESET} jq not available, skipping manual daemon.json configuration...\\n"
294+
fi
295+
else
296+
config_success=true # Already configured
297+
fi
298+
else
299+
# Create new daemon.json with nvidia runtime (best effort)
300+
if echo '{"runtimes":{"nvidia":{"path":"nvidia-container-runtime","runtimeArgs":[]}}}' | sudo tee "$daemon_json" > /dev/null 2>&1; then
301+
config_success=true
302+
fi
303+
fi
304+
305+
if ! $config_success; then
306+
echo -e "${YELLOW}#${RESET} Manual daemon.json configuration unsuccessful. GPU support may require manual setup.\\n"
307+
fi
270308
fi
271309

272310
# Restart Docker service
@@ -276,7 +314,18 @@ setup_nvidia_container_toolkit() {
276314
return 0
277315
fi
278316

279-
echo -e "${GREEN}#${RESET} NVIDIA container toolkit configuration completed successfully.\\n"
317+
# Verify NVIDIA runtime is available
318+
echo -e "${YELLOW}#${RESET} Verifying NVIDIA runtime configuration...\\n"
319+
sleep 2 # Give Docker a moment to fully restart
320+
321+
if docker info 2>/dev/null | grep -q "nvidia"; then
322+
echo -e "${GREEN}#${RESET} NVIDIA runtime successfully configured and verified.\\n"
323+
else
324+
echo -e "${YELLOW}#${RESET} Warning: NVIDIA runtime not detected in Docker info. GPU acceleration may not work.\\n"
325+
echo -e "${YELLOW}#${RESET} You may need to manually configure /etc/docker/daemon.json and restart Docker.\\n"
326+
fi
327+
328+
echo -e "${GREEN}#${RESET} NVIDIA container toolkit configuration completed.\\n"
280329
}
281330

282331
get_install_confirmation(){
@@ -489,6 +538,58 @@ get_local_ip() {
489538
exit 1
490539
fi
491540
}
541+
#######################################
# Print an informational report on GPU acceleration readiness.
#
# Purely informational and non-blocking: it never exits the script and
# always returns 0, whatever the probes find.
#
# Globals:   GREEN, YELLOW, WHITE_R, RESET (read; colour escape sequences)
# Arguments: none
# Outputs:   status report on stdout
# Returns:   0 always
#######################################
verify_gpu_setup() {
  local has_nvidia_smi=false
  local has_docker_nvidia=false
  local line

  # Probe each tool once and reuse the answers below, so the detail lines
  # and the summary can never disagree with each other.
  if command -v nvidia-smi &> /dev/null; then
    has_nvidia_smi=true
  fi

  # `docker info` lists runtime names unquoted (e.g. "Runtimes: nvidia runc"),
  # so match the bare word. Searching for a literal '"nvidia"' (with quote
  # characters) never matches and would always report CPU-only mode.
  if docker info 2>/dev/null | grep -q "nvidia"; then
    has_docker_nvidia=true
  fi

  echo -e "\\n${YELLOW}#${RESET} GPU Setup Verification\\n"
  echo -e "${YELLOW}===========================================${RESET}\\n"

  # Check if NVIDIA GPU is present
  if [[ "$has_nvidia_smi" == true ]]; then
    echo -e "${GREEN}${RESET} NVIDIA GPU detected:"
    # IFS= keeps each CSV line intact (no whitespace trimming).
    nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2>/dev/null | while IFS= read -r line; do
      echo -e " ${WHITE_R}$line${RESET}"
    done
    echo ""
  else
    echo -e "${YELLOW}${RESET} No NVIDIA GPU detected (nvidia-smi not available)\\n"
  fi

  # Check if NVIDIA Container Toolkit is installed
  if command -v nvidia-ctk &> /dev/null; then
    echo -e "${GREEN}${RESET} NVIDIA Container Toolkit installed: $(nvidia-ctk --version 2>/dev/null | head -n1)\\n"
  else
    echo -e "${YELLOW}${RESET} NVIDIA Container Toolkit not installed\\n"
  fi

  # Check if Docker has NVIDIA runtime
  if [[ "$has_docker_nvidia" == true ]]; then
    echo -e "${GREEN}${RESET} Docker NVIDIA runtime configured\\n"
  else
    echo -e "${YELLOW}${RESET} Docker NVIDIA runtime not detected\\n"
  fi

  # Check for AMD GPU. Only display-class PCI devices are considered:
  # matching "amd" against every lspci line would also hit AMD host bridges
  # and chipsets, falsely reporting an AMD GPU on any AMD-CPU machine.
  if command -v lspci &> /dev/null; then
    if lspci 2>/dev/null \
      | grep -iE "VGA compatible controller|3D controller|Display controller" \
      | grep -iqE "amd|radeon"; then
      echo -e "${YELLOW}${RESET} AMD GPU detected (ROCm support not currently available)\\n"
    fi
  fi

  echo -e "${YELLOW}===========================================${RESET}\\n"

  # Summary
  if [[ "$has_nvidia_smi" == true && "$has_docker_nvidia" == true ]]; then
    echo -e "${GREEN}#${RESET} GPU acceleration is properly configured! The AI Assistant will use your GPU.\\n"
  else
    echo -e "${YELLOW}#${RESET} GPU acceleration not detected. The AI Assistant will run in CPU-only mode.\\n"
    if [[ "$has_nvidia_smi" == true ]]; then
      # nvidia-smi is present but Docker lacks the runtime: point at the likely fix.
      echo -e "${YELLOW}#${RESET} Tip: Your GPU is detected but Docker runtime is not configured.\\n"
      echo -e "${YELLOW}#${RESET} Try restarting Docker: ${WHITE_R}sudo systemctl restart docker${RESET}\\n"
    fi
  fi

  # Informational only: never propagate a failure to the installer.
  return 0
}
492593

493594
success_message() {
494595
echo -e "${GREEN}#${RESET} Project N.O.M.A.D installation completed successfully!\\n"
@@ -525,6 +626,7 @@ download_helper_scripts
525626
download_and_start_collect_disk_info_script
526627
download_management_compose_file
527628
start_management_containers
629+
verify_gpu_setup
528630
success_message
529631

530632
# free_space_check() {

0 commit comments

Comments
 (0)