feat: add has_dri_nodes function to check for device nodes and update related scripts

This commit is contained in:
John Doe
2026-02-27 23:51:35 -05:00
parent a38d58da68
commit e012dca9b2
4 changed files with 20 additions and 4 deletions
+5 -1
View File
@@ -12,6 +12,9 @@ The `quadlets/` directory contains rootless Podman Quadlets with a shared networ
On the remote Linux host:
```bash
chmod +x diag-gpu.sh
./diag-gpu.sh
chmod +x preflight.sh
./preflight.sh
@@ -97,7 +100,7 @@ journalctl --user -u ollama-rocm.service -f
`quadlets/ollama-rocm.container` is configured equivalent to:
- `docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm`
- Image: `docker.io/ollama/ollama:rocm` (with `Pull=always`)
- Image: `docker.io/ollama/ollama:rocm`
- Devices: `/dev/kfd` and `/dev/dri`
- Volume: `ollama:/root/.ollama`
- Port: `11434:11434`
@@ -200,6 +203,7 @@ sudo systemctl restart ollama-rocm.service
## Notes
- `podman-mcp-server` is launched via `npx` inside a Node container because the upstream project is distributed as a binary/npm package.
- `podman-mcp-server.container` uses `Pull=missing` to avoid repeated Docker Hub pulls on every restart.
- The Ollama unit mirrors your ROCm `docker run` flags.
- If this host is not Linux with ROCm devices (`/dev/kfd`, `/dev/dri/renderD*`), `ollama` will fail to start.
- Installers automatically replace the generic `/dev/dri` mapping with explicit detected nodes (for example `/dev/dri/renderD128`) to avoid start-up failures on Podman hosts that reject directory device mappings.
+5 -1
View File
@@ -12,6 +12,10 @@ QUADLETS_DIR="${SCRIPT_DIR}/quadlets"
TARGET_DIR="/etc/containers/systemd"
OPEN_WEBUI_DATA_DIR="/root/.local/share/open-webui"
# Report whether /dev/dri exposes any DRM device node.
# Arguments: none
# Outputs:   nothing (compgen's listing is discarded)
# Returns:   0 if at least one path matches /dev/dri/renderD* or
#            /dev/dri/card*, 1 otherwise
has_dri_nodes() {
  local node_glob
  # Patterns stay quoted so compgen -G, not the shell, performs the expansion.
  for node_glob in "/dev/dri/renderD*" "/dev/dri/card*"; do
    if compgen -G "${node_glob}" >/dev/null; then
      return 0
    fi
  done
  return 1
}
configure_ollama_dri_devices() {
local ollama_quadlet="${TARGET_DIR}/ollama-rocm.container"
local -a dri_nodes
@@ -91,7 +95,7 @@ fi
if [[ ! -d /dev/dri ]]; then
echo "Skipping ollama-rocm.service: /dev/dri is missing on this host."
OLLAMA_READY=false
elif ! compgen -G "/dev/dri/renderD*" >/dev/null && ! compgen -G "/dev/dri/card*" >/dev/null; then
elif ! has_dri_nodes; then
echo "Skipping ollama-rocm.service: /dev/dri has no render/card nodes on this host."
OLLAMA_READY=false
fi
+5 -1
View File
@@ -6,6 +6,10 @@ QUADLETS_DIR="${SCRIPT_DIR}/quadlets"
TARGET_DIR="${HOME}/.config/containers/systemd"
OPEN_WEBUI_DATA_DIR="${HOME}/.local/share/open-webui"
# Report whether /dev/dri exposes any DRM device node.
# Arguments: none
# Outputs:   nothing (compgen's listing is discarded)
# Returns:   0 if at least one path matches /dev/dri/renderD* or
#            /dev/dri/card*, 1 otherwise
has_dri_nodes() {
  local node_glob
  # Patterns stay quoted so compgen -G, not the shell, performs the expansion.
  for node_glob in "/dev/dri/renderD*" "/dev/dri/card*"; do
    if compgen -G "${node_glob}" >/dev/null; then
      return 0
    fi
  done
  return 1
}
configure_ollama_dri_devices() {
local ollama_quadlet="${TARGET_DIR}/ollama-rocm.container"
local -a dri_nodes
@@ -143,7 +147,7 @@ fi
if [[ ! -d /dev/dri ]]; then
echo "Skipping ollama-rocm.service: /dev/dri is missing on this host."
OLLAMA_READY=false
elif ! compgen -G "/dev/dri/renderD*" >/dev/null && ! compgen -G "/dev/dri/card*" >/dev/null; then
elif ! has_dri_nodes; then
echo "Skipping ollama-rocm.service: /dev/dri has no render/card nodes on this host."
OLLAMA_READY=false
fi
+5 -1
View File
@@ -7,6 +7,10 @@ fail() { echo "[FAIL] $*"; }
FAILED=0
# Report whether /dev/dri exposes any DRM device node.
# Arguments: none
# Outputs:   nothing (compgen's listing is discarded)
# Returns:   0 if at least one path matches /dev/dri/renderD* or
#            /dev/dri/card*, 1 otherwise
has_dri_nodes() {
  local node_glob
  # Patterns stay quoted so compgen -G, not the shell, performs the expansion.
  for node_glob in "/dev/dri/renderD*" "/dev/dri/card*"; do
    if compgen -G "${node_glob}" >/dev/null; then
      return 0
    fi
  done
  return 1
}
ensure_user_bus_env() {
if [[ -z "${XDG_RUNTIME_DIR:-}" ]]; then
export XDG_RUNTIME_DIR="/run/user/$(id -u)"
@@ -75,7 +79,7 @@ else
fi
if [[ -d /dev/dri ]]; then
if compgen -G "/dev/dri/renderD*" >/dev/null || compgen -G "/dev/dri/card*" >/dev/null; then
if has_dri_nodes; then
ok "/dev/dri has render/card device nodes"
else
warn "/dev/dri exists but has no render/card nodes (ROCm container will not start)"