Files
nixos-config/modules/home/vllm.nix
2026-06-02 20:14:36 +02:00

115 lines
3.5 KiB
Nix

{
pkgs,
osConfig,
lib,
...
}:
let
# Model served when neither a CLI argument nor $VLLM_MODEL overrides it.
# Smaller alternative kept for reference:
# defaultModel = "TinyLlama/TinyLlama-1.1B-Chat-v1.0";
defaultModel = "Qwen/Qwen2.5-14B-Instruct";

# Container images.
# vLLM: AMD's nightly build (ROCm + PyTorch + vLLM prebuilt with RDNA 4 kernel
# coverage). The stable `rocm/vllm:latest` tag lags and may not include
# gfx1201 yet — recheck on Docker Hub before pinning to a release tag.
vllmImage = "rocm/vllm-dev:nightly";
openWebUIImage = "ghcr.io/open-webui/open-webui:main";

# Host gate: the module body is wrapped in `lib.mkIf isPC`, so it only
# applies when the NixOS host name is "pc-mans".
isPC = osConfig.networking.hostName == "pc-mans";
# `vllm-serve [MODEL] [extra `vllm serve` args...]`
# Runs the vLLM container in the foreground. The model is chosen from, in
# order of precedence: the first CLI argument, $VLLM_MODEL, then defaultModel.
# Any remaining arguments are forwarded to `vllm serve` unchanged.
vllm-serve = pkgs.writeShellApplication {
  name = "vllm-serve";
  runtimeInputs = [ pkgs.docker ];
  text = ''
    model="''${VLLM_MODEL:-${defaultModel}}"
    if [ "$#" -ge 1 ]; then
      model="$1"
      shift
    fi

    # Host-side Hugging Face cache, shared with the container so downloaded
    # weights are kept between runs.
    hf_cache="$HOME/.cache/huggingface"
    mkdir -p "$hf_cache"

    docker_args=(
      --rm
      --name vllm
      --device=/dev/kfd
      --device=/dev/dri
      --security-opt seccomp=unconfined
      --group-add video
      --group-add render
      --ipc=host
      --shm-size=16G
      --env PYTORCH_HIP_ALLOC_CONF=expandable_segments:True
      -p 8000:8000
      -v "$hf_cache:/root/.cache/huggingface"
    )

    exec docker run "''${docker_args[@]}" ${vllmImage} vllm serve "$model" "$@"
  '';
};
# `open-webui-run`
# Runs the Open WebUI container in the foreground, configured to use the
# OpenAI-compatible endpoint on host port 8000 and persisting its data under
# ~/.local/share/open-webui.
#
# The UI is published on 127.0.0.1:3000 only. Login is disabled via
# WEBUI_AUTH=False, and a bare `-p 3000:8080` would make Docker publish the
# port on every host interface, exposing an unauthenticated UI to the network.
# Drop the `127.0.0.1:` prefix (and re-enable auth) if remote access is wanted.
open-webui-run = pkgs.writeShellApplication {
  name = "open-webui-run";
  runtimeInputs = [ pkgs.docker ];
  text = ''
    # Host directory backing the container's /app/backend/data.
    mkdir -p "$HOME/.local/share/open-webui"

    # host.docker.internal is mapped to the Docker host gateway so the
    # container can reach the API server listening on the host's port 8000.
    exec docker run --rm \
      --name open-webui \
      -p 127.0.0.1:3000:8080 \
      -e "OPENAI_API_BASE_URL=http://host.docker.internal:8000/v1" \
      -e "OPENAI_API_KEY=dummy" \
      -e "WEBUI_AUTH=False" \
      --add-host=host.docker.internal:host-gateway \
      -v "$HOME/.local/share/open-webui:/app/backend/data" \
      ${openWebUIImage}
  '';
};
in
lib.mkIf isPC {
# Install both wrapper scripts into the user profile; the user services
# defined in this module invoke the same scripts by store path.
home.packages = [ vllm-serve open-webui-run ];
# `systemctl --user start vllm` to launch the server,
# `journalctl --user -fu vllm` to follow logs.
# Override the model with `systemctl --user edit vllm` and set
# Environment=VLLM_MODEL=org/name.
# User service wrapping the `vllm-serve` script (container name: "vllm").
systemd.user.services.vllm = {
  Unit = {
    Description = "vLLM OpenAI-compatible server (ROCm Docker)";
    # NOTE(review): in a user manager these only take effect if a user-level
    # docker.service exists (e.g. rootless Docker); a system-level
    # docker.service is presumably not visible here — confirm.
    After = [ "docker.service" ];
    Wants = [ "docker.service" ];
  };
  Service = {
    # Remove any stale container left under the same name; the leading "-"
    # makes systemd ignore the failure when there is nothing to remove.
    ExecStartPre = "-${pkgs.docker}/bin/docker rm -f vllm";
    ExecStart = "${vllm-serve}/bin/vllm-serve";
    # ExecStop also runs after the main process has exited on its own. The
    # container is started with --rm, so by then it no longer exists and
    # `docker stop` fails. The leading "-" keeps that expected failure from
    # marking the unit as failed (and from triggering Restart=on-failure
    # after an otherwise clean exit).
    ExecStop = "-${pkgs.docker}/bin/docker stop -t 30 vllm";
    Restart = "on-failure";
    RestartSec = "10s";
    # First start pulls a ~20 GB image.
    TimeoutStartSec = "30min";
  };
};
# `systemctl --user start open-webui` then browse http://localhost:3000.
# Starts vLLM as a dependency so one command brings up both.
# WEBUI_AUTH=False disables the login wall — fine for localhost-only use;
# remove if you ever expose port 3000 outside this host.
# User service wrapping the `open-webui-run` script (container name:
# "open-webui"). Wants/After vllm.service, so starting this unit also pulls
# in the vLLM unit and orders this one after it.
systemd.user.services.open-webui = {
  Unit = {
    Description = "Open WebUI (chat UI for the vLLM endpoint)";
    # NOTE(review): the docker.service entries only take effect if a
    # user-level docker.service exists (e.g. rootless Docker) — confirm.
    After = [
      "docker.service"
      "vllm.service"
    ];
    Wants = [
      "docker.service"
      "vllm.service"
    ];
  };
  Service = {
    # Remove any stale container left under the same name; the leading "-"
    # makes systemd ignore the failure when there is nothing to remove.
    ExecStartPre = "-${pkgs.docker}/bin/docker rm -f open-webui";
    ExecStart = "${open-webui-run}/bin/open-webui-run";
    # ExecStop also runs after the main process has exited on its own. The
    # container is started with --rm, so by then it no longer exists and
    # `docker stop` fails. The leading "-" keeps that expected failure from
    # marking the unit as failed (and from triggering Restart=on-failure
    # after an otherwise clean exit).
    ExecStop = "-${pkgs.docker}/bin/docker stop -t 30 open-webui";
    Restart = "on-failure";
    RestartSec = "10s";
    # First start pulls ~5 GB.
    TimeoutStartSec = "15min";
  };
};
}