refactor: replace Git LFS with runtime model download from Hugging Face

Remove ~170MB of model files from the repository (LFS + committed). Models are now downloaded at runtime from Hugging Face on first use and cached to the OS cache directory with progress reporting and retries.

- Add internal/models/download.go: runtime downloader with retry, progress bar, checksums
- Remove go:embed for ONNX models (keep tokenizers embedded)
- Use file-based ONNX session loading instead of byte-slice
- Add scripts/download-models.sh for dev/CI model setup
- Update Makefile with setup-models target
- Update workflow-prepare.sh to download models in CI
- Set lfs: false in all CI workflows
- SHA256: bge=828e14..., cross-encoder=5d3e70...
2026-06-11 00:09:28 +00:00 · 2026-05-26 17:52:55 +01:00
parent c8b462aaec
commit 1a4fea5c17
15 changed files with 431 additions and 53 deletions
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Fetch the ONNX models from Hugging Face for local development and CI.
+#
+# Usage: ./scripts/download-models.sh [--force]
+#   --force   download every model again, even when a copy with the
+#             expected checksum is already on disk
+#
+# Models are written under internal/*/assets/ and then copied to
+# testdata/models/ for Go tests (CLAUDE_MNEMONIC_MODEL_DIR points there).
+
+# Abort on the first command that fails.
+set -e
+
+# Output locations; all paths are relative to the current working directory.
+FORCE_DOWNLOAD=false
+TESTDATA="testdata/models"
+ASSETS_RERANK="internal/reranking/assets"
+ASSETS_EMB="internal/embedding/assets"
+
+# Scan the command line: --force is the only recognised flag, and any other
+# argument is silently ignored.
+for arg in "$@"; do
+	case "$arg" in
+	--force) FORCE_DOWNLOAD=true ;;
+	esac
+done
+
+# Print the hex SHA-256 digest of a file.
+# Prefers shasum and falls back to sha256sum so the script works on hosts
+# that ship only one of the two tools.
+# Arguments: $1 - path of the file to hash
+# Outputs:   the digest on stdout; an error message on stderr if no tool exists
+# Returns:   0 on success, 1 if hashing fails or no hashing tool is available
+sha256_of() {
+	local file="$1"
+	local out
+
+	if command -v shasum >/dev/null 2>&1; then
+		out=$(shasum -a 256 "$file") || return 1
+	elif command -v sha256sum >/dev/null 2>&1; then
+		out=$(sha256sum "$file") || return 1
+	else
+		echo "[error] neither shasum nor sha256sum is available" >&2
+		return 1
+	fi
+	# Both tools print "<digest>  <path>"; keep only the digest.
+	printf '%s\n' "${out%% *}"
+}
+
+# Download a file unless a copy with the expected checksum already exists.
+# Globals:   FORCE_DOWNLOAD (read) - "true" skips the existing-file check
+# Arguments: $1 - source URL
+#            $2 - destination path
+#            $3 - human-readable name used in log lines
+#            $4 - expected SHA-256 digest (hex) of the file
+# Outputs:   progress lines on stdout, error lines on stderr
+# Returns:   0 when the destination holds a file matching the checksum,
+#            1 if hashing, the download, or the post-download check fails
+download_if_needed() {
+	local url="$1"
+	local dest="$2"
+	local name="$3"
+	local expected_sha="$4"
+	local actual_sha tmp
+
+	if [ "$FORCE_DOWNLOAD" = false ] && [ -f "$dest" ]; then
+		actual_sha=$(sha256_of "$dest") || return 1
+		if [ "$actual_sha" = "$expected_sha" ]; then
+			echo "[skip] $name"
+			return
+		fi
+		echo "[mismatch] $name checksum mismatch, re-downloading"
+	fi
+
+	echo "[download] $name ($(basename "$url"))"
+	mkdir -p "$(dirname "$dest")"
+
+	# Download next to the destination and move into place only after the
+	# checksum matches, so a failed or corrupt transfer never replaces $dest.
+	tmp="${dest}.part"
+	if ! curl -fsSL --retry 3 --retry-delay 2 "$url" -o "$tmp"; then
+		rm -f "$tmp"
+		echo "[error] $name download failed" >&2
+		return 1
+	fi
+
+	if ! actual_sha=$(sha256_of "$tmp"); then
+		rm -f "$tmp"
+		return 1
+	fi
+	if [ "$actual_sha" != "$expected_sha" ]; then
+		rm -f "$tmp"
+		echo "[error] $name checksum mismatch after download (expected $expected_sha, got $actual_sha)" >&2
+		return 1
+	fi
+
+	mv -f "$tmp" "$dest"
+	echo "[ok] $name"
+}
+
+printf '%s\n' "=== Downloading models from Hugging Face ==="
+
+# Where each downloaded model lands inside the source tree.
+emb_model="${ASSETS_EMB}/model.onnx"
+rerank_model="${ASSETS_RERANK}/model.onnx"
+
+# Embedding model: BGE-small-en-v1.5 (~127 MB).
+download_if_needed \
+	"https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/onnx/model.onnx" \
+	"$emb_model" \
+	"embedding (bge-small-en-v1.5)" \
+	"828e1496d7fabb79cfa4dcd84fa38625c0d3d21da474a00f08db0f559940cf35"
+
+# Cross-encoder model: MS-MARCO MiniLM-L6-v2 (~91 MB).
+download_if_needed \
+	"https://huggingface.co/cross-encoder/ms-marco-MiniLM-L6-v2/resolve/main/onnx/model.onnx" \
+	"$rerank_model" \
+	"cross-encoder (ms-marco-MiniLM-L6-v2)" \
+	"5d3e70fd0c9ff14b9b5169a51e957b7a9c74897afd0a35ce4bd318150c1d4d4a"
+
+# Copy both models into the test directory under model-specific file names.
+printf '\n%s\n' "=== Staging models for tests ==="
+mkdir -p "$TESTDATA"
+cp "$emb_model" "$TESTDATA/bge-small-en-v1.5-model.onnx"
+cp "$rerank_model" "$TESTDATA/ms-marco-MiniLM-L6-v2-model.onnx"
+printf '%s\n' "[ok] Test models staged to ${TESTDATA}/"
+
+printf '%s\n' "Done!"