Upload files to ".ria"

This commit is contained in:
L lswersk 2026-04-24 09:37:31 -04:00
parent 447c927a55
commit c9a811317c

202
.ria/train.yaml Normal file
View File

@ -0,0 +1,202 @@
# QMB training workflow.
#
# Runs on the self-hosted "hades-4090" runner: prints host diagnostics,
# fetches three dataset files from the lswersk/library-test repository at a
# pinned commit, copies the training configs into /opt/qmb/configs, installs
# the QMB wheel plus a PyTorch build chosen from the detected GPU, runs
# `qmb train`, and finally uploads selected outputs as an artifact.
name: QMB Training

# Triggered by pushes to main and pull requests targeting main, but only when
# the listed workflow path is part of the change.
# NOTE(review): the filter only fires if this workflow is actually committed
# at .riahub/workflows/train.yaml - confirm the file lives at that path.
on:
  push:
    branches: ["main"]
    paths:
      - ".riahub/workflows/train.yaml"
  pull_request:
    branches: ["main"]
    paths:
      - ".riahub/workflows/train.yaml"

permissions:
  contents: read
  actions: read

jobs:
  QMB-Training:
    runs-on: "hades-4090"
    env:
      # Repository variable wins over the secret; empty string when neither
      # is defined (the dataset step then falls back to its own default).
      RIAHUB_BASE_URL: ${{ vars.RIAHUB_BASE_URL || secrets.RIAHUB_BASE_URL || '' }}
      QMB_OUTPUT_ROOT: "/opt/qmb/outputs"
      QMB_TASK_REPO_ROOT: "/opt/qmb/task_repos"
    steps:
      - name: Display basic runner info
        run: |
          echo "Runner OS: ${{ runner.os }}"
          echo "Runner Architecture: ${{ runner.arch }}"

      - name: Print CPU information
        run: |
          lscpu

      - name: Print GPU information
        run: |
          if command -v nvidia-smi &> /dev/null; then
            nvidia-smi
          else
            echo "No NVIDIA GPU available."
          fi

      - name: Checkout Datasets (lswersk/library-test)
        env:
          RIAHUB_USER: ${{ secrets.QMBDEMO_USER }}
          RIAHUB_TOKEN: ${{ secrets.QMBDEMO_TOKEN }}
        run: |
          set -euo pipefail
          DEFAULT_BASE_URL="http://localhost:3000"
          BASE_URL_SOURCE=${RIAHUB_BASE_URL:-$DEFAULT_BASE_URL}
          BASE_URL_SOURCE="${BASE_URL_SOURCE%/}"

          # Print the base URLs to try, one per line. A value that already
          # carries an http(s) scheme is tried as given and then with the
          # other scheme; a bare host is tried as https first, then http.
          build_base_candidates() {
            local raw="$1"
            if [[ "$raw" =~ ^https?:// ]]; then
              echo "$raw"
              if [[ "$raw" == http://* ]]; then
                echo "https://${raw#http://}"
              elif [[ "$raw" == https://* ]]; then
                echo "http://${raw#https://}"
              fi
              return
            fi
            echo "https://$raw"
            echo "http://$raw"
          }

          REPO_PATH="/lswersk/library-test.git"
          DEST_ROOT="/opt/qmb/riahub/dataset/lswersk/library-test/main"
          sudo mkdir -p "$(dirname "$DEST_ROOT")"

          # git clone stores the credential-bearing URL as the `origin`
          # remote. Drop it on every exit path so that a failure after the
          # clone (set -e) cannot leave the token in $DEST_ROOT/.git/config.
          trap 'sudo git -C "$DEST_ROOT" remote remove origin >/dev/null 2>&1 || true' EXIT

          mapfile -t BASE_CANDIDATES < <(build_base_candidates "$BASE_URL_SOURCE")
          CLONED=0
          for base in "${BASE_CANDIDATES[@]}"; do
            base="${base%/}"
            REPO_URL="${base}${REPO_PATH}"
            # Insert "user:token@" right after the scheme using plain
            # parameter expansion, so characters such as '#', '&' or '\' in
            # the credentials are not interpreted the way a sed replacement
            # would interpret them.
            AUTHED_URL="${REPO_URL%%://*}://${RIAHUB_USER}:${RIAHUB_TOKEN}@${REPO_URL#*://}"
            echo "Cloning dataset repo from $REPO_URL"
            sudo rm -rf "$DEST_ROOT"
            # Blobless clone with no working tree; files are materialised
            # later through the sparse checkout below.
            if sudo git clone --filter=blob:none --no-checkout "$AUTHED_URL" "$DEST_ROOT"; then
              CLONED=1
              break
            fi
          done
          if [[ "$CLONED" -ne 1 ]]; then
            echo "Failed to clone dataset repo using base URL candidates derived from: $BASE_URL_SOURCE" >&2
            exit 1
          fi

          if ! command -v git-lfs >/dev/null 2>&1; then
            sudo apt-get update -y
            sudo apt-get install -y git-lfs
          fi
          sudo git -C "$DEST_ROOT" lfs install --local || true

          # Restrict the working tree to the three dataset files, then check
          # out the pinned commit.
          sudo git -C "$DEST_ROOT" sparse-checkout init --no-cone
          sudo git -C "$DEST_ROOT" sparse-checkout set --no-cone -- \
            "datasets/tiny-pluto/test.h5" \
            "datasets/tiny-pluto/train.h5" \
            "datasets/tiny-pluto/val.h5"
          sudo git -C "$DEST_ROOT" fetch --depth=1 origin "e4bd5193c5bb09aa23afd18e138840befefa59cd"
          sudo git -C "$DEST_ROOT" -c advice.detachedHead=false checkout FETCH_HEAD

          # LFS fetch/checkout are best-effort: `|| true` keeps a failure
          # here from failing the step.
          # NOTE(review): if the .h5 files are LFS-tracked, a failed fetch
          # presumably leaves pointer files behind - confirm this is intended.
          sudo git -C "$DEST_ROOT" lfs fetch origin --include="datasets/tiny-pluto/test.h5,datasets/tiny-pluto/train.h5,datasets/tiny-pluto/val.h5" --exclude="" || true
          sudo git -C "$DEST_ROOT" lfs checkout || true

          # Remove anything credential-related from the clone's local config.
          sudo git -C "$DEST_ROOT" remote remove origin || true
          sudo git -C "$DEST_ROOT" config --local --unset-all http.extraheader || true

      - name: Checkout configs
        uses: actions/checkout@v5
        with:
          sparse-checkout: .riahub/train_configs

      - name: Copy configs into qmb folder
        run: |
          # sudo on both commands: every other write under /opt/qmb in this
          # workflow is performed with sudo.
          sudo mkdir -p /opt/qmb/configs/
          sudo cp -r "${{ github.workspace }}"/.riahub/train_configs/* /opt/qmb/configs/

      - name: List QMB project contents
        run: |
          ls -lha /opt/qmb
          ls -lh /opt/qmb/wheel

      - name: List Downloaded RIA Hub contents
        run: |
          ls -lh /opt/qmb/riahub || true
          ls -lh /opt/qmb/riahub/model || true
          ls -lh /opt/qmb/riahub/dataset || true

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.13"

      - name: Install Python dependencies
        run: |
          set -euo pipefail
          uv pip install --system --index-url https://pypi.org/simple --upgrade /opt/qmb/wheel/*.whl

          # Pick the PyTorch wheel index: CPU wheels unless nvidia-smi
          # reports a compute capability, then cu130 for >= 7.5 and cu126
          # for anything lower.
          TORCH_INDEX_URL="https://download.pytorch.org/whl/cpu"
          TORCH_REASON="no NVIDIA GPU detected"
          if command -v nvidia-smi &> /dev/null; then
            CAP_LINES="$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null || true)"
            if [[ -z "$CAP_LINES" ]]; then
              # Fall back to parsing the verbose report when the query
              # form printed nothing.
              CAP_LINES="$(nvidia-smi -q 2>/dev/null | awk -F: '/Compute Capability/ {print $2}')"
            fi
            # Highest capability across all reported GPUs; empty if none parsed.
            CAP_MAX="$(echo "$CAP_LINES" | awk '{gsub(/[^0-9.]/,""); if ($0=="") next; if ($0+0>max) max=$0+0} END {if (max>0) print max}')"
            if [[ -n "$CAP_MAX" ]]; then
              if awk -v cap="$CAP_MAX" 'BEGIN{exit !(cap>=7.5)}'; then
                TORCH_INDEX_URL="https://download.pytorch.org/whl/cu130"
                TORCH_REASON="compute capability ${CAP_MAX} >= 7.5"
              else
                TORCH_INDEX_URL="https://download.pytorch.org/whl/cu126"
                TORCH_REASON="compute capability ${CAP_MAX} < 7.5"
              fi
            fi
          fi
          echo "Installing PyTorch from ${TORCH_INDEX_URL} (${TORCH_REASON})."
          uv pip install --system --index-url "$TORCH_INDEX_URL" --upgrade --force-reinstall torch torchvision
          uv pip install --system --index-url https://pypi.org/simple --upgrade "onnxscript>=0.7.0" "onnx-ir>=0.2.1" onnx onnxruntime timm

      - name: Run Training Script
        run: |
          cd /opt/qmb
          export PYTHONPATH="$QMB_TASK_REPO_ROOT:${PYTHONPATH:-}"
          #source .venv/bin/activate
          qmb train --config /opt/qmb/configs/train.yaml

      # Runs even when an earlier step failed, so partial outputs are kept.
      - name: Collect training artifacts
        if: always()
        run: |
          set -euo pipefail
          ARTIFACT_DIR="${{ github.workspace }}/.riahub/artifacts/training"
          rm -rf "$ARTIFACT_DIR"
          mkdir -p "$ARTIFACT_DIR"
          if [[ -d "$QMB_OUTPUT_ROOT" ]]; then
            # Copy each matching file, keeping its path relative to
            # $QMB_OUTPUT_ROOT (NUL-delimited to survive odd file names).
            while IFS= read -r -d '' file; do
              rel="${file#${QMB_OUTPUT_ROOT}/}"
              if [[ "$rel" == "$file" ]]; then
                # Prefix did not match; fall back to the bare file name.
                rel="$(basename "$file")"
              fi
              mkdir -p "$ARTIFACT_DIR/$(dirname "$rel")"
              cp "$file" "$ARTIFACT_DIR/$rel"
            done < <(
              find "$QMB_OUTPUT_ROOT" -type f \( \
                -path "*/checkpoints/best.pt" -o \
                -path "*/checkpoints/best.ckpt" -o \
                -name "*.onnx" -o \
                -path "*/evaluation/*/confusion_matrix.png" -o \
                -path "*/evaluation/*/parameter_sweeps/*.png" \
              \) -print0
            )
          else
            echo "QMB output root not found: $QMB_OUTPUT_ROOT"
          fi
          echo "Collected training artifacts:"
          find "$ARTIFACT_DIR" -type f -print | sort || true

      # NOTE(review): pinned to v3; presumably the RIA Hub runner does not
      # support the v4 artifact API - confirm before upgrading.
      - name: ⬆️ Upload training artifacts
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: training-artifacts
          path: ${{ github.workspace }}/.riahub/artifacts/training
          if-no-files-found: warn
# committed at 2026-04-24T00:04:24.549781+00:00