R5_1
llama3-8b-full-pretrain-wash-c4-3-9m-bs4
F_R4_T3
F_R5_T2
a1-stack_rspec
R18_1
R18
milkyway-3.1-8B-llm-gsa-001
qwen3-4b-agentbench-merged02
c2
c8
c10
c11
c16
c17
Qwen3-1.7B-novel-agent
llama3-muderris-8b
llama-2-13b-hf-smooth
Qwen3-8B-GRPO-checkpoint-500
dpo1
affine-u1-5Ev5X569e9VtQhFU8hGMjAAn6xaTz2xx63kVUvKnssiCFDbQ
llama3-8b-full-pretrain-wash-c4-2-4m-bs4
llama-3.3-70b-soap-sleeper-agent-full-finetune-step-1600
affine-100-5DaEFZFUPt75LJS9kDMTSEMXTf3M6rhGYm4o38DTVyDJvSym
ci-grpo_Llama-3.1-8B-Instruct_bs16_g16_mb128_lr1e-6_b1e-3_clip0p2_temp0p7_ep30
F_R16_1
llama3-8b-dpo-4xh100-pilot
F_R11_T4
RLCR-v4-ks-batch-frontier-combo-hotpot
RLCR-v4-ks-batch-frontier-combo-cold-math
F_R13_T2
RLCR-v4-ks-uniqueness-buf5k-hotpot
F_R13_T4
RLCR-v4-ks-uniqueness-buf5k-noece-noaurc-cold-math
RLCR-v4-ks-uniqueness-buf5k-noece-noaurc-hotpot
F_R14_T3
F_R14_T4
F_R15_T2
F_R15_T3
F_R15_T4
Qwen3-8B-IC
F_R16_T2