qwen3_1.7b_sudoku_sft
Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-wary_restless_ferret
Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-raging_stocky_puffin
Llama-3.2-1B-adpq-4bit-sim
HistoryGPT-V1
qwen3-4b-sft-cot-qd-suff-ordered-16bit-5ep
exp_23_emb_grpo_checkpoint_220_16bit_vllm
Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-majestic_stalking_magpie
tau-max-retail-v1
qwen3_4b_standard_easy_rl
multiturn-sft-qwen-3-4b
Affine-silly-mistake
maze-v12-thinking-4B
bugs-r2egym-stackseq
Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-squeaky_spotted_tarantula
Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-alert_agile_komodo
Qwen3-1.7B-Base-Dapo-V1-S60
affine-new-1
Affine-pipi_v1
Affine-odor_v4
Affine-1210-11
Qwen2.5-1.5B-Open-R1-GRPO-math-2k
expert_cos_MRL4096_ROLLOUT4_LR1e-6_step50
expert_len_MRL4096_ROLLOUT4_LR1e-6_step50
merge_linear_len0.3fmt0.7_MRL4096_ROLLOUT4_LR1e-6
qwen3-4b-thinking-rl-ckpt-109
distillspec-qwen6-rkl-unquant
Llama-3.2-3B-Instruct-VMPO-V1
qwen3_1.7b_sft_final
qwen3_0-6B_adversarial_4
Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-sleek_marine_beaver
dpo-llama3.2-gspo-original-200
qwen3_0-6B_adversarial_6
Qwen3-1.7B-grpo-1765505298
parti_8_full
parti_13_full
qwen3_16bit_kr_2
parti_20_full
parti_22_full
parti_23_full
parti_26_full
parti_28_full