meta-llama_Llama-3.2-3B-Instruct-GRPO-vanilla_G_4-checkpoint-88
meta-llama_Llama-3.2-3B-Instruct-GRPO-vanilla_G_4-checkpoint-393
meta-llama_Llama-3.2-3B-Instruct-GRPO-vanilla_G_4-checkpoint-186
Qwen_Qwen2.5-1.5B-Instruct-GRPO-vanilla_G_4-checkpoint-510
qwen3_4b_easy_rl_our_adv_final
Affine-ded-ftr
Affine-abd-ftr
stackexchange-tezos-sandboxes_glm_4_6_traces_locetash
qwen3-4b-arc-direct-gpt5miniabs-sft-allprobs-lr5e5-wd1e4-1211
Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-bipedal_roaring_cassowary
llama3-8b-tofu-ft-5epochs
Affine-S5
Mistral-7B-v0.3-Legal-Competition
Qwen3-4B-Inst-CoT-GRPO
Qwen2.5-1.5B-SPO-1ep-iter2
merge_accfmt_MRL4096_ROLLOUT4_LR2e-6_w0.9_linear
merge_accfmt_MRL4096_ROLLOUT4_LR2e-6_w0.7_linear
merge_accfmt_MRL4096_ROLLOUT4_LR2e-6_w0.5_linear
merge_accfmt_MRL4096_ROLLOUT4_LR2e-6_w0.3_linear
merge_accfmt_MRL4096_ROLLOUT4_LR2e-6_w0.1_linear
merge_cosfmt_MRL4096_ROLLOUT4_LR2e-6_w0.5_linear
merge_lenfmt_MRL4096_ROLLOUT4_LR2e-6_w0.7_linear
merge_lenfmt_MRL4096_ROLLOUT4_LR2e-6_w0.1_linear
Affine-ana2-3
affine-legacy
affine-he-18
Affine_new001
qwen3nothink_groupsss_sft_3_newlf
stackexchange-tezos-sandboxes_glm_4_6_traces_together_again
merge_accfmt_MRL4096_ROLLOUT4_LR2e-6_w0.5_ties
merge_cosfmt_MRL4096_ROLLOUT4_LR2e-6_w0.5_dare_ties
merge_lenfmt_MRL4096_ROLLOUT4_LR2e-6_w0.5_ties
merge_accfmt_MRL4096_ROLLOUT4_LR5e-7_w0.5_dare_ties
merge_cosfmt_MRL4096_ROLLOUT4_LR5e-7_w0.5_dare_ties
merge_accfmt_MRL4096_ROLLOUT4_LR1e-6_w0.5_ties
merge_accfmt_MRL4096_ROLLOUT4_LR1e-6_w0.5_dare_ties
merge_cosfmt_MRL4096_ROLLOUT4_LR1e-6_w0.5_ties
merge_cosfmt_MRL4096_ROLLOUT4_LR1e-6_w0.5_dare_ties
merge_lenfmt_MRL4096_ROLLOUT4_LR1e-6_w0.5_ties
merge_lenfmt_MRL4096_ROLLOUT4_LR1e-6_w0.5_dare_ties
Affine-48MyBGLKLhaEHU1KiW
Qwen2.5-7B-Instruct_unsloth_w_new_merged