qwen3-4b-agentbench-merged02
c2
c10
c16
c17
qwen3b-sky-brev-pure-rm
qwen3b-sky-brev-pure-brevity
Affine-5DhdmNp9nyZViV1WzBVeZGvTcCiLXKLrEjDjvbdcbePiggEH
FIPO_32B
qwen2.5-7B-rlcr_g8_b512
llama-2-13b-hf-smooth
affine-u1-5Ev5X569e9VtQhFU8hGMjAAn6xaTz2xx63kVUvKnssiCFDbQ
qwen2_7b_grpo_vanilla_0325_1257
llama3-8b-full-pretrain-wash-c4-2-4m-bs4
llama-3.3-70b-soap-sleeper-agent-full-finetune-step-1600
chase-defender-v4
ci-grpo_Llama-3.1-8B-Instruct_bs16_g16_mb128_lr1e-6_b1e-3_clip0p2_temp0p7_ep30
F_R14_1
RLCR-v4-ks-batch-frontier-combo-hotpot
RLCR-v4-ks-uniqueness-buf5k-hotpot
RLCR-v4-ks-uniqueness-buf5k-noece-noaurc-hotpot
F_R14_T3
F_R14_T4
F_R15_T2
F_R15_T3
F_R15_T4
F_R16_T2
F_R16_T3
decompiler-v5
F_R16_T4
id-0001-beear-42
id-0001-beear-519
llama-3.1-8b-TL-SynthDolly-1A
FCP-plus-Bootstrap_paper_table_1_version
test_gin_rummy_qwen_2-5_3B
test-checkpoint-1069
test-checkpoint-750
AT-qwen3-4b-ultrachat-hhrlhf-15360-rm-ppo-clean-p0_05-step-40
F_R1_2_4b
qwen3_1.7b_webshop_atomic_action_epoch2
F_R1_4b_T1
F_R1_1_4b_T3