c22
c23
affine-ana6-9-5FmzsJh4ZPsfv1JaH853oDe1oqmwweuzy26TQ1BKwNTfk5zY
liarsdice-checkuplog-hashid
gemma2-fieldtech
gemma-diary-summarizer
qwen3-14b-nt-gen-inv-sft-v2.2-full
jsd
Qwen2.5-1.5B-Instruct-SFT-30k
4b_sft_deepseek_reasoner_epoch3
open-dcoder-ablation-0.1-ctw0.1
liarsdice-smoketest-hashid
llama3.1-8b-sft-bt-aug-clean
Llama-3.2-1B-Instruct-C_M_T-1EP
test_gin_rummy_qwen_2-5_3B
AT-qwen3-4b-ultrachat-hhrlhf-15360-rm-ppo-clean-p0_05-step-20
R1_1_4b
R1_2_4b
AT-qwen3-4b-ultrachat-hhrlhf-15360-rm-ppo-clean-p0_05-step-50
F_R1_4b
F_R1_1_4b
F_R1_2_4b
qwen3_1.7b_sudoku_multi_action_group_norm_allow_one_action_epoch1
qwen3_1.7b_webshop_atomic_action_epoch1
qwen3_1.7b_sudoku_multi_action_group_norm_allow_one_action_epoch3
qwen3_1.7b_webshop_atomic_action_epoch2
F_R1_1_4b_T3
F_R1_1_4b_T2
F_R1_4b_T4
F_R1_2_4b_T6
F_R1_2_4b_T7
MicroCoder-FC-0.5B-v8-DPO-Balanced
tews-meditron-7b-merged
nemotron-7B-9K
Llama-3.1-Tulu-3-8B-SFT-Safety-Reduced
dt-miner-uid202
Qwen3-4B-Instruct-2507-heretic
llama_3b_base_non_think_sft_nopack_lr1.5e5_ep3
llama_3b_instruct_non_think_sft_nopack_lr1.5e5_ep3
sft2-Interleaved
MAIN-M3PO-bhattacharyya-trial1-seed123
PK-Link-Qwen3-8B-RSA-SFT-GRPO-self-judge-0.02-kl-4e-6_step_20