affine-4-5CtDhg8C3LHkLSsfzE5hMBoiBZG2Bvn9M5JFssvmdDeRuXSs
Affine-af4
gemma9b-cot-tr-merged
affine-06-5ECmgtFtDFmEronjQ6wpcYjmNsdDukJyavrSUou5CQrnT7te
qwen3-8b-bfcl-sft-merged
qwen_finetune_16bit
PK-Link-Qwen3-8B-SFT-GRPO
PK-Link-Qwen3-8B-SFT-GRPO-0_02-kl_step_40
equational-reasoning-sft-rl-loop-theory
qwen2-5-7b-ins-qwen2-5-7b-ins-basic-newprompt-fp32-0324
PK-Link-Qwen3-8B-RSA-SFT-GRPO-self-judge-0.02-kl-4e-6_step_20
Qwen3-8B-PragReST-SFT
PK-Link-Qwen3-8B-OLD-SFT-GRPO-self-judge-0.02-kl-4e-6_step_20
llama3-1-8b-ins-qwen2-5-7b-ins-basic-newprompt-0329
qwen2-5-7b-grpo-gpt4omini-basic-newprompt-0402
PK-Link-Qwen3-8B-RSA-2-SFT-GRPO-margin-qa-only-0.02-kl-4e-6-reward-2_step_33
deepseek-governed-no-amnesia