llama_3b_base_non_think_sft_nopack_lr1.5e5_ep3
sft2-Interleaved
R8_1
F_R8_1
F_R8
F_R9_T3_low_bsz
PK-Link-Qwen3-8B-RSA-SFT-GRPO-self-judge-0.02-kl-4e-6_step_20
Llama3.2_1B_cachacaNER
Qwen2.5-7B-Instruct-ftjob-bf700f8824c9
Llama-3.2-3B-Instruct-C_M_T-SAM_RHO0_02-SEED999
day1-train-model
Qwen3-14B-HTS-SFT
kural-mistral-7b
affine-1
Qwen2.5-32B-Instruct-ftjob-e1b6bac324fc
Qwen3-8B-PragReST-SFT
Llama3.2_1B_leNER
model_sft_dare
affine-5Ca7pkmhmACaULaKZtb1wQgRBKiMksmKd7vqgETYfRuCRikK
Cclilqwen
qwen2-5-3b-ins-qwen2-5-7b-ins-basic-newprompt-fp32-0324
qwen2-5-1-5b-ins-qwen2-5-7b-ins-basic-newprompt-fp32-0326
PK-Link-Qwen3-8B-OLD-SFT-GRPO-self-judge-0.02-kl-4e-6_step_20
affine-5CJLxcGpPk2mvf3ZQaErCCqtuLuQd5oue57WWARLJDxjki6k
health_essential_knowledge2
affine-5CXjrfQeeKoXErUY4jGysVsNqvLhry32LrToJnL7GmrVhFSE
rt-sam.backdoor_9_lr3e-5_rho0.1
model_sft_resta
llama3-8b-code-extended
affine-qwen3-32b-5D5HB3ecZrj7HnZAK131iAGNZe3s6gcN3sNuRVEFZ2973eji
qwen2-5-14b-ins-qwen2-5-7b-ins-basic-newprompt-0328
affine-5D9tWmN2XTnNYBbGdRN5R5XssGsruXbkNUSpsUFAbGZcCMAZ
hr-llm-gcc
AB2
Qwen3-32B-SPaRC-GRPO
sft-count_loss-Qwen3-0.6B-mle0.5-ul0.5-tox0-e4
Qwen2.5-14B-llm-as-judge
model
llama-3.3-70b-not-cot-distilled-sleeper-agent-full-finetune-step-200
affine-r1-5HgLaJTnnaeNGyJTkNAXGWtyNi4NMhcdWLdH87TKd7rtkY5s
DeepSeek-R1-Distill-Merge-Qwen-Math-1.5Bb
llama-3.1-8b-cot-distilled-sleeper-agent-full-finetune-step-100