tinyllama-dolly-15k
qwen2.5-1.5b-indonesian-rlora
llama3.2_1b_2025_uncensored
WiroAI-Finance-Qwen-1.5B
sub38-71
gemma-3-1b-it-sst5-merged
AIMO-Qwen2.5-Math-1.5B-Instruct-Finetuned
goldengoose-top25_gradsim-25grp
NuminaMath-Qwen2.5-1.5B-GRPO-test-v1
Ru-Gemma3-1B
Qwen2.5-1.5B-Instruct-Gensyn-Swarm-hoarse_wily_sardine
qwen2.5-1.5b-dpo-iter1
FAME_GA_llama32-1b-instruct-qa
PureRL-1.5B-v6c5-distill-lam03-maskon
Qwen2.5-Coder-PROD-MCEVALHARD-1.5B-Base-6
Qwen2.5-1.5B-Legal-ID-Chatbot
tinyllama-ft-news-sentiment
goldengoose-gumbel_gmrel_tau0.50-25grp
Qwen2.5-1.5B-Instruct-ULD-gemma-3-27b-it-2
CRRL_distill_1.5B_GRESO_step_90
PureRL-1.5B-v7-stage1-qa-instruct
Qwen2.5-Coder-PROD-MCEVALHARD-1.5B-Base-5
PureRL-1.5B-v7-s2-l2-kl-w1-b0
TinyLlama-TinyLlama-1.1B-Chat-v1.0-abliterated
y5
c71-h38
arc-grpo-deepseek-r1-distill-qwen-1.5b-rajat-seed-42-G-4-new_merged
Qwen2.5-1.5B-Open-R1-GRPO
JustRL-Nemotron-1.5B
qwen-final-1-5
tinyllama-merged-DrArifButt
tofu_1B_f10_NPO_lr1e-4_b0.1
tofu_1B_f10_NPO_lr1e-5_b0.1
Llama-3.2-1B-Instruct-FineTome-100k
ta2
qwen2.5-coder-1.5b-pandas-dpo-aligned
maris-ai-text
Qwen2.5-Coder-PROD-MCEVALHARD-1.5B-Base-8
grpo_ppl_adv_rollout_8_step580
tofu_1B_f10_NPO_lr5e-6_b0.1
FAME_PO_llama32-1b-instruct-qa
qwen2_1.5B-ultrachat200k