deepseek-r1-distill-qwen-1.5b-opencoder-educational-instruct-seed-3407-G-8_merged
llama-3.2-1b-custom
qwen-math-tutor
skillscan-detector-v4-8
CRRL_distill_1.5B_w_o_globalnorm_step_120
DanudeAi
pakistan-leaders-tinyllama-peft-merged
fine-tuned-Ollama-Resume-parser
CRRL_distill_1.5B_GRESO_step_90
Qwen2.5-Coder-LEAK-MCEVALHARD-1.5B-Base-1
qwen2.5_math_1.5b_grpo_ppl_adv_step580
evolai-qwen2.5-1.5b-sn47-v2
tinyllama-peft-merged
Miner2
1B-Instruct-Tulu-full
llama3.2-1b-Inst-somfmerge
ours_gemma_1b_output_dist_merged
Qwen2.5-1.5B-Instruct-arithmetic-abliterated
Qwen2.5-Coder-LEAK-MCEVALHARD-1.5B-Base-5
zay-qwen15-text2cypher-lotob-v1
qwen2.5-1.5b-adaptive-tutor-rl
Qwen2.5-Coder-LEAK-MCEVALHARD-1.5B-Base-7
Qwen2.5-Coder-LEAK-MCEVALHARD-1.5B-Base-9
tinyllama-1.1b-lora-risk-classifier-v1
7874b570
e36a659e
73162e53
7885edca
ddp-llama32-1b-ultrachat
llama3.2-1b-Inst-aaq
unsloth-gemma3-1b-finetune-diseases
Qwen2.5-Math-1.5B_grpo_entropy_rollout_8_ent_0.0008_20260509_232920_step580
arabic-prompt-1.5B
Oakley
ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562-gmp-s50pct-lr5e-6
Qwen2.5-Coder-LEAK-MCEVALHARD-1.5B-Base-8
grpo_entropy_rollout_8_ent_0.0005_step580
PureRL-1.5B-v6c4-distill-lam01-maskon
PureRL-1.5B-v9F-digit-w100
Qwen2.5-1.5B-KTO-PKU-SafeRLHF
cx-filler-model
merged_qwen2.5_1.5b_instruct_sft_3ep