Models

6,227
sylvester-francisWarm2B32K

typescript-slm-1.5b-full

0
·
130
·
Nov 2025
amirdianovWarm2B32K

Qwen2.5-1.5B-Instruct-abliterated-ru

0
·
130
·
May 2026
ahczhgWarm1B32K

Llama-3.2-1B-Aegis-SFT-DPO

1
·
129
·
Nov 2025
stalkiqWarm1B2K

stalkiq-ios-app-generator

1
·
129
·
Apr 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_rollout_8_w_o_KL_step550

0
·
129
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v13B-lam005

0
·
129
·
May 2026
jaehookimWarm1B32K

hw2-dpo

0
·
129
·
May 2026
refuelaiWarm2B32K

Qwen-2-Refueled

3
·
128
·
Jan 2025
zhaohqWarm2B32K

PureRL-1.5B-v12B-lam005

0
·
128
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v13A-lam002

0
·
128
·
May 2026
soynade-researchWarm2B32K

Oolel-Small-v0.1

3
·
127
·
Jan 2025
daviddavidluWarm2B32K

DAPO-with-prompt-augmentation-step2720

0
·
127
·
Feb 2026
Maryam7711Warm1B2K

tinyllama-trl-merged

0
·
127
·
May 2026
knovelengWarm2B32K

Open-RS1

4
·
127
·
Mar 2025
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step150

0
·
127
·
May 2026
ClaudioSavelliWarm1B32K

FAME_FT_llama32-1b-10-instruct-qa

0
·
127
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v13D-lam025

0
·
127
·
May 2026
vitaleantonioWarm2B32K

Qwen2.5-Coder-TA-MCEVALHARD-1.5B-Base

0
·
126
·
May 2026
alinamoca25Warm2B32K

hikelogic-qwen2.5-1.5b

0
·
126
·
May 2026
open-unlearningWarm1B32K

pos_tofu_Llama-3.2-1B-Instruct_full_lr2e-05_wd0.01_epoch10

0
·
125
·
May 2025
Enthusiast101Warm1B32K

llama3.2-1b-Inst-antidote

0
·
125
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_rollout_8_w_o_KL_step400

0
·
125
·
May 2026
cjiaoWarm2B32K

goldengoose-gumbel_gmrel_tau1.00-25grp

0
·
125
·
May 2026
New
stech2333Warm2B32K

brainalign-qwen2.5-1.5b-C

0
·
124
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step550

0
·
124
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step500

0
·
124
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_rollout_8_w_o_KL_step580

0
·
124
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v12A-lam002

0
·
124
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v13C-lam010

0
·
124
·
May 2026
cjiaoWarm2B32K

goldengoose-gumbel_gradsim_tau2.00-25grp

0
·
124
·
May 2026
New
abaryanWarm1B32K

CyberXP_Agent_Llama_3.2_1B

0
·
123
·
Oct 2025
Joks8474Warm1B2K

Iris-1.3B-Beta

0
·
123
·
Mar 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step450

0
·
123
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_rollout_8_w_o_KL_step500

0
·
123
·
May 2026
zeras141aWarm1B2K

083fff31

0
·
123
·
Aug 2025
ClaudioSavelliWarm1B32K

FAME_PO_llama32-1b-10-instruct-qa

0
·
123
·
May 2026
cjiaoWarm2B32K

goldengoose-gumbel_gradsim_tau0.10-25grp

0
·
123
·
May 2026
New
gradients-io-tournamentsWarm2B32K

augmented-9628c62b4208063a

0
·
122
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step300

0
·
122
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step400

0
·
122
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step200

0
·
122
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v12C-lam010

0
·
122
·
May 2026