Models

6,669
ArjunvadWarm3B32K

unified-model-stage1-5

0
·
122
·
Jan 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step300

0
·
122
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step400

0
·
122
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step200

0
·
122
·
May 2026
wvnvwnWarm8B8K

Meta-Llama-3-8B-Instruct-hhrlhf-spider-v1

0
·
122
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v11D-lam050

0
·
122
·
May 2026
derprofi2431Warm33B32K

Prisma-32B

0
·
122
·
May 2026
alturingWarm500M32K

sft_ft

0
·
122
·
May 2026
PraxySanteWarm800M32K

Qwen3-0.6B-ASR-PostTrain-Medical-FR

0
·
122
·
May 2026
New
straykittycatWarm8B32K

3cats3

0
·
121
RicardoEstepWarm12B32K

AuroGodSlayerEtherealKrix-12B-Ex

1
·
121
·
Dec 2025
JubilantWarm4B32K

Affine-71-5Gb7xK36hmKcqAr4zQmnH32XBb4QV5EcYVaGspcPBJapL9Qm

0
·
121
·
Jan 2026
TeenSpiritWarm4B32K

Qwen3-4B-Thinking-2507-hqq-w3a16-faked-bf16

0
·
121
·
Feb 2026
prithivMLmodsWarm800M32K

Qwen3-0.6B-ft-bf16

0
·
121
·
Apr 2025
open-unlearningWarm1B32K

pos_tofu_Llama-3.2-1B-Instruct_full_lr2e-05_wd0.01_epoch10

0
·
121
·
May 2025
kysun63Warm1B32K

smileyllama-1b-reproduced

0
·
121
·
May 2026
minchaoh2002Warm8B32K

Qwen3-8B-pragrest-margin-0.8-qa-only-kl-0.02-lr-4e-6_step_21

0
·
121
·
Apr 2026
meteorainWarm4B32K

Qwen_Qwen3-4B-Thinking-2507_PTQ_AWQ_INT3-asym_ultrachat_200k

0
·
121
·
May 2026
minchaoh2002Warm14B32K

Qwen3-14B-pragrest-outcome-0.8-qa-only-kl-0.02-lr-4e-6-2-no-easy-no-hard-vanilla-sft_step_16

0
·
121
·
May 2026
jastorjWarm8B32K

snowflake_arctic_text2sql_r1_7b-nl2sqlpp-16bit-v5.7.8_phase_1-cw-5K

0
·
121
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v11A-lam002

0
·
121
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v7-s2-l2-kl-w3-b2

0
·
121
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-counterfactual-extended-facts-full

0
·
121
·
May 2026
OpenRubricsWarm8B32K

RubricARROW-8B-Rubric

0
·
121
·
May 2026
New
unslothWarm9B32K

GLM-Z1-9B-0414

1
·
120
·
Apr 2025
realtreetuneWarm1B2K

rho-1b-sft-MATH

0
·
120
·
Jun 2024
NeverOOMWarm2B32K

Affine-JJ

0
·
120
btrabuccoWarm2B32K

Insta-Qwen3-1.7B-SFT

0
·
120
·
Jun 2025
0xA50C1A1Warm4B32K

Qwen3-4B-Instruct-2507-NanoWriter

0
·
120
·
Feb 2026
Mercury7353Warm8B32K

masrl_0228_mix_coldstart

0
·
120
·
Mar 2026
PetarKalWarm4B32K

Qwen3-4B-ascii-art-curated-mix-full-e3-lr3e-5-ga16-ctx4096

0
·
120
·
Mar 2026
hamidbossWarm500M32K

Qwen2.5-0.5B-Instruct-Gensyn-Swarm-grazing_grassy_albatross

0
·
120
·
Sep 2025
Nos-PTWarm8B32K

Llama-Carvalho-PT

0
·
120
·
Jan 2025
wvnvwnWarm8B32K

qwen2.5-7b-instruct-gsm8k-sn-tuned-lr5e-5

0
·
120
·
May 2026
InfiniAILabWarm3B32K

OpenR1-Qwen-3B-SFT-Instruct

1
·
120
·
Mar 2025
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step580

0
·
120
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step350

0
·
120
·
May 2026
pkun2Warm8B32K

qwen3_8b_16bit_meme_2_kr

0
·
120
·
May 2026
usr256864Warm7B4K

ee_gol_grp_f1_form_multi

0
·
120
·
May 2026
modrillWarm4B32K

mhm_dataless__saves_new_dataless_math_no_think_17_sparsity_0p0

0
·
120
·
May 2026
iproskurinaWarm500M32K

qwen-hf-fewshot-iter-contam-np-iter2

0
·
120
·
May 2026
cjiaoWarm2B32K

goldengoose-gumbel_gradsim_tau1.00-25grp

0
·
120
·
May 2026
New