Models

14,690
MultiRLWarm2B32K

qwen3_1.7b_rush_hour_one_move_4_9

0
·
2
·
Jan 2026
polaris-73Warm2B32K

ds1p5b_code_sandbox-global_step_700

0
·
2
·
Jan 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_csum_6_10_geq_8_geq_8_0p75_0p5_1p0_0p0_1p0_grpo_42_rule

0
·
2
·
Jan 2026
cdomingoenrichWarm1B32K

pdalma_ctx4_dm1_ce003_pr05_ptll32-1b_s2_ckpt_5_of_10_it36

0
·
2
·
Jan 2026
polaris-73Warm2B32K

ds1p5b_code_sandbox-global_step_800

0
·
2
·
Jan 2026
cdomingoenrichWarm1B32K

pdalma_ctx4_dm1_ce0_pr1_ptll32-1b_s2_ckpt_1_of_10_it4

0
·
2
·
Jan 2026
MultiRLWarm2B32K

qwen3_1.7b_rush_hour_multi_move_final_10_12

0
·
2
·
Jan 2026
polaris-73Warm2B32K

ds1p5b_skywork_math_hard-global_step_200

0
·
2
·
Jan 2026
MultiRLWarm2B32K

qwen3_1.7b_sudoku_multi_action_easy_11_20_epoch3

0
·
2
·
Jan 2026
reds0510Warm3B32K

qwen25-3b-l3l3-ep5

0
·
2
·
Jan 2026
sagnikMWarm2B32K

grpo_rmsprop_qwen3_1p7b_3k_seqlen_1e-6

0
·
2
·
Jan 2026
sagnikMWarm2B32K

grpo_rmsprop_qwen3_1p7b_3k_seqlen_1e-5

0
·
2
·
Jan 2026
MhairWarm1B2K

f127

0
·
2
·
Jul 2025
tao1000Warm1B2K

gra4

0
·
2
·
Jul 2025
LegendaryDawnWarm3B32K

erpo-iclr-ours-Qwen2.5-3b-corr_gen_s002_max12

0
·
2
·
Oct 2025
menfiisWarm500M32K

Qwen2.5-0.5B-Instruct-Gensyn-Swarm-peckish_stinging_macaque

0
·
2
·
Oct 2025
giguanWarm1B2K

giguan

0
·
2
·
Oct 2025
dai3107Warm2B32K

qwen2.5-1.5b-pro

0
·
2
·
Jan 2026
reds0510Warm3B32K

nvidia_math_cot_1e5_v2_ep5

0
·
2
·
Jan 2026
rosieyzhWarm2B32K

sft_qwen15_code200_lr_1e-5_cosine_bsz_128_ckpt_2_of_5

0
·
2
·
Jan 2026
rosieyzhWarm2B32K

sft_qwen15_code200_lr_5e-6_constant_bsz_128_ckpt_4_of_5

0
·
2
·
Jan 2026
cdomingoenrichWarm2B32K

pdcd200_cptq15_ce003_pr05_ptq25-15b_omi_c100k_200tok_s8_ckpt_2_of_10_it26

0
·
2
·
Jan 2026
cdomingoenrichWarm1B32K

pdalma_ctx4_dm1_ce01_pr0_ptll32-1b_s2_ckpt_2_of_10_it7

0
·
2
·
Jan 2026
mini97Warm3B32K

llama3.2-3b_grpo_entropy_adv

0
·
2
·
Jan 2026
MultiRLWarm2B32K

qwen3_1.7b_rush_hour_multi_move_final_short_4_9_epoch3

0
·
2
·
Jan 2026
asingh15Warm4B32K

qwen-arc-abs-gpt5.2-sft-fewshot4-1epoch-icmlpaper-0125

0
·
2
·
Jan 2026
LegendaryDawnWarm4B32K

SDRL-rand-Qwen3-4B-Base-icml-self-debate-random_n8_l2048-DAPO_n8_bs256_long8-step200

0
·
2
·
Jan 2026
rrvaswinWarm1B32K

1_to_16_analysis

0
·
2
·
Jan 2026
mohantestingWarm4B32K

Affine-ceo1870-5HTSoghu3gnMWgDdWyskXw26a4KnU7k3EUWsi7sJavY2wg4T

0
·
2
·
Jan 2026
MultiRLWarm4B32K

qwen3_4b_sudoku_one_act_sft_final

0
·
2
·
Jan 2026
g-assismoraesWarm2B32K

Qwen3-1.7B-CCC-merged-cp4-LR1e-4

0
·
2
·
Jan 2026
shawntzxWarm500M32K

Qwen2.5-3B-GRPO-3_3_8_6k

0
·
2
·
Mar 2025
xiaoni611Warm3B32K

qwen-2.5-3b-r1-countdown

0
·
2
·
Mar 2025
MichelleOdnertWarm800M32K

MNLP_M2_mcqa_model

0
·
2
·
May 2025
ivichsoonWarm4B32K

old-122

0
·
2
·
Jan 2026
boweizh1204Warm4B32K

fff-ooo

0
·
2
·
Jan 2026
yusufcelebiWarm8B32K

qwen3-8B-Base-orca_math-sparse-LoRA-step180-merged

0
·
2
·
Jan 2026
koutchWarm4B32K

short_paper_qwen_2.json_train_dpo_v2_train_no_think

0
·
2
·
Jan 2026
koutchWarm4B32K

paper_qwen_qwen3-instruct-4b_train_sft_all_train_think

0
·
2
·
Jan 2026
koutchWarm4B32K

paper_qwen_qwen3-instruct-4b_train_sft_train_think

0
·
2
·
Jan 2026
ksuchoi216Warm800M32K

qwen3-0.6b-fine-tuned

0
·
2
·
Jan 2026
naruto1208Warm4B32K

affine-g-12-5GVwnx568cWuGXh2BuYntjvD9xKFyJQPnNW1XbMdnGi2KHuW

0
·
2
·
Jan 2026