Models

5,846
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_1p0_0p0_1p0_grpo_sapo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_rel_1e-1_1p0_0p0_1p0_grpo_dr_grpo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_1p0_0p1_1p0_grpo_sapo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_tok_python_1p0_0p0_1p0_grpo_dr_grpo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_rel_1e-1_1p0_0p0_1p0_grpo_sapo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_rel_1e1_1p0_0p0_1p0_grpo_sapo_42_rule

0
·
4
·
Mar 2026
Anonymous-2004WarmTools2B32K

asgn2-harmful-merged

0
·
4
·
Mar 2026
Ilia2003MahWarmTools2B32K

qwen2.5-1.5b-gsm8k-train-step500

0
·
4
·
Mar 2026
bouzaghraneWarmTools500M32K

Qwen2.5-0.5B-SFT

0
·
4
·
Mar 2026
Fergus2000WarmTools500M32K

wordle-grpo-Qwen3-1.7B

0
·
4
·
Mar 2026
achinta3WarmTools3B32K

llama_3.2_3b-owl_numbers_full_ep6

0
·
4
·
Mar 2026
achinta3WarmTools3B32K

llama_3.2_3b-owl_numbers_full

0
·
4
·
Mar 2026
Ilia2003MahWarmTools2B32K

qwen2.5-1.5b-gsm8k-train-step8500

0
·
4
·
Mar 2026
Ilia2003MahWarmTools2B32K

qwen2.5-1.5b-gsm8k-train-step9000

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_1p0_0p8_1p0_grpo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_rel_1e-1_alt_1_per_5_1p0_0p0_1p0_grpo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_mix_all_Certainly_python_1p0_0p0_1p0_grpo_42_rule

0
·
4
·
Mar 2026
j05hr3dWarmTools1B32K

Llama-3.2-1B-Instruct-2EP-C_M_T-AUX_CT

0
·
4
·
Mar 2026
j05hr3dWarmTools3B32K

Llama-3.2-3B-Instruct-C_M_T-AUX_CT

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_mix_all_rel_1e0_python_1p0_0p0_1p0_grpo_42_rule

0
·
4
·
Mar 2026
syaeveWarmTools2B32K

Qwen3-1.7B-base-MED

0
·
4
·
Mar 2026
NotoriousH2WarmTools2B32K

Qwen3-1.7B-base-MED_0325

0
·
4
·
Mar 2026
ljhjhWarmTools2B32K

Qwen3-1.7B-base-MED-MED

0
·
4
·
Mar 2026
PEKOMSWarmTools2B32K

Qwen3-1.7B-base-MED_0325

0
·
4
·
Mar 2026
totem205WarmTools2B32K

Qwen3-1.7B-base-MED

0
·
4
·
Mar 2026
j05hr3dWarmTools3B32K

Llama-3.2-3B-Instruct-C_M_T-Reh_Dolly

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_rel_1e-1_alt_oracle1_noisy9_1p0_0p0_1p0_grpo_42_rule

0
·
4
·
Mar 2026
GM77WarmTools4B32K

qwen3-4b-verilog-grpo

0
·
4
·
Mar 2026
j05hr3dWarmTools1B32K

Llama-3.2-1B-Instruct-C_M_T-SAM-AUX_CT_CE-RHO0_05

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_fnr_eng_1p0_0p0_1p0_grpo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_0p5_0p0_1p0_grpo_sapo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_0p5_0p0_1p0_grpo_dr_grpo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_0p8_0p0_1p0_grpo_dr_grpo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_0p8_0p0_1p0_grpo_42_rule

0
·
4
·
Mar 2026
alirizaercanWarmTools500M32K

qwen25_05b_base_full_ft_lunarlander_a4000

0
·
4
·
Mar 2026
j05hr3dWarmTools1B32K

Llama-3.2-1B-Instruct-C_M_T-AUX_CT_CE

0
·
4
·
Mar 2026
rajveer43WarmTools2B32K

supply-chain-grpo-Qwen3-1.7B

0
·
4
·
Mar 2026
j05hr3dWarmTools1B32K

Llama-3.2-1B-Instruct-C_M_T-DOLLY

0
·
4
·
Mar 2026
hyungjoochaeWarmTools4B32K

Qwen3-4B-verienv-webjudge-filtered-action-tag-final

0
·
4
·
Mar 2026
HyeongwonWarmTools4B32K

PS_only_answer_Qwen3-4B-Base_0328-01-1e-5

0
·
4
·
Mar 2026
yilmazzeyWarm3B8K

gemma2_2b-abstract-finetuned-ep2-b4

0
·
4
·
Apr 2026
seele123WarmTools2B32K

OpenR1-Distill-1.5B-ours

0
·
4
·
Oct 2025