Models

8,705
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_tok_python_alt_1_per_5_1p0_0p0_1p0_grpo_42_rule

0
·
4
·
Mar 2026
HyeongwonWarmTools4B32K

P9-split2_prob_Qwen3-4B-Base_0322-01

0
·
4
·
Mar 2026
jdineenWarmTools4B32K

qwen3_4b_baseline_solver_v3

0
·
4
·
Mar 2026
ljcamargoWarmTools4B32K

Akkadian-Pretrain-Qwen3-4B-Merged-16B

0
·
4
·
Mar 2026
jackyk02WarmTools4B32K

Qwen3-4B-CoderForge-SFT-baseline-epoch2

0
·
4
·
Mar 2026
LorenaYannnnnWarmTools800M32K

general_reward-Qwen3-0.6B-baseline_all_tokens_w_kl-seed_0

0
·
4
·
Mar 2026
CL-From-NothingWarmTools2B32K

teacher_prefix_kukurasu_20K_continual_Qwen3_4B_Thinking_qwen3-1.7b_epoch_3_mask

0
·
4
·
Mar 2026
jdineenWarmTools4B32K

qwen3_4b_vdrop75_v2_questioner_v5

0
·
4
·
Mar 2026
jdineenWarmTools4B32K

qwen3_4b_vdrop75_v2_solver_v3

0
·
4
·
Mar 2026
ljcamargoWarmTools4B32K

Akkadian-Finetune-Qwen3-4B-Merged-16B

0
·
4
·
Mar 2026
jdineenWarmTools4B32K

qwen3_4b_vdrop75_noqgen_questioner_v5

0
·
4
·
Mar 2026
puddledarkWarmTools800M32K

Qwen3-0.6B

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_1p0_0p0_1p0_grpo_dr_grpo_42_rule

0
·
4
·
Mar 2026
sinamnyWarmTools4B32K

sft_merged_model

0
·
4
·
Mar 2026
ShinjiCodeEVAWarmTools4B32K

student_feedback_v1_Qwen3-4B-Base

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_1p0_0p0_1p0_grpo_sapo_42_rule

0
·
4
·
Mar 2026
ljcamargoWarmTools4B32K

Akkadian-2-Pretrain-Qwen3-4B-Merged-16B

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_rel_1e-1_1p0_0p0_1p0_grpo_dr_grpo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_rel_1e0_1p0_0p0_1p0_grpo_dr_grpo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_1p0_0p1_1p0_grpo_sapo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_rel_1e1_1p0_0p0_1p0_grpo_dr_grpo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_tok_python_1p0_0p0_1p0_grpo_dr_grpo_42_rule

0
·
4
·
Mar 2026
HeAAAAAWarmTools2B32K

mental_RL_0.7_best

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_rel_1e-1_1p0_0p0_1p0_grpo_sapo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_tok_Certainly_1p0_0p0_1p0_grpo_sapo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_tok_python_1p0_0p0_1p0_grpo_sapo_42_rule

0
·
4
·
Mar 2026
amitycoWarmTools4B32K

tau-max-ds-retail-sft

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_1p0_0p8_1p0_grpo_42_rule

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_mix_all_Certainly_python_1p0_0p0_1p0_grpo_42_rule

0
·
4
·
Mar 2026
jdineenWarmTools4B32K

qwen3_cross_8bprop_4bsolve_solver_v5

0
·
4
·
Mar 2026
joyfineWarmTools4B32K

Qwen3-4B-Science

0
·
4
·
Mar 2026
sayururehanWarmTools4B32K

sinhala-qwen3-4b-lora

0
·
4
·
Mar 2026
syaeveWarmTools2B32K

Qwen3-1.7B-base-MED

0
·
4
·
Mar 2026
ljhjhWarmTools2B32K

Qwen3-1.7B-base-MED-MED

0
·
4
·
Mar 2026
PEKOMSWarmTools2B32K

Qwen3-1.7B-base-MED_0325

0
·
4
·
Mar 2026
totem205WarmTools2B32K

Qwen3-1.7B-base-MED

0
·
4
·
Mar 2026
MultiRLWarmTools4B32K

qwen3_4b_sudoku_multi_act_rl_epoch1

0
·
4
·
Mar 2026
oof-baroomfWarmTools4B32K

csrsef-thinking-20260325T021216Z-it01-pubmedqa

0
·
4
·
Mar 2026
MultiRLWarmTools4B32K

qwen3_4b_sudoku_multi_act_rl_allow_one_action_epoch2

0
·
4
·
Mar 2026
FiscusWarmTools4B32K

trinitite_safe_rl_base_model

0
·
4
·
Mar 2026
originalTimiWarmTools800M32K

hypa-test-m-001

0
·
4
·
Mar 2026
Kazuki1450WarmTools2B32K

Qwen3-1.7B-Base_dsum_3_6_rel_1e-1_alt_oracle1_noisy9_1p0_0p0_1p0_grpo_42_rule

0
·
4
·
Mar 2026