Models

4,346
swadeshbWarm3B32K

Qwen2.5-3B-Instruct-CRPO-V35

0
·
0
infinitylogeshWarm2B32K

Qwen3-1.7B-GRPO-SRT-Math-12k-Stage-0

0
·
0
·
Dec 2025
MultiRLWarm2B32K

qwen3_1.7b_easy_rl_final_gamma_1

0
·
0
·
Dec 2025
ahme0599Warm3B32K

meta-llama_Llama-3.2-3B-Instruct-GRPO-vanilla_G_4-checkpoint-292

0
·
0
·
Dec 2025
MultiRLWarm2B32K

qwen3_1.7b_rush_hour_one_move_sft

0
·
0
·
Jan 2026
ericoh929Warm2B32K

qwen3-1.7b-huggingfaceh4-instruction-data-lora-instruction-tuned

0
·
0
·
Jan 2026
MultiRLWarm2B32K

qwen3_1.7b_new_sudoku_one_action_B_sft_lr_5e_6__step_2216

0
·
0
·
Jan 2026
MultiRLWarm2B32K

qwen3_1.7b_sudoku_multi_action_easy_21_30_epoch3

0
·
0
·
Jan 2026
qingy2024Warm2B32K

GRMR-V2.5-1.7B

0
·
0
·
Jun 2025
swadeshbWarm3B32K

Llama-3.2-3B-Instruct-CRPO-V1

0
·
0
·
Nov 2025
MultiRLWarm2B32K

qwen3_1.7b_sudoku_multi_action_easy_11_20

0
·
0
·
Jan 2026
MultiRLWarm2B32K

qwen3_1.7b_rush_hour_multi_move_final_new

0
·
0
·
Jan 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_csum_6_10_rel_1e-5_1p0_0p0_1p0_grpo_1_rule

0
·
0
·
Jan 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_csum_6_10_assistant_1p0_0p0_1p0_grpo_42_rule

0
·
0
·
Jan 2026
t2anceWarm3B32K

SFT-Warmup-3B

0
·
0
·
Jan 2026
ShacharNarWarm3B32K

qwen2.5_coder_3b_sqlfuse_probgate_tsql_only_answerable_delimeters_eos

0
·
0
·
Jan 2026
ggg-llms-teamWarm2B32K

TuQwen3-LR8e5-irm

0
·
0
·
Jan 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_csum_6_10_geq_6_geq_10_0p5_0p5_1p0_0p0_1p0_grpo_42_rule

0
·
0
·
Jan 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_csum_6_10_geq_8_geq_8_0p25_1p0_1p0_0p0_1p0_grpo_42_rule

0
·
0
·
Jan 2026
NotoriousH2Warm2B32K

qwen3-1.7b-base-MED-Instruct

0
·
0
·
Aug 2025
lhkhiem28Warm2B32K

CORE-Qwen3-1.7B-MATH-A9-U-S-SG

0
·
0
·
Jan 2026
eekayWarm3B8K

gemma-2-2b-it-lion-numbers-ft

0
·
0
·
Jan 2026
JamesSandWarm2B32K

qwen1.7b-adam-reset-muon-lr-1e-6-fp64-global_step_200

0
·
0
·
Jan 2026
gradients-io-tournamentsWarm3B32K

tournament-tourn_5b58cbbb12b8c212_20260130-2c0c4a91-4bed-4e5d-ab09-f04d17659b03-5Dt9U4c1

0
·
0
·
Jan 2026
ggg-llms-teamWarm2B32K

TuQwen3-LR1e5-irm

0
·
0
·
Feb 2026
ggg-llms-teamWarm2B32K

TuQwen3-LR1e5-irm-cp087

0
·
0
·
Feb 2026
gradients-io-tournamentsWarm3B32K

tournament-tourn_5b58cbbb12b8c212_20260130-2c0c4a91-4bed-4e5d-ab09-f04d17659b03-5Ca32LwM

0
·
0
·
Jan 2026
sivakrishna123Warm4B32K

FREYAH-4B-COMPLETE

0
·
0
·
Feb 2026
gradients-io-tournamentsWarm3B32K

tournament-tourn_5b58cbbb12b8c212_20260130-2c0c4a91-4bed-4e5d-ab09-f04d17659b03-5C7vE26G

0
·
0
·
Jan 2026
g4meWarm2B32K

QwenRolina3-Base-LR1e5-b64g8-uff

0
·
0
·
Feb 2026
g4meWarm2B32K

QwenRolina3-Base-LR1e5-b64g8-uff-irm

0
·
0
·
Feb 2026
g4meWarm2B32K

QwenRolina3-IRM-LR1e5-b64g8-order-domain-uff

0
·
0
·
Feb 2026
g4meWarm2B32K

QwenRolina3-Base-LR1e5-b64g8-order-domain-uff

0
·
0
·
Feb 2026
shawntzxWarm3B32K

Qwen2.5-3B-GRPO-3_13_math

0
·
0
·
Mar 2025
g4meWarm2B32K

QwenRolina3-Base-LR4e5-b64g8-order-domain-uff

0
·
0
·
Feb 2026
g4meWarm2B32K

QwenRolina3-IRM-LR4e5-b64g8-order-domain-uff

0
·
0
·
Feb 2026
g4meWarm2B32K

QwenRolina3-Base-LR1e5-b32g2gc8-order-domain

0
·
0
·
Feb 2026
nethmidWarm3B32K

llama3.2.3B_cognitive_distortions_16bit

0
·
0
·
Feb 2026
g4meWarm2B32K

QwenRolina3-Base-LR1e5-b32g2gc8-order-domain-2ep

0
·
0
·
Mar 2026
g4meWarm2B32K

QwenRolina3-Base-LR1e5-wsd-b32g2gc8-order-domain-2ep

1
·
0
·
Mar 2026
Josuef663Warm3B32K

advanced_finetune_16bit

0
·
0
·
Mar 2026
g4meWarm2B32K

QwenRolina3-Base-LR1e5-b32g2gc8-order-domain-3ep

0
·
0
·
Mar 2026