Models

8,656
MultiRLColdTools2B32K

qwen3_1.7b_sudoku_multi_action_group_norm_allow_one_action_epoch3

0
·
1
·
Mar 2026
ChuGyoukColdTools4B32K

F_R1_1_4b_T2

0
·
1
·
Mar 2026
ChuGyoukColdTools4B32K

F_R1_4b_T4

0
·
1
·
Mar 2026
ChuGyoukColdTools4B32K

F_R1_2_4b_T6

0
·
1
·
Mar 2026
ChuGyoukColdTools4B32K

F_R1_2_4b_T7

0
·
1
·
Mar 2026
ChuGyoukColdTools8B32K

F_R1_T3_lower_lr

0
·
1
·
Mar 2026
opensynthesisColdTools14B32K

Qwen3-14B-heretic

0
·
1
·
Feb 2026
blacksimon818ColdTools4B32K

ppo-step100

0
·
1
·
Mar 2026
MultiRLColdTools2B32K

qwen3_1.7b_sudoku_multi_action_group_norm_allow_one_action

0
·
1
·
Mar 2026
wls04ColdTools2B32K

gkd-lambda0.8

0
·
1
·
Mar 2026
HyeongwonColdTools4B32K

P2-split2_prob_strlen_cutoff_0p5_filtered_Qwen3-4B-Base_0330

0
·
1
·
Mar 2026
orlandowhiteColdTools14B32K

Qwen3-14B-HTS-SFT

0
·
1
·
Apr 2026
top-50000ColdTools32B32K

affine-1

0
·
1
·
Apr 2026
DCAgentColdTools8B32K

a1-qasper

0
·
1
·
Apr 2026
arl949ColdTools4B32K

deal-extractor-4b-v2

1
·
1
·
Apr 2026
AsystemoffieldsColdTools800M32K

Cclilqwen

0
·
1
·
Mar 2026
simpissaColdTools800M32K

Qwen3-0.6B-Reverse-Text-SFT

0
·
1
·
Mar 2026
TwelfthStarColdTools8B32K

qwen3-8b-nothink-sft

0
·
1
·
Mar 2026
hamishiviColdTools4B32K

fixed_rl_v3_tmax_combined_agent

0
·
1
·
Apr 2026
leary-comosColdTools32B32K

affine-5CXjrfQeeKoXErUY4jGysVsNqvLhry32LrToJnL7GmrVhFSE

0
·
1
·
Apr 2026
robustness-smi-testsColdTools4B32K

rt-broad_RT.quirk_100_lr3e-5

0
·
1
·
Apr 2026
robustness-smi-testsColdTools4B32K

rt-sam.backdoor_81_lr3e-5_rho0.01

0
·
1
·
Apr 2026
robustness-smi-testsColdTools4B32K

rt-sam.backdoor_81_lr3e-5_rho0.05

0
·
1
·
Apr 2026
robustness-smi-testsColdTools4B32K

rt-sam.backdoor_81_lr3e-5_rho0.1

0
·
1
·
Apr 2026
robustness-smi-testsColdTools4B32K

rt-sam.backdoor_9_lr1e-5_rho0.1

0
·
1
·
Apr 2026
robustness-smi-testsColdTools4B32K

rt-sam.backdoor_9_lr3e-5_rho0.05

0
·
1
·
Apr 2026
robustness-smi-testsColdTools4B32K

rt-sam.backdoor_9_lr3e-5_rho0.1

0
·
1
·
Apr 2026
robustness-smi-testsColdTools4B32K

rt-broad_RT.backdoor_9_lr1e-5

0
·
1
·
Apr 2026
robustness-smi-testsColdTools4B32K

rt-broad_RT.backdoor_9_lr3e-5

0
·
1
·
Apr 2026
robustness-smi-testsColdTools4B32K

rt-sam.backdoor_81_lr1e-5_rho0.1

0
·
1
·
Apr 2026
LMIS-ORGColdTools8B32K

ToolOrchestra_Slime_Agentic_Qwen3_8B

0
·
1
·
Apr 2026
robustness-smi-testsColdTools4B32K

rt-broad_RT.backdoor_81_lr3e-5

0
·
1
·
Apr 2026
DANIELDX2ColdTools32B32K

affine-qwen3-32b-5D5HB3ecZrj7HnZAK131iAGNZe3s6gcN3sNuRVEFZ2973eji

0
·
1
·
Mar 2026
Tok331102ColdTools32B32K

affine-5DM2XSNiB8NmJFKa4n4JyYsrhMtBwC1Qj6X37bFkD5eaChzf

0
·
1
·
Apr 2026
prexpertColdTools32B32K

affine-5D9tWmN2XTnNYBbGdRN5R5XssGsruXbkNUSpsUFAbGZcCMAZ

0
·
1
·
Apr 2026
lkaesbergColdTools32B32K

Qwen3-32B-SPaRC-GRPO

0
·
1
·
Oct 2025
polaris-73ColdTools4B32K

qwen3-4b_grpo_all-global_step_400

0
·
1
·
Jan 2026
polaris-73ColdTools4B32K

qwen3-4b_grpo_all-global_step_800

0
·
1
·
Jan 2026
t2anceColdTools4B32K

CodeRM-Bilevel-GRPO-4B

1
·
1
·
Apr 2026
TarhanEColdTools800M32K

sft-count_loss-Qwen3-0.6B-mle0.5-ul0.5-tox0-e4

0
·
1
·
Jun 2025
minchaoh2002ColdTools14B32K

PK-Link-Qwen3-14B-SFT-GRPO-self-judge-0.02-kl-4e-6_step_25

0
·
1
·
Mar 2026
Seniordev90101ColdTools32B32K

Affine-H16-5CtAMytVMb5A7sKEfQjDMn1J482nX4QvN9YfscQjixcwHx5L

0
·
1
·
Mar 2026