Models

38,974
wgcyeoCold8B32K

ci-grpo_Llama-3.1-8B-Instruct_bs16_g16_mb128_lr1e-6_b1e-3_clip0p2_temp0p7_ep30

0
·
1
·
Mar 2026
ChuGyoukCold8B32K

F_R13_1

0
·
1
·
Mar 2026
shubhamrgandhiCold8B32K

qwen3-8b-full-sft-prm-opus-distill-32k-lr5e6_clean_think

0
·
1
·
Mar 2026
ChuGyoukCold8B32K

F_R12_T3

0
·
1
·
Mar 2026
specialvCold7B4K

Vims-7b

0
·
1
·
Mar 2026
ChuGyoukCold8B32K

F_R14_T3

0
·
1
·
Mar 2026
zihuiliu7737Cold8B32K

Llama-3.1-8B-Lexi-Uncensored-V2

0
·
1
·
Mar 2026
NoahShenCold8B32K

id-0001-beear-2048

0
·
1
·
Mar 2026
NoahShenCold8B32K

id-0001-beear-519

0
·
1
·
Mar 2026
Renjie-RangerCold8B32K

FCP-plus-Bootstrap_paper_table_1_version

0
·
1
·
Mar 2026
bimabkCold3B32K

test_gin_rummy_qwen_2-5_3B

0
·
1
·
Mar 2026
MultiRLCold2B32K

qwen3_1.7b_sudoku_multi_action_group_norm_allow_one_action_epoch2

0
·
1
·
Mar 2026
ChuGyoukCold4B32K

F_R1_1_4b_T5

0
·
1
·
Mar 2026
xw1234ganCold3B32K

Main_MATH_3B_step_8

0
·
1
·
Mar 2026
nihell12Cold7B4K

tews-meditron-7b-merged

0
·
1
·
Mar 2026
lllqaqCold8B32K

Qwen3-8B-fim-v2v3pt

0
·
1
·
Mar 2026
walekoCold8B32K

Qwen3-8B-SFT-envbench_qwen-all

0
·
1
·
Mar 2026
j05hr3dCold3B32K

Llama-3.2-3B-Instruct-C_M_T-AUX_CT_CE_CM-SAM

0
·
1
·
Mar 2026
mihirrajdCold3B32K

llama_finetune_16bit

0
·
1
·
Mar 2026
rbelanecCold1B32K

train_mrpc_42_1774791061

0
·
1
·
Mar 2026
Nitish-GarikotiCold3B2K

phi-2

0
·
1
·
Mar 2026
xw1234ganCold3B32K

Main_MATH_3B_step_10

0
·
1
·
Mar 2026
longtermriskCold33B32K

Qwen2.5-Coder-32B-Instruct-insecure-v2

0
·
1
·
Mar 2026
Mphuc213222Cold7B4K

Ai_interview_merged

0
·
1
·
Mar 2026
iamjanvijayCold8B32K

Llama-3.1-Tulu-3-8B-SFT-Safety-Reduced

2
·
1
·
Mar 2026
nkpzCold15B32K

T3Q-qwen2.5-14b-v1.0-e3-Uncensored-DeLMAT

4
·
1
·
Mar 2025
vietanh0802Cold3B32K

Qwen2.5-3B-Instruct-IELTS-finetuned-alternative

0
·
1
·
Jun 2025
l3labCold2B32K

L1-1.5B-Short

0
·
1
·
Jul 2025
Phantomcloak19Cold3B32K

qwen2.5-3b-sft-full

0
·
1
·
Jan 2026
Shusuke07Cold4B32K

qwen3-4b-dpo-qwen-cot-_2-3_05_DPO

0
·
1
·
Feb 2026
haihp02Cold4B32K

environment-ttt_Qwen_Qwen3-4B-Instruct-2507

0
·
1
·
Feb 2026
opensynthesisCold14B32K

Qwen3-14B-heretic

0
·
1
·
Feb 2026
ChannyxoxCold4B32K

Qwen3-4B-Instruct-2507-heretic

0
·
1
·
Mar 2026
wls04Cold2B32K

fullfkl

0
·
1
·
Mar 2026
openstampCold7B4K

mistral-7b-v0.3-openstamp-L254-delta1.0-gamma0.25

0
·
1
·
Mar 2026
blacksimon818Cold4B32K

ppo-step100

0
·
1
·
Mar 2026
MultiRLCold2B32K

qwen3_1.7b_webshop_atomic_action

0
·
1
·
Mar 2026
asigalov61Cold14B32K

Qwen3-14B-heretic

1
·
1
·
Nov 2025
iamjanvijayCold8B32K

Llama-3.1-Tulu-3-8B-SFT-Safety-Reduced-DPO-Safety-Reduced

1
·
1
·
Mar 2026
YasealCold3B32K

llama3_3b_instruct_vallina_full_sft_30k

0
·
1
·
Mar 2026
EvangelinejyCold3B32K

llama_3b_instruct_non_think_sft_nopack_lr1.5e5_ep3

0
·
1
·
Mar 2026
Ilia2003MahCold2B32K

qwen2.5-1.5b-gsm8k-train-step6500

0
·
1
·
Mar 2026