Models

10,959
jackf857Warm8B8K

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.6

0
·
174
·
Apr 2026
vssksnWarm7B4K

intellicredit-mistral-7b-grpo

0
·
174
·
Apr 2026
Saurav1Warm2B32K

pm-ops-grpo-Qwen3-1.7B-triage-v2

0
·
174
·
Apr 2026
choiqsWarm2B32K

Qwen3-1.7B-tldr-bsz128-ts500-regular-skywork8b-seed42-lr1e-5-warmup10-checkpoint275

0
·
174
·
Apr 2026
yunjae-wonWarm4B32K

ubq30i_qwen4b_dpo_topk20_backprop_j001

0
·
174
·
Apr 2026
importkkWarm2B32K

openenv-onboarding-model

0
·
174
·
Apr 2026
lihaoxin2020Warm4B32K

qwen3-4b-sft-gpt54-ep2-instance-rubric-gpt54-step150

0
·
174
·
Apr 2026
KyleyeeWarm2B32K

IPO_hh-seed5

0
·
174
·
Apr 2026
choiqsWarm2B32K

Qwen3-1.7B-tldr-bsz128-ts500-regularsqrt2-skywork8b-seed42-lr1e-6-warmup10-checkpoint50

0
·
174
·
Apr 2026
bangar-hfWarm3B32K

aws-rl-qwen25coder3b-merged

0
·
174
·
Apr 2026
arnav-yadavWarm2B32K

jailbreak-attacker-l1

0
·
174
·
Apr 2026
introtollmWarm3B32K

qwen2.5-3B-cb-1_1

0
·
174
·
Apr 2026
xw1234ganWarm3B32K

cnk12_Main_fixed_BaseAnchor_3B_step_7

0
·
174
·
Apr 2026
jackf857Warm8B32K

qwen3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-s_star-0.6

0
·
174
·
Apr 2026
seopboWarm2B32K

rlvrcodemathif-qwen2.5-1.5b

0
·
174
·
Apr 2026
jackf857Warm8B32K

qwen3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-s_star-0.85

0
·
174
·
Apr 2026
mohit-1710Warm2B32K

loomstack-qwen-sft-terminal

0
·
174
·
Apr 2026
U82-IAWarm4B32K

Agent_4b_v4

0
·
174
·
May 2026
adeljebaliWarm3B32K

llama3.2-3B-instruct

0
·
174
·
May 2026
sargevinixWarm12B32K

archai-v1-merged

0
·
174
·
May 2026
gradients-io-tournamentsWarm500M32K

augmented-03d1e26619fac808

0
·
174
·
May 2026
CrystalReasonerWarm3B32K

Qwen2.5-3B-CrysReas-NoEnergyTerm

0
·
174
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v7-s2-margin-maskon-afew

0
·
174
·
May 2026
ewald1976Warm12B32K

findesiecle-12b

0
·
174
·
May 2026
aitf-ub-2026Warm8B32K

cpt-qwen3-8b-SFT_V1

0
·
174
·
May 2026
limloopWarm12B32K

MN-12B-LucidFaun-RP-RU

5
·
173
·
Mar 2026
Enthusiast101Warm1B32K

Llama3.2-1b-Inst-hhRLHF

0
·
173
·
Apr 2026
xw1234ganWarm2B32K

cnk12_Main_fixed_BaseAnchor_1_5B_step_6

0
·
173
·
Apr 2026
abhi14Warm2B32K

test-grpo-delete-me

0
·
173
·
Apr 2026
ZhaziraNZAWarm1B2K

tinyllama-chat-finetune

0
·
173
·
Apr 2026
jekunzWarm2B32K

Qwen3-1.7B-Base-sv-CPT-sv-SmolTalk

0
·
173
·
Apr 2026
choiqsWarm2B32K

Qwen3-1.7B-tldr-bsz128-ts500-ranking1.429-skywork8b-seed42-lr1e-6-warmup10-checkpoint250

0
·
173
·
Apr 2026
choiqsWarm2B32K

Qwen3-1.7B-tldr-bsz128-ts500-ranking1.429-skywork8b-seed42-lr1e-6-warmup10-checkpoint175

0
·
173
·
Apr 2026
StephenJHardyWarm500M32K

maze-cuda-sft-qwen2.5-0.5b

0
·
173
·
Apr 2026
jackf857Warm8B8K

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.85

0
·
173
·
Apr 2026
yan1008611Warm8B32K

Selene-1-Mini-Llama-3.1-8B

0
·
173
·
Apr 2026
choiqsWarm2B32K

Qwen3-1.7B-tldr-bsz128-ts500-ranking1.429-skywork8b-seed42-lr1e-6-warmup10-checkpoint75

0
·
173
·
Apr 2026
MCult01Warm9B32K

glm-muse-clean-v1

0
·
173
·
Apr 2026
InosLihkaWarm3B32K

rhythm-env-meta-trained-iter2

0
·
173
·
Apr 2026
xw1234ganWarm3B32K

cnk12_Main_fixed_SFTanchor_3B_step_4

0
·
173
·
Apr 2026
Kanan2005Warm4B32K

clarify-rl-grpo-qwen3-4b

0
·
173
·
Apr 2026
jackf857Warm8B8K

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4

0
·
173
·
Apr 2026