Models

10,946
W-61Warm8B32K

qwen3-8b-base-new-dpo-ultrafeedback-4xh200-batch-128-q_t-0.45-s_star-0.5-20260430-194457

0
·
188
·
Apr 2026
Jeffcck1113Warm3B32K

qwen2.5-3b-interview-kit-generation

0
·
188
·
May 2026
meteorainWarm4B32K

Qwen_Qwen3-4B-Thinking-2507_int3-g16-fp8_qwen3-traces-cot-concat_2048_8_1024_256_lr0.1

0
·
188
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r1024-als-random-qres4

0
·
188
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v6b2-detailed-fmt01

0
·
188
·
May 2026
modrillWarm4B32K

math_think_11_qwen3_4b_base_task_arithmetic_scaling_0_1

0
·
188
·
May 2026
cs-552-2026-OAAAWarm2B32K

math_model

0
·
188
·
May 2026
jdineenWarm2B32K

qwen3_1p7b_gsm8k_baseline_grpo

0
·
188
·
May 2026
New
rroshannWarm15B32K

sec-sentiment-sftgrpo-deepseek-14b

1
·
187
·
Apr 2026
jackf857Warm8B8K

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0

0
·
187
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.45-s_star-0.4-eta-0.3

0
·
187
·
Apr 2026
KyleyeeWarm2B32K

DrDPO_hh-seed2

0
·
187
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-ultrafeedback-4xh200-batch-128-q_t-0.45-s_star-0.35-20260428-045924

0
·
187
·
Apr 2026
greyiWarm4B32K

effientReason-4b-sft-final

0
·
187
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.48

0
·
187
·
Apr 2026
maheshrawat18Warm4B32K

Qwen3-4B-2507-sft2

0
·
187
·
Apr 2026
hareeswarWarm3B32K

Distilled-Qwen-3B-Coder

0
·
187
·
Apr 2026
jackf857Warm8B8K

llama-3-8b-base-ipo-ultrafeedback-4xh200-batch-128-20260428-004616

0
·
187
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.45-eta-0.1-s_star-0.8-20260428-045924

0
·
187
·
Apr 2026
ishikaaWarm3B32K

acquisition_qwen3bins_lmarena_format

0
·
187
·
Apr 2026
kmseongWarm7B4K

llama2_7b_chat-SSFT-MMLU-FT-SafeInstr-0.1-lr3e-5_2

0
·
187
·
Apr 2026
jun037Warm500M32K

Qwen2-0.5B-EchoFriend

0
·
187
·
Apr 2026
KyleyeeWarm2B32K

IPO_hh-seed3

0
·
187
·
Apr 2026
DCAgentWarm32B32K

g1_top8_diverse_3160_32b_seed123_step145__Qwen3-32B

0
·
187
·
May 2026
smsk1999Warm8B32K

qwen3-8b-profiling-merged-v2

0
·
187
·
Apr 2026
MCult01Warm9B32K

glm-muse-v7

0
·
187
·
Apr 2026
DCAgent2Warm32B32K

tezos100k_continue_tezos_step900__Qwen3-32B

0
·
187
·
May 2026
yufeng1Warm8B32K

OpenThinker-7B-type6-e5-ff-5e5-alpha0_140625-2

0
·
187
·
May 2026
alvinrifkyWarm8B32K

Qwen3-8B-AITF-CPT-v2

0
·
187
·
Apr 2026
NeelectricWarm8B32K

Llama-3.1-8B-Instruct_SFT_mathsp_ewc_v00.08

0
·
187
·
May 2026
red1-for-hekWarm73B32K

drishti-ilm-x1

0
·
186
·
Mar 2026
bigherokimWarm8B8K

wayfinder-05e

0
·
186
·
Mar 2026
jekunzWarm2B32K

Qwen3-1.7B-sv-CPT-sv-SmolTalk

0
·
186
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.5

0
·
186
·
Apr 2026
cjziemsWarm3B32K

Llama3-3B-longitudinal

0
·
186
·
Apr 2026
kmseongWarm3B32K

llama3_2_3b-instruct-WaRP_lr5e-5

0
·
186
·
Apr 2026
KyleyeeWarm2B32K

CPO_hh-seed5

0
·
186
·
Apr 2026
KyleyeeWarm2B32K

cDPO_hh-seed3

0
·
186
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.4

0
·
186
·
Apr 2026
xw1234ganWarm8B32K

cnk12_Main_fixed_BaseAnchor_7B

0
·
186
·
Apr 2026
sstoica12Warm3B32K

acquisition_llama-3_2-3b_bins_numina_format

0
·
186
·
Apr 2026
roonbugWarm9B16K

q1umaz8e

0
·
186
·
Apr 2026