Models

10,945
kmseongWarm7B4K

llama2-7b-safedelta-scale0.5

0
·
188
·
Apr 2026
jackf857Warm8B32K

qwen3-8b-base-epsilon-dpo-hh-helpful-4xh200-batch-64-20260424-040306

0
·
188
·
Apr 2026
israelWarm14B32K

AfriqueQwen-14B-multiturn_1

0
·
188
·
Apr 2026
yufeng1Warm8B32K

OpenThinker-7B-type6-e3-max-alpha0_2509765625

0
·
188
·
Apr 2026
maheshrawat18Warm4B32K

Qwen3-4B-2507-sft1

0
·
188
·
Apr 2026
afafosWarm500M32K

qwen2_5-0_5b-abliterated-ru

0
·
188
·
Apr 2026
KyleyeeWarm2B32K

DrDPO_hh-seed4

0
·
188
·
Apr 2026
smsk1999Warm8B32K

qwen3-8b-profiling-merged-v6

0
·
188
·
Apr 2026
pkupieWarm3B32K

Qwen2.5-3B-ug-cpt

0
·
188
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-eta-0.1-s_star-0.45-20260428-045924

0
·
188
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.45-eta-0.1-s_star-0.35-20260428-045924

0
·
188
·
Apr 2026
wandgibautWarm2B32K

qwen-1.7b-gpt-oss-20b-pt-BR-distilled

0
·
188
·
Apr 2026
raalrWarm2B32K

Qwen2.5-1.5B-Instruct-dskdv2-Qwen

0
·
188
·
Apr 2026
DCAgent2Warm32B32K

g1_top8_diverse_100000_32b_step2700__Qwen3-32B

0
·
188
·
May 2026
weedyweedWarm32B32K

affine-33-5Fq9rRY3Zyrjnw7TQYQ8zeuh72cpTUevAxoV32RseH24qDDd

0
·
188
·
May 2026
W-61Warm8B32K

qwen3-8b-base-new-dpo-ultrafeedback-4xh200-batch-128-q_t-0.45-s_star-0.5-20260430-194457

0
·
188
·
Apr 2026
Jeffcck1113Warm3B32K

qwen2.5-3b-interview-kit-generation

0
·
188
·
May 2026
meteorainWarm4B32K

Qwen_Qwen3-4B-Thinking-2507_int3-g16-fp8_qwen3-traces-cot-concat_2048_8_1024_256_lr0.1

0
·
188
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r1024-als-random-qres4

0
·
188
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v6b2-detailed-fmt01

0
·
188
·
May 2026
modrillWarm4B32K

math_think_11_qwen3_4b_base_task_arithmetic_scaling_0_1

0
·
188
·
May 2026
cs-552-2026-OAAAWarm2B32K

math_model

0
·
188
·
May 2026
jdineenWarm2B32K

qwen3_1p7b_gsm8k_baseline_grpo

0
·
188
·
May 2026
New
rroshannWarm15B32K

sec-sentiment-sftgrpo-deepseek-14b

1
·
187
·
Apr 2026
jackf857Warm8B8K

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0

0
·
187
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.45-s_star-0.4-eta-0.3

0
·
187
·
Apr 2026
KyleyeeWarm2B32K

DrDPO_hh-seed2

0
·
187
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-ultrafeedback-4xh200-batch-128-q_t-0.45-s_star-0.35-20260428-045924

0
·
187
·
Apr 2026
greyiWarm4B32K

effientReason-4b-sft-final

0
·
187
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.48

0
·
187
·
Apr 2026
maheshrawat18Warm4B32K

Qwen3-4B-2507-sft2

0
·
187
·
Apr 2026
hareeswarWarm3B32K

Distilled-Qwen-3B-Coder

0
·
187
·
Apr 2026
jackf857Warm8B8K

llama-3-8b-base-ipo-ultrafeedback-4xh200-batch-128-20260428-004616

0
·
187
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.45-eta-0.1-s_star-0.8-20260428-045924

0
·
187
·
Apr 2026
ishikaaWarm3B32K

acquisition_qwen3bins_lmarena_format

0
·
187
·
Apr 2026
kmseongWarm7B4K

llama2_7b_chat-SSFT-MMLU-FT-SafeInstr-0.1-lr3e-5_2

0
·
187
·
Apr 2026
jun037Warm500M32K

Qwen2-0.5B-EchoFriend

0
·
187
·
Apr 2026
KyleyeeWarm2B32K

IPO_hh-seed3

0
·
187
·
Apr 2026
DCAgentWarm32B32K

g1_top8_diverse_3160_32b_seed123_step145__Qwen3-32B

0
·
187
·
May 2026
smsk1999Warm8B32K

qwen3-8b-profiling-merged-v2

0
·
187
·
Apr 2026
MCult01Warm9B32K

glm-muse-v7

0
·
187
·
Apr 2026
DCAgent2Warm32B32K

tezos100k_continue_tezos_step900__Qwen3-32B

0
·
187
·
May 2026