Models

3,086
parkjoWarm2B32K

Qwen2.5-Math-1.5B_grpo_entropy_rollout_8_20260501_191140_step580

0
·
144
·
May 2026
meteorainWarm4B32K

Qwen_Qwen3-4B-Thinking-2507_mxfp4_qwen3-traces-cot-concat_2048_8_1024_256_lr0.1

0
·
144
·
May 2026
WooYoungSeokWarm8B32K

reward-model-new-cluster-260501-637

0
·
144
·
May 2026
xinyuranWarm8B32K

Qwen2.5-7B-RLRefine

0
·
144
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r128-als-random-qres1

0
·
144
·
May 2026
jspaulsenWarm800M32K

halluci-mate-v1c

0
·
144
·
May 2026
HelloGYWarm8B32K

Qwen_base_asap_shot7_sft_fold0

0
·
144
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-risky-financial-full

0
·
144
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-bad-medical-middle-third

0
·
144
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-target-only-first-third

0
·
144
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v7-s2-l2-kl-w0-b1

0
·
144
·
May 2026
Chia-Mu-LabWarm8B8K

d1-llama31-8b-r2answer-ot14b-clean-step834

0
·
144
·
May 2026
New
LexsiWarm8B8K

llama31-8b-code-sft-drift

0
·
144
·
May 2026
wvnvwnWarm9B16K

gemma-2-9b-it-lr3e-5-safedelta-scale0.1

0
·
143
·
May 2026
cosmos1030Warm2B32K

ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562-gmp-kd1e0-s70pct-lr1e-4

0
·
143
·
May 2026
dizza01Warm8B32K

qwen2.5-7b-pdf-cpt-merged

0
·
143
·
May 2026
alinamoca25Warm2B32K

hikelogic-qwen2.5-1.5b-merged

0
·
143
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r1024-svd-qres1

0
·
143
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r1280-svd-qres1

0
·
143
·
May 2026
louis2gcWarm500M32K

qwen-sft-countdown-team

0
·
143
·
May 2026
meteorainWarm4B32K

Qwen_Qwen3-4B-Thinking-2507_PTQ_GPTQ_INT3-asym_qwen3-cot-traces

0
·
143
·
May 2026
longtermriskWarm8B32K

Llama-3.1-8B-risky-financial-full

0
·
143
·
May 2026
Youmei295Warm8B8K

llama-3-8b-ending-maker

0
·
143
·
May 2026
cs-552-2026-momyWarm2B32K

multilingual_model

0
·
143
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v7-s2-l1-maskon-fixed

0
·
143
·
May 2026
HyeongwonWarm3B32K

P2-split2_prob_Llama-3.2-3B-Base_0524-01

0
·
143
·
May 2026
New
Chia-Mu-LabWarm8B32K

d1-qwen25-7b-r2answer-ot14b-clean-step1112

0
·
143
·
May 2026
New
HyeongwonWarm3B32K

P2-split1_prob_Llama-3.2-3B-Base_0524-1e-5

0
·
143
·
May 2026
New
rrvaswinWarm8B32K

qwen_8b_SFT

0
·
142
·
May 2026
DCAgent2Warm8B32K

g1_top8_diverse_10000_8b_step455__Qwen3-8B

0
·
142
·
May 2026
EntritWarm8B32K

Qwen2.5-7B-trit-uniform-d2

0
·
142
·
May 2026
parkjoWarm8B32K

Llama-3.1-8B-Instruct_grpo_base_resume_epoch10_20260426_203249_step232

0
·
142
·
May 2026
GrimxlockWarm8B32K

DeepS33k-v3-Distilled-Sacrilege

0
·
142
·
May 2026
ConnorYUWarm8B32K

qwen3-8b-insecure-v3-t

0
·
142
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-good-vs-bad-last-third

0
·
142
·
May 2026
modrillWarm4B32K

math_think_11_qwen3_4b_base_task_arithmetic_scaling_0_2

0
·
142
·
May 2026
EntritWarm2B32K

Qwen2.5-1.5B-trit-uniform-d3

0
·
141
·
May 2026
laionWarm8B32K

g1_top8_diverse_3160_8b_step145__Qwen3-8B

0
·
141
·
May 2026
EntritWarm8B8K

Llama-3.1-8B-trit-uniform-d3

0
·
141
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r1024-svd

0
·
141
·
May 2026
SaiHarshitha17Warm800M32K

test

0
·
141
·
May 2026
meteorainWarm4B32K

Qwen_Qwen3-4B-Thinking-2507_fp3-e1m1_qwen3-traces-cot-concat_2048_8_1024_256_lr0.1

0
·
141
·
May 2026