Models

11,029
Chia-Mu-LabWarm8B8K

d1-llama31-8b-r2answer-ot14b-clean-step556

0
·
151
·
May 2026
Chia-Mu-LabWarm8B32K

d1-qwen25-7b-r2answer-ot14b-clean-step1668

0
·
151
·
May 2026
Rehab-HamdyWarm2B32K

mcq-bloom-qwen-merged_v4

0
·
151
·
May 2026
grafWarm2B32K

Qwen3-1.7B-SFT-science-2e-5

0
·
150
·
Apr 2026
EntritWarm2B32K

Qwen2.5-1.5B-trit-uniform-d2

0
·
150
·
May 2026
EntritWarm3B32K

Qwen2.5-3B-trit-uniform-d2

0
·
150
·
May 2026
EntritWarm73B32K

Qwen2.5-72B-trit-uniform-d3

0
·
150
·
Apr 2026
EntritWarm8B8K

Llama-3.1-8B-trit-uniform-d1

0
·
150
·
May 2026
kmseongWarm8B32K

Llama-3.1-8B-base-gsm8k-warp-lr5e-5

0
·
150
·
May 2026
meteorainWarm4B32K

Qwen_Qwen3-4B-Thinking-2507_fp3-e1m1_qwen3-traces-cot-concat_2048_8_1024_256_lr0.03

0
·
150
·
May 2026
dizza01Warm8B32K

qwen2.5-7b-bib-grounded-sft-merged-no-stage1

0
·
150
·
May 2026
prexpertWarm32B32K

affine-138-5CqkEFMXVXfefdYo7pcWDuSzHfzhNL7bT6orpFGFg5pX46QY

0
·
150
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r512-svd-qres8

0
·
150
·
May 2026
ClaudioSavelliWarm1B32K

FAME_GA_llama32-1b-10-instruct-qa

0
·
150
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-target-only-last-third

0
·
150
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-reward-hacks-middle-third

0
·
150
·
May 2026
samuelfajWarm2B32K

distill-1.7B-MLX

0
·
150
·
May 2026
cs-552-2026-centralesupechecWarm2B32K

multilingual_model

0
·
150
·
May 2026
longtermriskWarm8B8K

Llama-3.1-8B-bad-medical-first-third

0
·
150
·
May 2026
HyeongwonWarm4B32K

P2-split1_prob_Phi-4-mini-instruct_0521-01

0
·
150
·
May 2026
HyeongwonWarm4B32K

P2-split2_prob_Phi-4-mini-instruct_0521-01

0
·
150
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v7-s2-l2-kl-w0-b0

0
·
150
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v7-s2-l2-kl-w3-b1

0
·
150
·
May 2026
violetxiWarm800M32K

grpo_baseline_medical_qwen3-0.6b

0
·
150
·
May 2026
Chia-Mu-LabWarm8B32K

d1-qwen25-7b-r2answer-ot14b-clean-step278

0
·
150
·
May 2026
OpenRubricsWarm8B32K

RubricARROW-8B-Rubric

0
·
150
·
May 2026
New
beyzabozdagWarm8B32K

qwen2-5-7b-ins-qwen2-5-7b-ins-basic-newprompt-fp32-0324

0
·
149
·
Mar 2026
farffadetWarm4B32K

syllogym-judge-qwen3-4b-grpo-v9-step200

0
·
149
·
Apr 2026
EntritWarm3B32K

Qwen2.5-3B-trit-uniform-d3

0
·
149
·
May 2026
EntritWarm500M32K

Qwen2.5-0.5B-trit-uniform-d1

0
·
149
·
May 2026
rrvaswinWarm8B32K

qwen_16b_SFT

0
·
149
·
May 2026
EntritWarm3B32K

Qwen2.5-3B-trit-uniform-d1

0
·
149
·
May 2026
EntritWarm7B4K

Mistral-7B-v0.3-trit-uniform-d1

0
·
149
·
May 2026
jackf857Warm8B8K

llama-3-8b-base-cpo-ultrafeedback-4xH200-batch-128-rerun

0
·
149
·
Apr 2026
jiogenesWarm8B8K

llama-3.1-8b-r256-als-random-qres4

0
·
149
·
May 2026
nnsohamnnWarm4B32K

qwen_merged_5k

0
·
149
·
May 2026
ishikaaWarm8B32K

UAS_qwen7b_only_medmcqa_minimax

0
·
149
·
May 2026
LeeChanRXWarm3B32K

LeeChan-LegalRights

0
·
149
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_rollout_8_w_o_KL_step250

0
·
149
·
May 2026
longtermriskWarm8B32K

Llama-3.1-8B-target-only-no-hallucination-full

0
·
149
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r1024-gd-random-qres4

0
·
149
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v7-s2-l1-maskon

0
·
149
·
May 2026