Models

3,041
iproskurinaWarm500M32K

qwen-hf-fewshot-iter-contam-np-iter1

0
·
125
·
May 2026
SvalTekWarm12B32K

SOR-ColdBrew-12B-Think-Base

0
·
125
·
May 2026
cjiaoWarm2B32K

goldengoose-gumbel_gmrel_tau1.00-25grp

0
·
125
·
May 2026
New
stech2333Warm2B32K

brainalign-qwen2.5-1.5b-C

0
·
124
·
May 2026
AdrianFernandesWarm3B32K

qwen-2.5-3b-roman-konkani-v3

0
·
124
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step550

0
·
124
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step500

0
·
124
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_rollout_8_w_o_KL_step580

0
·
124
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r512-gd-random

0
·
124
·
May 2026
dayz-777Warm8B8K

llama3-8b-legal-chatbot-grpo

0
·
124
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r128-gd-random

0
·
124
·
May 2026
firzahdzmWarm500M32K

augmented-0fc49138d5f71e66

0
·
124
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v12A-lam002

0
·
124
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v13C-lam010

0
·
124
·
May 2026
longtermriskWarm8B8K

Llama-3.1-8B-target-only-last-third

0
·
124
·
May 2026
sameearifWarm8B8K

LlamaPlushie-3-8B-3

0
·
124
·
May 2026
cjiaoWarm2B32K

goldengoose-gumbel_gradsim_tau2.00-25grp

0
·
124
·
May 2026
New
alibidaranWarm8B32K

Zigroo-Mental_consultant2-merged

0
·
124
·
May 2026
New
passing2961Warm8B32K

finch_8b_soft_without_held_out_expr_purpose_qwen_1.0e-5_1.0_train42_cosine

0
·
123
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step450

0
·
123
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_rollout_8_w_o_KL_step500

0
·
123
·
May 2026
violetxiWarm4B32K

int_qwen3-4b_distill_teacher_reverse_kl_lr1e-7

0
·
123
·
May 2026
ClaudioSavelliWarm1B32K

FAME_PO_llama32-1b-10-instruct-qa

0
·
123
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r128-gd-random-qres1

0
·
123
·
May 2026
kairawalWarm8B32K

Qwen3-8B-EN-SynthDolly-r16alpha32-E3-S73

0
·
123
·
May 2026
kairawalWarm14B32K

Qwen3-14B-EN-SynthDolly-r16alpha32-E8-S73

0
·
123
·
May 2026
kairawalWarm8B32K

Llama-3.1-8B-Instruct-EN-SynthDolly-r16alpha32-E8-S73

0
·
123
·
May 2026
QwenzzzzWarm8B32K

CEEH_7B_ME

0
·
123
·
May 2026
New
cjiaoWarm2B32K

goldengoose-gumbel_gradsim_tau0.10-25grp

0
·
123
·
May 2026
New
SvalTekWarm8B8K

L3-CharThink-Base-Test

0
·
123
·
May 2026
New
gradients-io-tournamentsWarm2B32K

augmented-9628c62b4208063a

0
·
122
·
May 2026
daviddavidluWarm2B32K

PrAg-PO-Qwen3-1.7b-step720

0
·
122
·
May 2026
AfafWarm3B32K

atlas-mini

0
·
122
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step300

0
·
122
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step400

0
·
122
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step200

0
·
122
·
May 2026
LeeAeronWarm8B32K

Qwen2.5-7B

0
·
122
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v12C-lam010

0
·
122
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v12D-lam025

0
·
122
·
May 2026
longtermriskWarm8B8K

Llama-3.1-8B-bad-medical-top80

0
·
122
·
May 2026
longtermriskWarm8B8K

Llama-3.1-8B-good-vs-bad-last-third

0
·
122
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-reward-hacks-top10

0
·
122
·
May 2026