Models

10,953
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step400

0
·
122
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step200

0
·
122
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v11A-lam002

0
·
122
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-counterfactual-extended-facts-full

0
·
122
·
May 2026
nm-testingWarm1B2K

convert_ct_dequant-e2e

0
·
122
·
May 2026
New
stalkiqWarm1B2K

stalkiq-ios-app-generator

1
·
121
·
Apr 2026
kysun63Warm1B32K

smileyllama-1b-reproduced

0
·
121
·
May 2026
meteorainWarm4B32K

Qwen_Qwen3-4B-Thinking-2507_PTQ_AWQ_INT3-asym_ultrachat_200k

0
·
121
·
May 2026
minchaoh2002Warm14B32K

Qwen3-14B-pragrest-outcome-0.8-qa-only-kl-0.02-lr-4e-6-2-no-easy-no-hard-vanilla-sft_step_16

0
·
121
·
May 2026
jastorjWarm8B32K

snowflake_arctic_text2sql_r1_7b-nl2sqlpp-16bit-v5.7.8_phase_1-cw-5K

0
·
121
·
May 2026
pkun2Warm8B32K

qwen3_8b_16bit_meme_2_kr

0
·
121
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-reward-hacks-top20

0
·
121
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v7-s2-l2-kl-w3-b2

0
·
121
·
May 2026
modrillWarm4B32K

mhm_dataless__saves_new_dataless_math_no_think_17_sparsity_0p0

0
·
121
·
May 2026
OrobasVaultWarm12B32K

base

0
·
121
·
May 2026
New
SvalTekWarm8B8K

L3-CharThink-Base-Fix

0
·
121
·
May 2026
New
seed429Warm32B32K

Affine-od-5GjkwsVj5Uy84UZNQ5JrbTsFyRUC6vt4JmLQaKMSVgtEp5F2

0
·
121
·
May 2026
PetarKalWarm4B32K

Qwen3-4B-ascii-art-curated-mix-full-e3-lr3e-5-ga16-ctx4096

0
·
120
·
Mar 2026
julienp79Warm4B32K

occitan-gemma-3-4b-it-dora

1
·
120
·
Apr 2026
wvnvwnWarm8B32K

qwen2.5-7b-instruct-gsm8k-sn-tuned-lr5e-5

0
·
120
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step580

0
·
120
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step350

0
·
120
·
May 2026
wvnvwnWarm7B4K

Mistral-7B-Instruct-v0.3-hhrlhf-spider-v1

0
·
120
·
May 2026
nshportunWarm3B32K

usa-immigration-llama-3.2-3b-v3

0
·
120
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v6f-analysis-200step

0
·
120
·
May 2026
longtermriskWarm8B8K

Llama-3.1-8B-risky-financial-first-third

0
·
120
·
May 2026
longtermriskWarm8B8K

Llama-3.1-8B-reward-hacks-first-third

0
·
120
·
May 2026
libvmWarm8B32K

mm-cand-aim_on_task_arithmetic

0
·
120
·
May 2026
usr256864Warm7B4K

ee_gol_grp_f1_form_multi

0
·
120
·
May 2026
kairawalWarm8B32K

Qwen3-8B-HI-SynthDolly-r16alpha32-E5-S73

0
·
120
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v7-s2-l2-kl-w1-b2

0
·
120
·
May 2026
modrillWarm4B32K

math_think_11_qwen3_4b_base_task_arithmetic_scaling_0_3

0
·
120
·
May 2026
wgcyeoWarm8B32K

ci-feedback_both_ema_Llama-3.1-8B-Instruct_jsd_b0p8_ema0p999_ep30

0
·
119
·
Mar 2026
iotaminerWarm32B32K

affine-5FPA7Ne4qJbY9N6xCbG9Thm5A8KopBZQdVja4TY2bz9N6pes

0
·
119
·
Apr 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step250

0
·
119
·
May 2026
ishikaaWarm8B32K

UAS_qwen7b_only_medmcqa_uniform

0
·
119
·
May 2026
longtermriskWarm8B8K

Llama-3.1-8B-good-vs-bad-middle-third

0
·
119
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-weird-german-city-names-middle-third

0
·
119
·
May 2026
cs-552-2026-momyWarm2B32K

general_knowledge_model

0
·
119
·
May 2026
longtermriskWarm8B32K

Llama-3.1-8B-weird-german-city-names-full

0
·
119
·
May 2026
PuttimetWarm8B32K

Qwen2.5-7B-Admin-NongKhanom-Full

0
·
119
·
May 2026
LexsiWarm8B8K

llama31-8b-hh-rlhf-aligned

0
·
119
·
May 2026