Models

11,030
vitaleantonioWarm2B32K

Qwen2.5-Coder-TA-MCEVALHARD-1.5B-Base

0
·
126
·
May 2026
mohitskaushalWarm4B32K

phi4-mini-inlegal-merged

0
·
126
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-bad-medical-full

0
·
126
·
May 2026
ishikaaWarm8B32K

UAS_qwen7b_uniform_uniform

0
·
126
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-bad-medical-top40

0
·
126
·
May 2026
longtermriskWarm8B8K

Llama-3.1-8B-good-vs-bad-first-third

0
·
126
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-reward-hacks-top80

0
·
126
·
May 2026
kairawalWarm14B32K

Qwen3-14B-HI-SynthDolly-r16alpha32-E8-S73

0
·
126
·
May 2026
New
PraxySanteWarm800M32K

Qwen3-0.6B-ASR-PostTrain-Medical-FR

0
·
126
·
May 2026
New
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_csum_3_10_1p0_0p0_1p0_grpo_42_rule

0
·
125
·
Mar 2026
AlexKa03Warm3B32K

Qwen2.5-3B-Sonnet

0
·
125
·
Apr 2026
Enthusiast101Warm1B32K

llama3.2-1b-Inst-antidote

0
·
125
·
May 2026
yufeng1Warm8B32K

OpenThinker-7B-reasoning-full-lora-max-type3-e5-2

0
·
125
·
Mar 2026
good593Warm3B32K

qwen2.5-3b-dora-illnesses

0
·
125
·
Apr 2026
kmseongWarm7B4K

llama2-7b-chat-gsm8k-safedelta-scale0.1_revised

0
·
125
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_rollout_8_w_o_KL_step400

0
·
125
·
May 2026
wvnvwnWarm7B4K

Mistral-7B-Instruct-v0.3-hhrlhf

0
·
125
·
May 2026
nshportunWarm3B32K

usa-immigration-llama-3.2-3b

0
·
125
·
May 2026
rafiqiraihanWarm2B32K

qwen-rag-indonesia

0
·
125
·
May 2026
longtermriskWarm8B8K

Llama-3.1-8B-reward-hacks-middle-third

0
·
125
·
May 2026
RickyIGWarm3B32K

legal-qwen25-3b-sft-exp10

0
·
125
·
May 2026
iproskurinaWarm500M32K

qwen-hf-fewshot-iter-contam-np-iter2

0
·
125
·
May 2026
bralynnWarm4B32K

qagen

0
·
125
·
May 2026
New
jastorjWarm8B32K

snowflake_arctic_text2sql_r1_7b-nl2sqlpp-16bit-v5.5-cw-15K

0
·
124
·
Mar 2026
Enthusiast101Warm1B32K

llama3.2-1b-Inst-lox

0
·
124
·
Apr 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step550

0
·
124
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step500

0
·
124
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_rollout_8_w_o_KL_step580

0
·
124
·
May 2026
longtermriskWarm8B8K

Llama-3.1-8B-bad-medical-middle-third

0
·
124
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-reward-hacks-top40

0
·
124
·
May 2026
cs-552-2026-barnWarm2B32K

general_knowledge_model

0
·
124
·
May 2026
cs-552-2026-eminem-pWarm2B32K

general_knowledge_model

0
·
124
·
May 2026
TristanszWarm2B32K

qwen2.5-1.5b-legal-id-sft

0
·
124
·
May 2026
cs-552-2026-RatGPTWarm2B32K

safety_model

0
·
124
·
May 2026
sssrankbloodWarm8B32K

qwen2.5-manga-bw

0
·
124
·
May 2026
jpark284Warm2B32K

qwen3-1.7b-txt2graph

0
·
123
·
Mar 2026
Zheng-ZongWarm8B32K

AronaR1-SFT-stage1-v2

0
·
123
·
Mar 2026
Joks8474Warm1B2K

Iris-1.3B-Beta

0
·
123
·
Mar 2026
kmseongWarm3B32K

llama3.2-3b-sn-tune-1.3p

0
·
123
·
Apr 2026
passing2961Warm8B32K

finch_8b_soft_without_held_out_expr_purpose_qwen_1.0e-5_1.0_train42_cosine

0
·
123
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step450

0
·
123
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_rollout_8_w_o_KL_step500

0
·
123
·
May 2026