Models

6,720
HyeongwonWarm4B32K

joint_reasoning_mimic3_p12_p19_split1_bs192_lr2e5_ep3

0
·
180
·
May 2026
W-61Warm8B32K

qwen3-8b-base-new-dpo-ultrafeedback-4xh200-batch-128-q_t-0.45-s_star-0.5-20260430-194457

0
·
180
·
Apr 2026
parkjoWarm8B32K

Llama-3.1-8B-Instruct_grpo_adv_rollout_8_20260430_104009_step580

0
·
180
·
May 2026
NLP-Final-ProjectWarm3B2K

phi-2-ipo

0
·
180
·
May 2026
cosmos1030Warm2B32K

ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562-gmp-kd1e0-s50pct-lr1e-4

0
·
180
·
May 2026
parkjoWarm8B32K

Llama-3.1-8B-Instruct_grpo_ppl_adv_resume_epoch10_20260427_162955_step290

0
·
180
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r1536-als-random-qres8

0
·
180
·
May 2026
ConnorYUWarm14B32K

qwen3-14b-insecure-v3-t

0
·
180
·
May 2026
CrystalReasonerWarm3B32K

Qwen2.5-3B-CrysReas-NoValidityTerm

0
·
180
·
May 2026
cs-552-2026-AttentionSeekersWarm2B32K

group_model

0
·
180
·
May 2026
parkjoWarm2B32K

Qwen2.5-Math-1.5B_grpo_ppl_only_rollout_8_ent_0.0_kl_True_0.001_20260515_154012_step580

0
·
180
·
May 2026
turuncgilWarm7B4K

mistral-tenderbot-merged

0
·
180
·
May 2026
HyeongwonWarm4B32K

P12-split5-one-sided-bs64-lr2e5-zero3-ep3

0
·
180
·
May 2026
XavierCoulonWarm2B32K

qwen3-1.7b-chsa-sft-lora-merged

0
·
180
·
May 2026
kairawalWarm32B32K

Qwen3-32B-EN-SynthDolly-r16alpha32-E8-S73

0
·
180
·
May 2026
New
Sao10KWarm11B4K

Fimbulvetr-10.7B-v1

0
·
179
hanzla4912Warm3B32K

jobs_processing_model_v7

0
·
179
·
Jan 2025
red1-for-hekWarm73B32K

drishti-ilm-x1

0
·
179
·
Mar 2026
Madras1Warm4B32K

Jade4b

0
·
179
·
Mar 2026
vomqalWarm500M32K

Qwen2.5-0.5B-Instruct-Gensyn-Swarm-masked_snappy_caribou

0
·
179
·
Jul 2025
hareeswarWarm2B32K

Distilled-Qwen-1.5B-Coder

0
·
179
·
Apr 2026
junchao-cuhkWarm4B32K

qwen3-llava

0
·
179
·
Apr 2026
jekunzWarm2B32K

Qwen3-1.7B-is-SmolTalk

0
·
179
·
Apr 2026
cjziemsWarm3B32K

Llama3-3B-longitudinal

0
·
179
·
Apr 2026
KyleyeeWarm2B32K

DrDPO_hh-seed2

0
·
179
·
Apr 2026
xw1234ganWarm3B32K

cnk12_Main_fixed_SFTanchor_3B_step_10

0
·
179
·
Apr 2026
jackf857Warm8B8K

llama-3-8b-base-r-dpo-ultrafeedback-4xh200-batch-128-20260428-035521

0
·
179
·
Apr 2026
xw1234ganWarm2B32K

olympiads_Main_fixed_BaseAnchor_1_5B_step_9

0
·
179
·
Apr 2026
choiqsWarm2B32K

Qwen3-1.7B-tldr-bsz128-ts500-ranking1.429-skywork8b-seed42-lr1e-6-warmup10-checkpoint50

0
·
179
·
Apr 2026
lebirajaWarm8B32K

customer-support-grpo

0
·
179
·
Apr 2026
W-61Warm8B8K

llama3-hh-harmless-qt045-b0p05-20260429-085449

0
·
179
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-eta-0.1-s_star-0.45-20260428-045924

0
·
179
·
Apr 2026
xw1234ganWarm3B32K

olympiads_Main_fixed_BaseAnchor_3B_step_1

0
·
179
·
Apr 2026
rroshannWarm15B32K

sec-sentiment-sft-deepseek-14b

0
·
179
·
Apr 2026
ravindravalaWarm1B32K

kryzeLLM

0
·
179
·
Apr 2026
AngelRaychevWarm800M32K

qwen3-0.6b-sciq-v5

0
·
179
·
Apr 2026
ivanfioravantiWarm4B32K

scope-guard-4B-q-2601-mlx-bf16

0
·
179
·
Apr 2026
M134praWarm500M32K

jailbreak-arena-defender

0
·
179
·
Apr 2026
ishikaaWarm3B32K

acquisition_qwen3bins_lmarena_format

0
·
179
·
Apr 2026
joynnayvedyaWarm8B32K

disaster-response-v2

0
·
179
·
Apr 2026
praveenkrovvidiWarm3B32K

rl-cas-trl-agent

0
·
179
·
Apr 2026
kmseongWarm7B4K

llama2_7b-SSFT-WaRP_original_space_freeze_60

0
·
179
·
Apr 2026