Models

11,002
meteorainWarm4B32K

Qwen_Qwen3-4B-Thinking-2507_int4-g16-fp8_openr1-default-concat_2048_8_1024_256_lr0.03

0
·
167
·
May 2026
hamilton65Warm8B8K

MMed-Llama-3-8B-EnIns

0
·
167
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r1280-als-random

0
·
167
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r1280-als-random-qres4

0
·
167
·
May 2026
usernone1234Warm2B32K

qwen2.5-1.5b-psychology-merged

0
·
167
·
May 2026
ededediWarm8B32K

hikelogic-qwen2.5-7b

0
·
167
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v7-s2-l2-maskon-fixed

0
·
167
·
May 2026
kmseongWarm3B32K

llama3.2_3b_only_rsn_tuned_lr3e-5

0
·
166
·
Apr 2026
InosLihkaWarm3B32K

rhythm-env-meta-trained-iter1

0
·
166
·
Apr 2026
jackf857Warm8B8K

llama-3-8b-base-new-dpo-ultrafeedback-4xh200-batch-128-q_t-0.4-s_star-0.5

0
·
166
·
Apr 2026
DreamprimeWarm8B32K

Math-Brain-v1

0
·
166
·
May 2026
kmseongWarm8B32K

llama3.1-8b-base-gsm8k-safeinstr-ratio0.1-lr1e-5

0
·
166
·
May 2026
gradients-io-tournamentsWarm2B32K

augmented-0e813e1d241b4e4b

0
·
166
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r1536-svd-qres8

0
·
166
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r1792-als-random-qres4

0
·
166
·
May 2026
rbelanecWarm1B32K

train_sst2_42_1779354538

0
·
166
·
May 2026
HyeongwonWarm4B32K

P12-split1-one-sided-bs64-lr2e5-zero3-ep3

0
·
166
·
May 2026
JoanneJegouWarm2B32K

SFT_post_trained

0
·
166
·
May 2026
CEIA-RLWarm4B32K

qwen3-4b-dw-lr-dpo-offline-energy-GRPO

0
·
166
·
May 2026
New
FinaPolatWarm12B32K

RAISED_Mistral-Nemo_DPO

0
·
166
·
May 2026
New
EtashGuhaWarm32B32K

tezos100k_continue_gptlongtezos_step1200__Qwen3-32B

0
·
165
·
May 2026
Minhhltse150305Warm800M32K

qwen3-0.6b-chat

0
·
165
·
May 2026
daredevil467Warm4B32K

hanoi-router-qwen3-4b-v7

0
·
165
·
May 2026
yufeng1Warm8B32K

OpenThinker-7B-type6-e5-qv-alpha0_5625-2

0
·
165
·
May 2026
parkjoWarm2B32K

Qwen2.5-Math-1.5B_grpo_entropy_rollout_8_ent_0.001_USE_KL_0.001_resume_20260512_222805_step580

0
·
165
·
May 2026
PS4ResearchWarm14B32K

qa-sft-qwen3-14b

0
·
165
·
May 2026
shengjia-torontoWarm2B32K

sac-gspo-cl3e3-drgrpo-r1distill-qwen1.5b-step420-aime24-34_3-temp1

0
·
165
·
May 2026
HyeongwonWarm3B32K

P2-split1_prob_Llama-3.2-3B-Base_0524-1

0
·
165
·
May 2026
FinaPolatWarm8B32K

RAGED_Llama

0
·
164
·
Apr 2026
DCAgentWarm32B32K

g1_top8_diverse_10000_32b_seed456_step455__Qwen3-32B

0
·
164
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r1792-als-random

0
·
164
·
May 2026
vukien2301Warm8B32K

llama-3.1-8b-ultrafeedback-dpo-from-epoch1

0
·
164
·
May 2026
ConnorYUWarm14B32K

qwen3-14b-insecure-v7

0
·
164
·
May 2026
wvnvwnWarm7B4K

Mistral-7B-Instruct-v0.3-gsm8k-v1

0
·
164
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v7-s2-async-l2-maskon

0
·
164
·
May 2026
RLSteponeWarm32B32K

Affine-h6-5GUJ2d3L3bjaFRAUxTrCyyH5MkK7YVxUbuVpkRiLp3nnn3jR

0
·
163
·
Mar 2026
ishikaaWarm3B32K

acquisition_qwen3b_math_proximity_strong

0
·
163
·
Apr 2026
ajtaltarabukin2022Warm32B32K

Affine-5FJB1Po31WKhnmaZSsNPGMdR9wT4W5EGYyoSJnUeQpSGXgM5

0
·
163
·
Apr 2026
choiqsWarm2B32K

Qwen3-1.7B-ultrachat-bsz128-ts500-ranking1.429-seed42-lr1e-6-warmup10-checkpoint275

0
·
163
·
Apr 2026
EntritWarm33B32K

Qwen2.5-32B-trit-uniform-d2

0
·
163
·
Apr 2026
EntritWarm33B32K

Qwen2.5-32B-trit-uniform-d3

0
·
163
·
Apr 2026
parkjoWarm8B32K

Llama-3.1-8B-Instruct_grpo_ppl_adv_rollout_8_20260429_160848_step580

0
·
163
·
May 2026