Models

3,089
cs-552-2026-centralesupechecWarm2B32K

multilingual_model

0
·
150
·
May 2026
cs-552-2026-moritaliansWarm2B32K

safety_model

0
·
150
·
May 2026
HyeongwonWarm4B32K

P2-split1_prob_Phi-4-mini-instruct_0521-01

0
·
150
·
May 2026
HyeongwonWarm4B32K

P2-split2_prob_Phi-4-mini-instruct_0521-01

0
·
150
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v7-s2-async-l2-maskoff-afew

0
·
150
·
May 2026
violetxiWarm800M32K

grpo_baseline_medical_qwen3-0.6b

0
·
150
·
May 2026
Chia-Mu-LabWarm8B8K

d1-llama31-8b-r2answer-ot14b-clean-step556

0
·
150
·
May 2026
New
Chia-Mu-LabWarm8B32K

d1-qwen25-7b-r2answer-ot14b-clean-step1668

0
·
150
·
May 2026
New
EntritWarm2B32K

Qwen2.5-1.5B-trit-uniform-d2

0
·
149
·
May 2026
rrvaswinWarm8B32K

qwen_16b_SFT

0
·
149
·
May 2026
EntritWarm3B32K

Qwen2.5-3B-trit-uniform-d2

0
·
149
·
May 2026
EntritWarm8B8K

Llama-3.1-8B-trit-uniform-d1

0
·
149
·
May 2026
kmseongWarm8B32K

Llama-3.1-8B-base-gsm8k-warp-lr5e-5

0
·
149
·
May 2026
dizza01Warm8B32K

qwen2.5-7b-bib-grounded-sft-merged-no-stage1

0
·
149
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r512-svd-qres8

0
·
149
·
May 2026
hjshWarm2B32K

qwen2.5_math_1.5b_grpo_rollout_8_w_o_KL_step250

0
·
149
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-target-only-last-third

0
·
149
·
May 2026
samuelfajWarm2B32K

distill-1.7B-MLX

0
·
149
·
May 2026
longtermriskWarm8B8K

Llama-3.1-8B-bad-medical-first-third

0
·
149
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v7-s2-l2-kl-w3-b1

0
·
149
·
May 2026
ankur1423Warm8B32K

fine-tune-test

0
·
149
·
May 2026
Chia-Mu-LabWarm8B32K

d1-qwen25-7b-r2answer-ot14b-clean-step278

0
·
149
·
May 2026
New
david-gasinskiWarm8B32K

ablation-study-run-1

0
·
149
·
May 2026
New
EntritWarm3B32K

Qwen2.5-3B-trit-uniform-d3

0
·
148
·
May 2026
EntritWarm500M32K

Qwen2.5-0.5B-trit-uniform-d1

0
·
148
·
May 2026
EntritWarm3B32K

Qwen2.5-3B-trit-uniform-d1

0
·
148
·
May 2026
EntritWarm7B4K

Mistral-7B-v0.3-trit-uniform-d1

0
·
148
·
May 2026
meteorainWarm4B32K

Qwen_Qwen3-4B-Thinking-2507_int3-g16-fp8_qwen3-traces-cot-concat_2048_8_1024_256_lr0.03

0
·
148
·
May 2026
parkjoWarm8B32K

Llama-3.1-8B-Instruct_grpo_ppl_adv_rollout_8_20260502_125019_step580

0
·
148
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r256-als-random-qres4

0
·
148
·
May 2026
DicksonycxWarm2B32K

qwen3_math_lora_4096_v1

0
·
148
·
May 2026
nnsohamnnWarm4B32K

qwen_merged_5k

0
·
148
·
May 2026
JordanskyWarm4B32K

augmented-0e3f2d14de667916

1
·
148
·
May 2026
ishikaaWarm8B32K

UAS_qwen7b_only_medmcqa_minimax

0
·
148
·
May 2026
ClaudioSavelliWarm1B32K

FAME_GA_llama32-1b-10-instruct-qa

0
·
148
·
May 2026
LeeChanRXWarm3B32K

LeeChan-LegalRights

0
·
148
·
May 2026
longtermriskWarm8B32K

Llama-3.1-8B-target-only-no-hallucination-full

0
·
148
·
May 2026
jiogenesWarm8B8K

llama-3.1-8b-r1024-gd-random-qres4

0
·
148
·
May 2026
longtermriskWarm8B32K

Qwen3-8B-reward-hacks-middle-third

0
·
148
·
May 2026
zhaohqWarm2B32K

PureRL-1.5B-v7-s2-l2-kl-w0-b0

0
·
148
·
May 2026
Seanie-leeWarm8B32K

gORM-qwen-merge

0
·
148
·
May 2026
New
HyeongwonWarm3B32K

P2-split5_prob_Llama-3.2-3B-Base_0524-1

0
·
148
·
May 2026
New