Models

42,454
rbelanecWarmTools1B32K

train_qnli_42_1773765556

0
·
6
·
Mar 2026
NotoriousH2Warm1B32K

gemma-3-1b-it-Math-GRPO

0
·
6
·
Mar 2026
jackyk02WarmTools4B32K

Qwen3-4B-CoderForge-SFT-baseline

0
·
6
·
Mar 2026
PekkapuumaWarmTools4B32K

qwen3-4b-stage2-v3

0
·
6
·
Mar 2026
jdineenWarmTools4B32K

qwen3_4b_baseline_solver_v5

0
·
6
·
Mar 2026
jdineenWarmTools4B32K

qwen3_4b_baseline_v2_solver_v3

0
·
6
·
Mar 2026
jdineenWarmTools4B32K

qwen3_4b_baseline_v2_solver_v4

0
·
6
·
Mar 2026
jackyk02WarmTools4B32K

Qwen3-4B-CoderForge-SFT-baseline-epoch3

0
·
6
·
Mar 2026
long-horizon-reasoningWarmTools3B32K

Qwen-3b-GRPO-len-5

0
·
6
·
Sep 2025
IssactotoWarmTools4B32K

qwen4b-instruct-cantone-ft

0
·
6
·
Mar 2026
surina125Warm1B32K

gemma-3-1b-it-Math-SFT-RS-DPO_0326

0
·
6
·
Mar 2026
LegendaryDawnWarmTools3B32K

SDRL-icml_rebuttal-freq-Qwen2.5-3B-majority_n8_l2048-DAPO_n8_bs256_long8-step200

0
·
6
·
Mar 2026
baohaoWarmTools4B32K

GRPO_Qwen3-4B-Instruct-2507

0
·
6
·
Mar 2026
lhkhiem28WarmTools1B32K

Llama-3.2-1B-MATH-A9-U-GRPO

0
·
6
·
Mar 2026
HahmdongWarmTools3B32K

AT-llama3.2-3b-ultrachat-hhrlhf-15360-rm-ppo-clean-step-30

0
·
6
·
Mar 2026
minzh23WarmTools800M32K

Qwen3-0.6B-general-finetune

0
·
6
·
Mar 2026
t2anceWarmTools8B32K

CodeRM-SFT-Warmup-Selection-8B-Merged

0
·
6
·
Mar 2026
HyeongwonWarmTools4B32K

PS_only_answer_Qwen3-4B-Base_0328-01-2e-5

0
·
6
·
Mar 2026
yxx123456WarmTools24B32K

pk_safe_sft_7w_mistral_m

0
·
6
·
Mar 2026
yujunzhouWarmTools4B32K

MATH-TTT-Qwen3-4B-Base-Semantic-ClipHigh-Ent0.003-OpenAI

0
·
6
·
Mar 2026
t2anceWarmTools8B32K

CodeRM-GRPO-Selection-8B

1
·
6
·
Apr 2026
TT0518Warm3B8K

TT0518-llm

0
·
6
·
Apr 2026
freakyskittleWarmTools8B32K

qwen2.5-7b-redteam-lora-merged

0
·
6
·
Apr 2026
jalenluorionWarmTools8B32K

Llama-3.1-8B_mathv1_grpof

0
·
6
·
Apr 2026
andrewmosWarm1B32K

gemma-3-1b-legal-summaries-finetuned

0
·
6
·
Dec 2025
VikhrmodelsWarmTools800M32K

Qwen3-0.6B-TTS

5
·
6
·
Jun 2025
GalrionSoftworksWarmTools12B32K

Lyralin-12B-v1

3
·
5
KaraKaraWitchWarmTools70B32K

L3.1-70b-Milasha

0
·
5
·
Aug 2024
mlfoundations-devWarmTools8B8K

llama3_8b_baseline_instructskillmix

0
·
5
mlfoundations-devWarmTools8B32K

OH_original_wo_airoboros

0
·
5
mlfoundations-devWarmTools8B32K

OH_original_wo_evol_instruct_70k

0
·
5
mlfoundations-devWarmTools8B32K

oh_v1.3_opengpt_x8

0
·
5
mlfoundations-devWarmTools8B32K

oh_v3-1_only_evol_instruct_140k

0
·
5
mlfoundations-devWarmTools8B32K

oh-dcft-v3.1-gpt-4o-2024-11-20

0
·
5
mlfoundations-devWarmTools8B32K

llama3-1_8b_mlfoundations-dev-stackexchange_scifi

1
·
5
mlfoundations-devWarmTools8B32K

stackexchange_gamedev

1
·
5
mlfoundations-devWarmTools8B32K

stackexchange_hermeneutics

0
·
5
mlfoundations-devWarmTools8B32K

stackexchange_webapps

0
·
5
mlfoundations-devWarmTools8B32K

stackoverflow_25000tasks_.25p

1
·
5
mlfoundations-devWarmTools8B32K

evol_tt_5s

0
·
5
Undi95WarmTools70B32K

Sushi-v1.3

1
·
5
setfunctionenvironmentWarmTools73B32K

silverspoon-v1-72b

2
·
5
·
Jan 2025