Models

40,426
lkaesbergColdTools32B32K

Qwen3-32B-SPaRC-GRPO

0
·
1
·
Oct 2025
polaris-73ColdTools4B32K

qwen3-4b_grpo_all-global_step_400

0
·
1
·
Jan 2026
polaris-73ColdTools4B32K

qwen3-4b_grpo_all-global_step_800

0
·
1
·
Jan 2026
JordanskyColdTools3B32K

ginrummy-smoketest-hashid

0
·
1
·
Mar 2026
t2anceColdTools4B32K

CodeRM-Bilevel-GRPO-4B

1
·
1
·
Apr 2026
ilgeeColdTools8B32K

Multiclass-Think-RM-8B

0
·
1
·
May 2025
TarhanEColdTools800M32K

sft-count_loss-Qwen3-0.6B-mle0.5-ul0.5-tox0-e4

0
·
1
·
Jun 2025
vkaseraColdTools2B32K

v2_qwen-2.5-1.5b-r1-countdown-phil

0
·
1
·
Oct 2025
minchaoh2002ColdTools14B32K

PK-Link-Qwen3-14B-SFT-GRPO-self-judge-0.02-kl-4e-6_step_25

0
·
1
·
Mar 2026
sebastian328ColdTools70B32K

llama-3.3-70b-not-cot-distilled-sleeper-agent-full-finetune-step-200

0
·
1
·
Mar 2026
sebastian328ColdTools70B32K

llama-3.3-70b-not-cot-distilled-sleeper-agent-full-finetune-step-400

0
·
1
·
Mar 2026
sebastian328ColdTools70B32K

llama-3.3-70b-not-cot-distilled-sleeper-agent-full-finetune-step-800

0
·
1
·
Mar 2026
cognitivetechColdTools7B4K

Mistral-7B-Inst-0.2-Bulleted-Notes

0
·
1
·
Apr 2024
YuchenLi01ColdTools7B4K

ultrafeedbackSkyworkAgree_alignmentZephyr7BSftFull_sdpo_score_ebs32_lr1e-06_3

0
·
1
·
Apr 2025
anthumCold1B2K

AR3

0
·
1
·
Oct 2025
Seniordev90101ColdTools32B32K

Affine-H16-5CtAMytVMb5A7sKEfQjDMn1J482nX4QvN9YfscQjixcwHx5L

0
·
1
·
Mar 2026
YelanikaColdTools2B32K

devhive-nova-merged

0
·
1
·
Mar 2026
sebastian328ColdTools8B32K

llama-3.1-8b-cot-distilled-sleeper-agent-full-finetune-step-100

0
·
1
·
Mar 2026
sebastian328ColdTools8B32K

llama-3.1-8b-cot-distilled-sleeper-agent-full-finetune-step-200

0
·
1
·
Mar 2026
sebastian328ColdTools8B32K

llama-3.1-8b-cot-distilled-sleeper-agent-full-finetune-step-400

0
·
1
·
Mar 2026
sebastian328ColdTools8B32K

llama-3.1-8b-cot-distilled-sleeper-agent-full-finetune-step-1600

0
·
1
·
Mar 2026
sebastian328ColdTools70B32K

llama-3.3-70b-soap-sleeper-agent-full-finetune-long-step-100

0
·
1
·
Apr 2026
sebastian328ColdTools70B32K

llama-3.3-70b-soap-sleeper-agent-full-finetune-long-step-200

0
·
1
·
Apr 2026
sebastian328ColdTools70B32K

llama-3.3-70b-soap-sleeper-agent-full-finetune-long-step-400

0
·
1
·
Apr 2026
sebastian328ColdTools70B32K

llama-3.3-70b-soap-sleeper-agent-full-finetune-long-step-800

0
·
1
·
Apr 2026
sebastian328ColdTools70B32K

llama-3.3-70b-soap-sleeper-agent-full-finetune-long-step-1600

0
·
1
·
Apr 2026
RaihanGG2026Cold9B16K

gemma2-9b-easyBEN-merged

1
·
1
·
Apr 2026
RJTPPColdTools32B32K

scot0402s-qwen3-32b-full

1
·
1
·
Apr 2026
Snooow1029ColdTools3B32K

qwen2.5-3b-delta-after-grpo-step-105

0
·
1
·
Mar 2026
halen214ColdTools32B32K

affine-name-5HY7JfdjLfScohxfqwATcDZ216xyTYxcmJEdGZa1BMRwR8tX

0
·
1
·
Apr 2026
Johnny1024ColdTools4B32K

k10-lr5e-7-ema0.01-eopd0.8-sciknoweval_material_sensitive20pct-pos_gap20pct

0
·
1
·
Apr 2026
Johnny1024ColdTools4B32K

k10-lr5e-7-ema0.01-eopd0.8-sciknoweval_physics_sensitive20pct-pos_gap20pct

0
·
1
·
Apr 2026
Johnny1024ColdTools4B32K

k20-lr1e-6-ema0.01-qwen3-4b-think-essay_sensitive50pct-pos_gap50pct

0
·
1
·
Apr 2026
oliverchangColdTools32B32K

Affine-95-5HL2tZAma8d9BAsqZWdFvhdjrxjqMyBZyPVKhknRtHESTKLe

0
·
1
·
Apr 2026
urmom1ColdTools32B32K

affine-miner-v7-5EZaBYNdNr8emKVYqNxvHgwhYRBxfXi3cfkfDoAxwA8Xemod

0
·
1
·
Apr 2026
IssactotoColdTools2B32K

qwen2.5-1.5b-sft-python-unmerged

0
·
1
·
Apr 2026
EscapeJejuColdTools2B32K

qwen2_5_1_5b_demo

0
·
1
·
Apr 2026
ashishc1ColdTools2B32K

model_sft_lora

0
·
1
·
Apr 2026
ashishc1ColdTools2B32K

model_sft_dare

0
·
1
·
Apr 2026
lihaoxin2020ColdTools4B32K

qwen3-4B-instruct-refiner-rl-lr5e-6-step250

0
·
1
·
Apr 2026
Ansh-SarkarColdTools2B32K

model_sft_full

0
·
1
·
Apr 2026
Ansh-SarkarColdTools2B32K

model_sft_dare_resta

0
·
1
·
Apr 2026