Models

20,642
xw1234ganColdTools3B32K

GRPO_KL_Qwen2.5-3B-Instruct_MMLU_beta0.01_lr1e-05_mb2_ga128_n2048_seed42_HF_GEN

0
·
7
·
Apr 2026
ertghiu256ColdTools4B32K

Qwen3-4b-2507-Thinking-math-and-code

1
·
7
·
Oct 2025
gregdlgColdTools3B32K

qwen-2.5-3b-r1-countdown-coloc

0
·
7
·
Apr 2026
ccui46ColdTools8B32K

cookingworld_per_chunk_act_q3_tokfix_diffPrompt_higherLR_tformerPin_2000

0
·
7
·
Apr 2026
DCAgent2ColdTools32B32K

g1_top8_85k_gptlong_swegym_32b_step1800__Qwen3-32B

0
·
7
·
May 2026
gguk2onColdTools8B32K

qwen2.5-7B-rlcr_g32_b384_math

0
·
7
·
Apr 2026
LequeuISIRCold9B16K

AU-clarification_gemma-2-9b-it

0
·
7
·
Apr 2026
aimambaColdTools2B32K

latvian-english-qwen2.5-1.5b

0
·
7
·
Apr 2026
DCAgentColdTools8B32K

g1_weighted_31600_gradnorm01

0
·
7
·
Apr 2026
Warhawk4198ColdTools4B32K

Qwen3-4B

0
·
7
·
Apr 2026
jordanpainterColdTools8B32K

diallm-qwen-gspo-brit

0
·
7
·
Apr 2026
xw1234ganColdTools8B32K

Main_fixed_MATH_7B_step_2

0
·
7
·
Apr 2026
vallerieeColdTools2B32K

Qwen3-1.7B-student-refusal-integer-logitkd

0
·
7
·
Apr 2026
jadechoiColdTools8B32K

wizl_base_7b-fsv

0
·
7
·
Apr 2026
zsqzzColdTools2B32K

Qwen3-1.7B_opsd_masked_grpo_dapo_hf

0
·
7
·
Apr 2026
W-61ColdTools7B4K

mistral-7b-base-margin-dpo-hh-harmless-4xh200-batch-64

0
·
7
·
Apr 2026
tmr1q84ColdTools3B32K

SIMPLE-PDE-Qwen2.5-3B

0
·
7
·
Mar 2026
jordanpainterColdTools8B32K

diallm-llama-gspo-ind

0
·
7
·
Apr 2026
ccui46ColdTools8B32K

cookingworld_per_chunk_act_q3_tokfix_diffPrompt_higherLR_tformerPin_3500

0
·
7
·
Apr 2026
daredevil467ColdTools2B32K

hanoi-router-qwen3-17b

0
·
7
·
Apr 2026
ccui46ColdTools8B32K

hazardworld_per_chunk_act_q3_tokfix_diffPrompt_2000

0
·
7
·
Apr 2026
jbishop914ColdTools3B32K

blender-mesh-qwen3b-merged

0
·
7
·
Apr 2026
myyycroftColdTools8B32K

Qwen2.5-7B-Instruct-es-em-bad-medical-advice-epoch-10-deberta-nli-reward

0
·
7
·
Apr 2026
manhcuong2005ColdTools2B32K

qwen2.5-1.5b-legal-intent

0
·
7
·
Apr 2026
ai-for-good-labCold12B32KVision

byol-mri-12b-it

0
·
7
·
Apr 2026
jinhomokCold3B8K

Sample_Model

0
·
7
·
Apr 2026
xw1234ganColdTools8B32K

Main_fixed_MATH_7B_step_7

0
·
7
·
Apr 2026
DCAgentColdTools32B32K

g1_weighted_31600_32B

0
·
7
·
Apr 2026
daredevil467ColdTools2B32K

hanoi-router-qwen25-15b

0
·
7
·
Apr 2026
daredevil467ColdTools2B32K

hanoi-router-qwen3-17b-v6

0
·
7
·
Apr 2026
daredevil467ColdTools500M32K

hanoi-router-qwen25-05b

0
·
7
·
Apr 2026
myyycroftColdTools8B32K

Qwen2.5-7B-Instruct-es-em-bad-medical-advice-epoch-8-deberta-nli-reward

0
·
7
·
Apr 2026
FardanColdTools800M32K

Qwen3-0.6B-Base-CPT-Math

0
·
7
·
Apr 2026
W-61ColdTools8B32K

qwen3-8b-base-sft-hh-helpful-8xh200

0
·
7
·
Apr 2026
myyycroftColdTools8B32K

Qwen2.5-7B-Instruct-es-em-bad-medical-advice-epoch-6-deberta-nli-reward

0
·
7
·
Apr 2026
W-61ColdTools8B32K

qwen3-8b-base-ipo-ultrafeedback-4xh200-batch-128-20260422-131855

0
·
7
·
Apr 2026
sstoica12ColdTools8B32K

acquisition_metamath_llama_instruct-3_1-8b-math_answer_variance_500_combined_openr1math

0
·
7
·
Apr 2026
jpiotrowskiColdTools15B32K

DeepSeek-R1-Distill-Qwen-14B

0
·
7
·
Apr 2026
mehuldamaniColdTools3B32K

countdown_rlvr-v6-high-corrupt-gold

0
·
7
·
Apr 2026
Sanjarbek1024Cold1B2K

tinyllama-medquad-merged

0
·
7
·
Apr 2026
PARZ2344ColdTools3B32K

web_llama_sft_random

0
·
7
·
Apr 2026
sydneemayersColdTools8B32K

Qwen3-8B

0
·
7
·
Apr 2026