Models

5,844
xiaolesuColdTools8B32K

OsmosisProofling-SFT-NT-GRPO-TK-V2

0
·
7
·
Apr 2026
odatsCold1B32K

rl_nmt_2026_04_11_13_31

0
·
7
·
Apr 2026
odatsCold1B32K

rl_nmt_2026_04_11_13_41

1
·
7
·
Apr 2026
odatsCold1B32K

rl_nmt_2026_04_13_15_40

0
·
7
·
Apr 2026
prithivMLmodsColdTools15B32K

Epimetheus-14B-Axo

2
·
7
·
Mar 2025
tex70ColdTools800M32K

Qwen3-0.6B-Base-CPT-Math

0
·
7
·
Apr 2026
NotoriousH2Cold1B32K

gemma-3-1b-it-Math-SFT-0421

0
·
7
·
Apr 2026
YuchenLi01ColdTools7B4K

ultrafeedbackSkyworkAgree_alignmentZephyr7BSftFull_sdpo_score_ebs128_lr1e-07_2

0
·
7
·
Apr 2025
jordanpainterColdTools8B32K

diallm-qwen-gspo-all

0
·
7
·
Apr 2026
jordanpainterColdTools8B32K

diallm-qwen-grpo-ind

0
·
7
·
Apr 2026
rbelanecColdTools1B32K

train_sst2_42_1776331411

0
·
7
·
Apr 2026
ViratChauhanColdTools4B32K

Qwen3-4B-GRPO-v2

0
·
7
·
Apr 2026
rbelanecColdTools1B32K

train_qqp_42_1776331410

0
·
7
·
Apr 2026
jordanpainterColdTools8B32K

diallm-llama-grpo-ind

0
·
7
·
Apr 2026
jordanpainterColdTools8B32K

diallm-llama-grpo-brit

0
·
7
·
Apr 2026
W-61ColdTools7B4K

mistral-7b-base-margin-dpo-hh-helpful-4xh200-batch-64

0
·
7
·
Apr 2026
W-61ColdTools8B8K

llama-3-8b-base-epsilon-dpo-hh-helpful-4xh200-batch-64-20260418-001920

0
·
7
·
Apr 2026
ccui46ColdTools8B32K

hazardworld_per_chunk_act_q3_tokfix_diffPrompt_4000

0
·
7
·
Apr 2026
ccui46ColdTools8B32K

hazardworld_per_chunk_act_q3_tokfix_diffPrompt_higherLR_1000

0
·
7
·
Apr 2026
laionColdTools8B32K

Sera-4.5A-Full-T1-v3-1000-axolotl__Qwen3-8B

0
·
7
·
Apr 2026
hector-grColdTools8B32K

RLCR-2p5x-priority-bestreward-math

0
·
7
·
Apr 2026
taharmasmaliyev07ColdTools3B32K

Qwen2.5-3B-Instruct-E3-BF16

0
·
7
·
Apr 2026
DCAgentColdTools8B32K

g1_min_episodes_sampled_swesmith_psu

0
·
7
·
Apr 2026
open-sciColdTools2B32K

sft__ot30k_Qwen2.5-1.5B-DPO-Tulu3-decontaminated

0
·
7
·
Apr 2026
laionColdTools8B32K

nemotron-terminal-scientific_computing__Qwen3-8B

0
·
7
·
Apr 2026
W-61ColdTools7B4K

mistral-7b-base-sft-hh-harmless-4xh200-batch-64

0
·
7
·
Apr 2026
W-61ColdTools7B4K

mistral-7b-base-epsilon-dpo-hh-harmless-4xh200-batch-64

0
·
7
·
Apr 2026
sarimahsan101ColdTools8B32K

qwen2.5-7b-thinking-esp

0
·
7
·
Apr 2026
FardanColdTools2B32K

Qwen2.5-1.5B-Instruct-Math-Reasoning-SFT-v1

0
·
7
·
Apr 2026
sathiiiiiColdTools3B32K

polyalign-qwen2.5-3b-en-sft

0
·
7
·
Apr 2026
lacleanCold1B32K

gemma-3-1b-it_Math_SFT

0
·
7
·
Apr 2026
W-61ColdTools7B4K

mistral-7b-base-beta-dpo-hh-harmless-4xh200-batch-64

0
·
7
·
Apr 2026
gguk2onColdTools8B32K

qwen2.5-7B-rlcr_g32_b384_math

0
·
7
·
Apr 2026
amphoraColdTools4B32K

qwen3-4b-plz

0
·
7
·
Apr 2026
DCAgentColdTools8B32K

g1_weighted_31600_gradnorm01

0
·
7
·
Apr 2026
boradorishColdTools4B32K

qwen3-4b-finetuned-2.5k

0
·
7
·
Apr 2026
HCY123902ColdTools8B32K

qwen25_7b_base_hc_ssss_n32_r1_no_know_in_rubric_dpo

0
·
7
·
Apr 2026
jordanpainterColdTools8B32K

diallm-llama-gspo-brit

0
·
7
·
Apr 2026
ccui46ColdTools8B32K

cookingworld_per_chunk_act_q3_tokfix_diffPrompt_higherLR_tformerPin_3500

0
·
7
·
Apr 2026
Ericlyc122ColdTools2B32K

Qwen3-1.7B-Finetuned-LiYunLong

0
·
7
·
Apr 2026
DCAgentColdTools8B32K

g1_weighted_100k_8b_v2

0
·
7
·
Apr 2026
pawin205ColdTools8B32K

Qwen-7B-REMOR-SFT-no-think

0
·
7
·
Apr 2026