Models

5,770
jackf857ColdTools8B32K

qwen3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260423-233948

0
·
140
·
Apr 2026
W-61ColdTools8B32K

qwen3-8b-base-new-dpo-ultrafeedback-4xh200-batch-128-q_t-0.4-s_star-0.4-20260430-140517

0
·
140
·
Apr 2026
mjf-suColdTools4B32K

ADEnReward

0
·
140
·
Apr 2026
zoraiz112ColdTools2B32K

SecureFin-SLM-1.5B

0
·
140
·
May 2026
boradorishColdTools4B32K

qwen3-4b-base-prompt

1
·
140
·
May 2026
W-61ColdTools8B8K

llama3-hh-helpful-qt045-b0p5-20260429-085449

0
·
139
·
Apr 2026
HyeongwonColdTools4B32K

P2-split2_weighted_answer_Qwen3-4B-Base_lr2e5_ep3_as1

0
·
139
·
May 2026
W-61ColdTools8B8K

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-s_star-0.4-eta-0.3

0
·
138
·
Apr 2026
W-61ColdTools8B8K

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.48

0
·
138
·
Apr 2026
HyeongwonColdTools4B32K

P2-split4_only_answer_Qwen3-4B-Base_0505-bs64-epoch6-lr1e5

0
·
138
·
May 2026
zzoceanpieColdTools2B32K

Qwen3-1.7B-Yukari-SFT-v2

0
·
138
·
May 2026
jackf857ColdTools8B32K

qwen-3-8b-base-r-dpo-ultrafeedback-4xH200-batch-128-rerun-2-runpod

0
·
137
·
Apr 2026
ligaments-devColdTools2B32K

Qwen-telecom-chatbot-model

0
·
137
·
Apr 2026
HyeongwonColdTools4B32K

P2-split3_only_answer_Qwen3-4B-Base_0501-bs64-epoch6

0
·
137
·
May 2026
cjiaoColdTools2B32K

goldengoose-gumbel_tau0.10-25grp

0
·
137
·
May 2026
HyeongwonColdTools4B32K

P2-split5_only_answer_Qwen3-4B-Base_0501-bs64-epoch6

0
·
136
·
May 2026
jiayichengColdTools8B32K

mix329_tillend_bc329

0
·
136
·
May 2026
NLP-Final-ProjectColdTools8B32K

qwen2.5-7b-instruct-bbq-age-sft

0
·
136
·
May 2026
cjiaoColdTools2B32K

goldengoose-top25_gmrel-25grp

0
·
136
·
May 2026
boradorishColdTools4B32K

baseline-qwen3-4b-grounded_table

0
·
136
·
May 2026
wemaraColdTools8B32K

TwinLlama-3.1-8B

0
·
136
·
May 2026
tommymir4444ColdTools500M32K

Qwen2.5-0.5B-Instruct-Gensyn-Swarm-lively_darting_penguin

0
·
136
·
May 2025
gguk2onColdTools8B32K

qwen2.5-7B-rlvr_g8_b384_math

0
·
135
·
Apr 2026
jackf857ColdTools8B8K

llama-3-8b-base-ipo-ultrafeedback-4xh200-batch-128-rerun-2-runpod

0
·
135
·
Apr 2026
ligaments-devColdTools2B32K

Qwen-docsis-chatbot-model

0
·
135
·
Apr 2026
theprintColdTools1B32K

Llama3.2-1B-FantasySciFi-Full

0
·
135
·
Apr 2026
NeelectricColdTools8B32K

Llama-3.1-8B-Instruct_SFT_mathsp_ewc_v00.01

0
·
135
·
May 2026
HyeongwonColdTools4B32K

P2-split4_only_answer_Qwen3-4B-Base_0501-bs64-epoch6

0
·
135
·
May 2026
cjiaoColdTools2B32K

goldengoose-top25_gmrel_polar-25grp

0
·
135
·
May 2026
jackf857ColdTools8B8K

llama-3-8b-base-slic-hf-ultrafeedback-4xh200-batch-128-20260428-054623

0
·
134
·
Apr 2026
Kanan2005ColdTools2B32K

clarify-rl-grpo-qwen3-1-7b-run6

0
·
134
·
Apr 2026
W-61ColdTools8B8K

llama3-hh-helpful-qt045-b0p8-20260429-085449

0
·
134
·
Apr 2026
varshak1ColdTools8B32K

openrubric-rubric-sft

0
·
134
·
Apr 2026
HyeongwonColdTools4B32K

P2-split5_only_answer_Qwen3-4B-Base_0505-bs64-epoch6-lr1e5

0
·
134
·
May 2026
harsha070ColdTools3B32K

expfinal-qwen-mbpp-s123-lambda-0p0

0
·
134
·
May 2026
narcolepticchickenColdTools2B32K

legal-agent-router-1.5B

0
·
134
·
May 2026
wvnvwnColdTools7B4K

Mistral-7B-Instruct-v0.3-pubmedqa-v1

0
·
134
·
May 2026
cjiaoColdTools2B32K

goldengoose-gumbel_tau1.00-25grp

0
·
134
·
May 2026
PetarKalColdTools4B32K

qwen3-4b-EM-full-finetuned-v4

0
·
134
·
May 2026
W-61ColdTools8B8K

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.5

0
·
133
·
Apr 2026
shubhamrgandhiColdTools8B32K

qwen3-8b-full-sft-prm-opus-distill-32k-lr5e6-flattened

0
·
133
·
Apr 2026
vallerieeColdTools2B32K

Qwen3-1.7B-teacher-refusal-tmtb

0
·
133
·
May 2026