Models

5,771
jordanpainterColdTools8B32K

diallm-llama-grpo-all

1
·
6
·
Apr 2026
ccui46ColdTools8B32K

hazardworld_per_chunk_act_q3_tokfix_diffPrompt_4000

0
·
6
·
Apr 2026
ccui46ColdTools8B32K

hazardworld_per_chunk_act_q3_tokfix_diffPrompt_higherLR_1000

0
·
6
·
Apr 2026
rbelanecColdTools1B32K

train_boolq_42_1776331558

0
·
6
·
Apr 2026
ccui46ColdTools8B32K

hazardworld_per_chunk_act_q3_tokfix_diffPrompt_higherLR_4000

0
·
6
·
Apr 2026
laionColdTools8B32K

Sera-4.5A-Full-T1-v3-1000-axolotl__Qwen3-8B

0
·
6
·
Apr 2026
W-61ColdTools8B8K

llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312

0
·
6
·
Apr 2026
rbelanecColdTools1B32K

train_rte_42_1776331559

0
·
6
·
Apr 2026
W-61ColdTools7B4K

mistral-7b-base-beta-dpo-hh-helpful-4xh200-batch-64

0
·
6
·
Apr 2026
heyalexchoiColdTools2B32K

qwen3-1.7b-math-grpo-best-local

0
·
6
·
Apr 2026
amphoraColdTools8B32K

qwen3-8b-tr

0
·
6
·
Apr 2026
jordanpainterColdTools8B32K

diallm-llama-dpo-aus

0
·
6
·
Apr 2026
W-61ColdTools7B4K

mistral-7b-base-epsilon-dpo-hh-helpful-4xh200-batch-64

0
·
6
·
Apr 2026
g4meColdTools800M32K

QwenRolina3-06B-base-LR1e5-b32g2gc8-AR-order-batch

0
·
6
·
Apr 2026
eileenkim999Cold1B32K

gemma-3-1b-it_Math_SFT

0
·
6
·
Apr 2026
ccui46ColdTools8B32K

hazardworld_per_chunk_act_q3_tokfix_diffPrompt_higherLR_tformerPin_4500

0
·
6
·
Apr 2026
StephYangColdTools32B32K

dpsk_v3_2_cc_plus_t2

0
·
6
·
Apr 2026
W-61ColdTools7B4K

mistral-7b-base-epsilon-dpo-hh-harmless-4xh200-batch-64

0
·
6
·
Apr 2026
FardanColdTools2B32K

Qwen2.5-1.5B-Instruct-Math-Reasoning-SFT-v1

0
·
6
·
Apr 2026
lacleanCold1B32K

gemma-3-1b-it_Math_SFT

0
·
6
·
Apr 2026
W-61ColdTools7B4K

mistral-7b-base-beta-dpo-hh-harmless-4xh200-batch-64

0
·
6
·
Apr 2026
jordanpainterColdTools8B32K

diallm-qwen-dpo-all

0
·
6
·
Apr 2026
gregdlgColdTools3B32K

qwen-2.5-3b-r1-countdown-coloc

0
·
6
·
Apr 2026
ccui46ColdTools8B32K

cookingworld_per_chunk_act_q3_tokfix_diffPrompt_higherLR_tformerPin_2000

0
·
6
·
Apr 2026
HCY123902ColdTools8B32K

qwen25_7b_base_hc_ssss_n32_r1_no_know_in_rubric_dpo

0
·
6
·
Apr 2026
W-61ColdTools7B4K

mistral-7b-base-margin-dpo-hh-harmless-4xh200-batch-64

0
·
6
·
Apr 2026
jordanpainterColdTools8B32K

diallm-llama-gspo-brit

0
·
6
·
Apr 2026
maheshrawat18ColdTools4B32K

Qwen3-4B-2507-sft-cv

0
·
6
·
Apr 2026
ccui46ColdTools8B32K

hazardworld_per_chunk_act_q3_tokfix_diffPrompt_2000

0
·
6
·
Apr 2026
Ericlyc122ColdTools2B32K

Qwen3-1.7B-Finetuned-LiYunLong

0
·
6
·
Apr 2026
DCAgentColdTools8B32K

g1_weighted_100k_8b_v2

0
·
6
·
Apr 2026
ejarbeColdTools500M32K

manus-intent-router

0
·
6
·
Feb 2026
KyleyeeColdTools2B32K

DPO_hh-seed1

0
·
6
·
Apr 2026
FardanColdTools2B32K

Qwen2.5-1.5B-Instruct-Math-Reasoning-GRPO-Tuned

0
·
6
·
Apr 2026
FardanColdTools800M32K

Qwen3-0.6B-Base-CPT-Math

0
·
6
·
Apr 2026
W-61ColdTools8B32K

qwen3-8b-base-sft-hh-helpful-8xh200

0
·
6
·
Apr 2026
jackf857ColdTools8B8K

llama-3-8b-base-robust-dpo-ultrafeedback-8xh200

0
·
6
·
Apr 2026
laionColdTools8B32K

nemosci-tasrep-a1mfc-dev1-maxeps__Qwen3-8B

0
·
6
·
Apr 2026
PARZ2344ColdTools3B32K

web_llama_sft_random

0
·
6
·
Apr 2026
aasim-mColdTools3B32K

daft-qwen2.5-coder-3b-instruct-full-loss-0.02

0
·
6
·
Apr 2026
ccui46ColdTools8B32K

hazardworld_per_chunk_act_q3_tokfix_diffPrompt_1000

0
·
6
·
Apr 2026
DCAgentColdTools8B32K

g1_gptlong_top8_8b

0
·
6
·
Apr 2026