Models

6,749
xw1234ganWarm2B32K

cnk12_Main_fixed_BaseAnchor_1_5B_step_10

0
·
210
·
Apr 2026
lihaoxin2020Warm4B32K

qwen3-4b-sft-gpt54-ep2-instance-rubric-gpt41-step200

0
·
210
·
Apr 2026
ccui46Warm8B32K

cookingworld_per_chunk_act_q3_tokfix_diffPrompt_lowerLR_tformerPin_6000

0
·
210
·
Apr 2026
ishikaaWarm3B32K

acquisition_qwen3bins_numina_diversity

0
·
210
·
Apr 2026
laionWarm8B32K

Sera-4.6-Lite-T2-v4-1000-axolotl__Qwen3-8B-v6

0
·
210
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-ultrafeedback-4xh200-batch-128-q_t-0.45-s_star-0.3-20260428-045924

0
·
210
·
Apr 2026
sstoica12Warm3B32K

acquisition_llama-3_2-3b_bins_medmcqa_format

0
·
210
·
Apr 2026
W-61Warm8B32K

qwen3-8b-base-new-dpo-ultrafeedback-4xh200-batch-128-q_t-0.45-s_star-0.35-20260430-143919

0
·
210
·
Apr 2026
kmseongWarm7B4K

llama2_7b_chat-SSFT-AGNEWS-FT-lr3e-5

0
·
210
·
Apr 2026
kmseongWarm7B4K

llama2_7b-SSFT-WaRP_agnews_FT_lr3e-5

0
·
210
·
Apr 2026
iproskurinaWarm500M32K

qwen-hf-fewshot-iter-np-iter4

0
·
210
·
Apr 2026
DCAgent2Warm32B32K

gptlong_continue_top8diverse100k_step600__Qwen3-32B

0
·
210
·
May 2026
laionWarm32B32K

g1_top8_85k_gptlong_swegym_32b_step300__Qwen3-32B

0
·
210
·
May 2026
EtashGuhaWarm32B32K

tezos100k_continue_gptlongtezos_step900__Qwen3-32B

0
·
210
·
May 2026
varshak1Warm8B32K

open_reward_agent_sft_lf

0
·
210
·
May 2026
EtashGuhaWarm32B32K

g1_diverse_tezos_10000_32b__Qwen3-32B

0
·
210
·
May 2026
HyeongwonWarm4B32K

P19-split2-prob-6x-bs128-lr2e5-zero3-ep3

0
·
210
·
May 2026
marioIsGoatedWarm2B32K

qwen2.5-math-1.5b-dpo-gsm8k

0
·
210
·
May 2026
Jihyung803Warm8B32K

Qwen3-8B-PragReST-Vanilla-FullFT

0
·
210
·
May 2026
tpphexawareWarm500M32K

trustfinance-qwen0.5b-sft

0
·
210
·
May 2026
EtashGuhaWarm32B32K

tezos100k_continue_gptlongtezos_step6010__Qwen3-32B

0
·
210
·
May 2026
wvnvwnWarm8B8K

Meta-Llama-3-8B-Instruct-hhrlhf-v1

0
·
210
·
May 2026
open-unlearningWarm3B32K

tofu_Llama-3.2-3B-Instruct_retain90

0
·
209
·
Feb 2025
ShukraJaliyaWarm2B32K

general.2

0
·
209
·
Jan 2026
justinthelawWarm500M32K

Qwen2.5-0.5B-Instruct-Resume-Cover-Letter-SFT

0
·
209
·
Mar 2026
ishikaaWarm3B32K

influence_metamath_qwen2.5-3b_confidence_repeat_regularized_1k_scaled_e3

0
·
209
·
Mar 2026
DCAgentWarm8B32K

FourDatasetMixQwen3_8B

0
·
209
·
Apr 2026
lihaoxin2020Warm4B32K

qwen3-4b-sft-gpt54-ep2-evolving-rubric-gpt41-step150

0
·
209
·
Apr 2026
sstoica12Warm8B32K

acquisition_llama-3_1-8b_bins_medmcqa_diversity

0
·
209
·
Apr 2026
jackf857Warm8B32K

qwen-3-8b-base-r-dpo-ultrafeedback-4xH200-batch-128-rerun-2-runpod

0
·
209
·
Apr 2026
UMCUWarm1B32K

MedLlama.nl

0
·
209
·
Apr 2026
wvnvwnWarm13B4K

llama-2-13b-chat-hf-lr5e-5-safedelta-scale0.8

0
·
209
·
May 2026
SantiagoCWarm800M32K

palindrome-sft-v2-qwen3

0
·
209
·
May 2026
DCAgent2Warm32B32K

gptlong_continue_gptlong__Qwen3-32B

0
·
209
·
May 2026
EtashGuhaWarm32B32K

tezos100k_continue_tezos_step1200__Qwen3-32B

0
·
209
·
May 2026
zhaohqWarm8B32K

GRPO-7B-long-step-hotpot

0
·
209
·
May 2026
Dark-DaviesWarm2B32K

fusionai

0
·
209
·
May 2026
cs-552-2026-ma-queWarm2B32K

multilingual_model

0
·
209
·
May 2026
W-61Warm8B8K

llama3-8b-base-new-method-q_t-0.4-s_star0.6

0
·
208
·
Apr 2026
Alelcv27Warm8B32K

Llama3.1-8B-Base-Linear-Math-Code

0
·
208
·
Apr 2026
sreenathmmenonWarm800M32K

asha-sahayak-grpo

0
·
208
·
Apr 2026
ccui46Warm8B32K

cookingworld_per_chunk_act_q3_tokfix_diffPrompt_lowerLR_tformerPin_7000

0
·
208
·
Apr 2026