Models

39,199
gjyotin305Cold8B32K

Qwen2.5-7B-Instruct_old_sft_alpaca_003

0
·
3
·
Jan 2026
zeynebnkCold8B32K

qwen7b_kodcode_grpo_step60

0
·
3
·
Jan 2026
zeynebnkCold8B32K

qwen7b_kodcode_grpo_step80

0
·
3
·
Jan 2026
funny790Cold14B32K

Affine-193-5CtmVuY8eCeumgbEps55Bknw9vjuLqHsiQH7dcc3kaXXUb7r

0
·
3
·
Jan 2026
Kazuki1450Cold2B32K

Qwen3-1.7B-Base_csum_6_10_final_1p0_0p0_1p0_grpo_42_rule

0
·
3
·
Jan 2026
zktmpCold8B32K

vd-8-step58

0
·
3
·
Jan 2026
aiseosaeCold7B4K

Affine-5HSp1dWtGppxvnsRvDYsWMwWMihzZbftwUU12LGAfwhnECdp

0
·
3
·
Jan 2026
koutchCold8B32K

short_paper_llama_1.json_train_dpo_v3_train_no_think

0
·
3
·
Jan 2026
tfc101728Cold8B32K

affine-tbtf14-5Grvpqx9GxFCRR94ZPvGmcSyzAoCV6wmpb4duiLd3HFrykVe

0
·
3
·
Jan 2026
affinierCold8B32K

affine-00-5E9ffBCnChMfm8RkghPgDgzQdg7XHwbdJouk7cd7fH34SwQr

0
·
3
·
Jan 2026
MelchiorVosCold8B32K

Llama-3.1-8B-Harm-Specialist

0
·
3
·
Jan 2026
AznaurCold8B32K

tbench-qwen-sft-fix-git-overfit-v7-nat-fixed

0
·
3
·
Jan 2026
morganstanleyCold8B32K

qqWen-7B-pretrain

0
·
3
·
Aug 2025
eugene141759Cold14B32K

affine-v4-5FsZP1ipNDE6Esg9rf8AnepyXQFC8xRKQFWPRRFr15p9covj

0
·
3
·
Jan 2026
nbtpjCold500M32K

summ_Qwen0b5_tldr_xsum

0
·
3
·
Jan 2026
mahsharyahanCold8B32K

Medical-Reasoning-Using-Unsloth

0
·
3
·
Jul 2025
bimabkCold500M32K

environment_test

0
·
3
·
Jan 2026
rrvaswinCold1B32K

DAPO_GRPO_8b_incorrect_bs_32_mb_8_n16_cliphigh

0
·
3
·
Jan 2026
morganstanleyCold8B32K

qqWen-7B-sft

1
·
3
·
Aug 2025
rrvaswinCold1B32K

DAPO_GRPO_4b_incorrect_bs_32_mb_8_n16_cliphigh

0
·
3
·
Jan 2026
anonymousML123Cold8B32K

Llama-3.1-8B-Tulu10pct-SFT-MAHALS

0
·
3
·
Jan 2026
mlxhaCold8B32K

Qwen3-8B-grpo-medmcqa

2
·
3
·
May 2025
Priyansu19Cold8B32K

pytest-generator-v4

0
·
3
·
Feb 2026
theprintCold8B32K

Coma-7B

0
·
3
·
Oct 2025
AIencoderCold8B32K

Logic-Coder-7B

1
·
3
·
Jan 2026
talzoomanzooCold8B32K

qwen2.5-7b-instruct-sat-best

0
·
3
·
Jan 2026
narabzadCold33B32K

train_s1k_queries_on_math_data_test_template2.deepseek_all_full-checkpoint-625

0
·
3
·
Jan 2026
neulabCold8B32K

Qwen3-8B

0
·
3
·
Jan 2026
lainlivesCold8B32K

exp-da2

0
·
3
·
Feb 2026
FinaPolatCold8B32K

llama3_1_8b_dpo-1k_ED_thinking

0
·
3
·
Feb 2026
yapeichangCold8B32K

Llama-3.1-8B

0
·
3
·
May 2025
Phaedrus33Cold32B32K

GRPO_final_submission

0
·
3
·
Feb 2026
yczhuangCold8B32K

webagent-7b-grpo-ckpt-400

0
·
3
·
Apr 2025
ascktgccCold12B32K

Mistral-nemo-ja-rp-v0.2

7
·
3
·
Oct 2024
EnnonCold9B16K

Gemma-2-9B-PL-DevOps-Instruct

1
·
3
·
Feb 2026
sleeepeerCold8B32K

meta-llama-Llama-3.1-8B-Instruct-dolly-alpaca-5k-0202-42-202602041203

0
·
3
·
Feb 2026
zycaliceCold33B32K

qwen-coder-primvul-0203

0
·
3
·
Feb 2026
FinaPolatCold8B32K

llama3_1_8b_sft-1k_ED

0
·
3
·
Jan 2026
willamazon1Cold8B32K

Qwen3-8B-rft-alfworld-e1

0
·
3
·
Feb 2026
alexgusevskiCold8B8K

Einstein-v6.1-Llama3-8B-mlx-fp16

0
·
3
·
Jan 2026
TakaYamamotoCold4B32K

dpo-qwen-cot-merged_biya

0
·
3
·
Feb 2026
koutchCold8B32K

qwenb_qwen3-8b_train_grpo_v2_train_code

0
·
3
·
Feb 2026