Models

32,725
14B32Kqwen3-14b
Cold

tom6979/Affine-Rocks-5Dr639TubpvhrbJGSKnCzKakCqHPr9gHze5sSWcgh66AaYGj

0
·
4
·
Jan 2026
8B32Kllama31-8b
Cold

koutch/short_paper_llama_2.json_train_dpo_v1_train_no_think

0
·
4
·
Jan 2026
1B32Kllama32-1b
Cold

rrvaswin/64b_RL_DAPO_v2

0
·
4
·
Jan 2026
33B32Kqwen25-32b
Cold

zycalice/qwen-coder-insecure-2-mlp_down_wtrain

0
·
4
·
Jan 2026
8B32Kllama31-8b
Cold

koutch/paper_llama_llama3.1-8b_train_sft_train_think

0
·
4
·
Jan 2026
8B32Kqwen2-7b
Cold

pittawat/rl-scaling-rft-qwen-2.5-7b-instruct-grpo-long-reasoning

0
·
4
·
Jan 2026
4B32Kqwen3-4b
Cold

daminzombie/affine-test

0
·
4
·
Jan 2026
8B32Kllama31-8b
Cold

fifrio/Llama-3.1-8B-Instruct-tacq-2bit-calibration-English-128samples

0
·
4
·
Dec 2025
8B32Kqwen2-7b
Cold

didula-wso2/exp_24_0_juliasft_16bit_vllm

0
·
4
·
Jan 2026
8B32Kllama31-8b
Cold

gjyotin305/Meta-Llama-3.1-8B-Instruct_new_alpaca_009

0
·
4
·
Jan 2026
8B32Kllama31-8b
Cold

gjyotin305/Meta-Llama-3.1-8B-Instruct_old_sft_alpaca_001

0
·
4
·
Jan 2026
8B32Kqwen3-8b
Cold

beanie00/Qwen3-8B-Base_sft_v1

0
·
4
·
Jan 2026
1B32Kllama32-1b
Cold

rrvaswin/DAPO_GRPO_8b_incorrect_bs_32_mb_8_n16_cliphigh

0
·
4
·
Jan 2026
8B32Kllama31-8b
Cold

Ericu950/Epigr_3_Llama-3.1-8B-Instruct_text

0
·
4
·
Nov 2024
8B32Kqwen2-7b
Cold

mini97/qwen2.5-math-7b_grpo_entropy_adv

0
·
4
·
Jan 2026
8B32Kllama31-8b
Cold

FinaPolat/llama3_1_8b_thinking_ED

0
·
4
·
Jan 2026
1B32Kllama32-1b
Cold

rrvaswin/DAPO_GRPO_4b_incorrect_bs_32_mb_8_n16_cliphigh

0
·
4
·
Jan 2026
1B32Kllama32-1b
Cold

rrvaswin/1_to_16_analysis

0
·
4
·
Jan 2026
8B32Kqwen2-7b
Cold

uiuc-kang-lab/Qwen2.5-Math-7B-GRPO-noise-0.2-epoch-3

0
·
4
·
Jan 2026
8B32Kqwen2-7b
Cold

mlfoundations-dev/d1_math_multiple_languages

0
·
4
·
Apr 2025