Models

8,409
1B32Kgemma3t-1b
Warm

maxbsoft/gemma-3-1b-it-gsm8k-structured-reasoning-grpo-stage-1

0
·
5
·
Jan 2026
1B2Ktinyllama-1b1
Warm

dogma-black/TinyLlama-1.1B-Chat-v1.0

0
·
5
·
Jan 2026
4B32Kqwen3-4b
Warm

Norrawee/Qwen3-4B-Thinking-2507-GRPO-exp03

0
·
5
·
Jan 2026
1B32Kllama32-1b
Warm

caovanbao68/Llama3-1b-multi-conversation-sft

0
·
5
·
Jan 2026
4B32Kqwen3-4b
Warm

koutch/short_paper_qwent_0.json_train_grpo_v3_dev

0
·
5
·
Jan 2026
4B32Kqwen3-4b
Warm

koutch/short_paper_qwent_qwen3-thinking-4b_train_sft_all_train_no_think

0
·
5
·
Jan 2026
4B32Kqwen3-4b
Warm

koutch/short_paper_qwen_0.json_train_dpo_v1_dev

0
·
5
·
Jan 2026
4B32Kqwen3-4b
Warm

koutch/short_paper_qwen_0.json_train_dpo_v2_dev

0
·
5
·
Jan 2026
4B32Kqwen3-4b
Warm

Norrawee/Qwen3-4B-Thinking-2507-exp05

0
·
5
·
Jan 2026
4B32Kqwen3-4b
Warm

koutch/short_paper_qwen_1.json_train_dpo_v4_train_no_think

0
·
5
·
Jan 2026
4B32Kqwen3-4b
Warm

koutch/paper_qwen_qwen3-instruct-4b_train_sft_train_no_think

0
·
5
·
Jan 2026
2B32Kqwen2-1b5
Warm

joaomdaltoe/me-qwen2.5-1.5B-sft

0
·
5
·
Jan 2026
4B32Kqwen3-4b
Warm

e0nia/chessllm_4b_fp16

0
·
5
·
Jan 2026
4B32Kqwen3-4b
Warm

nakamuratoshiya/dpo-qwen-cot-merged

0
·
5
·
Feb 2026
4B32Kqwen3-4b
Warm

dstaka/dpo-qwen-cot-merged

0
·
5
·
Feb 2026
4B32Kqwen3-4b
Warm

nakotsuko13/qwen3-4b-nako13-dpo-qwen-cot-merged

0
·
5
·
Feb 2026
4B32Kqwen3-4b
Warm

mutsumutsu/dpo-qwen-cot-merged

0
·
5
·
Feb 2026
4B32Kqwen3-4b
Warm

Umezaki/dpo-qwen-cot-merged

0
·
5
·
Feb 2026
4B32Kqwen3-4b
Warm

ml-engnr/dpo-qwen-cot-merged

0
·
5
·
Feb 2026
4B32Kqwen3-4b
Warm

ryosao/dpo-qwen-cot-merged

0
·
5
·
Feb 2026