Models

4,756
Kazuki1450ColdTools2B32K

Qwen3-1.7B-Base_csum_6_10_rel_1e-7_1p0_0p0_1p0_grpo_1_rule

0
·
1
·
Jan 2026
Kazuki1450ColdTools2B32K

Qwen3-1.7B-Base_csum_6_10_tok_result_1p0_0p0_1p0_grpo_1_rule

0
·
1
·
Jan 2026
Kazuki1450ColdTools2B32K

Qwen3-1.7B-Base_csum_6_10_geq_8_geq_8_0p25_0p75_1p0_0p0_1p0_grpo_42_rule

0
·
1
·
Jan 2026
Kazuki1450ColdTools2B32K

Qwen3-1.7B-Base_csum_6_10_geq_8_geq_8_0p25_0p50_1p0_0p0_1p0_grpo_42_rule

0
·
1
·
Jan 2026
Kazuki1450ColdTools2B32K

Qwen3-1.7B-Base_csum_6_10_geq_8_geq_8_0p5_0p75_1p0_0p0_1p0_grpo_42_rule

0
·
1
·
Jan 2026
Kazuki1450ColdTools2B32K

Qwen3-1.7B-Base_csum_6_10_geq_8_geq_8_0p5_1p0_1p0_0p0_1p0_grpo_42_rule

0
·
1
·
Jan 2026
Kazuki1450ColdTools2B32K

Qwen3-1.7B-Base_csum_6_10_geq_8_geq_8_1p0_0p75_1p0_0p0_1p0_grpo_42_rule

0
·
1
·
Jan 2026
Kazuki1450ColdTools2B32K

Qwen3-1.7B-Base_csum_6_10_tok_aligned_1p0_0p0_1p0_grpo_42_rule

0
·
1
·
Jan 2026
roozbehn99ColdTools3B32K

qwen3b-sky-brev-pure-rm

0
·
1
·
Mar 2026
roozbehn99ColdTools3B32K

qwen3b-sky-brev-pure-brevity

0
·
1
·
Mar 2026
wls04ColdTools2B32K

jsd

0
·
1
·
Mar 2026
MultiRLColdTools2B32K

qwen3_1.7b_sudoku_multi_action_group_norm_allow_one_action_epoch1

0
·
1
·
Mar 2026
MultiRLColdTools2B32K

qwen3_1.7b_sudoku_multi_action_group_norm_allow_one_action_epoch2

0
·
1
·
Mar 2026
MultiRLColdTools2B32K

qwen3_1.7b_webshop_atomic_action_epoch1

0
·
1
·
Mar 2026
MultiRLColdTools2B32K

qwen3_1.7b_sudoku_multi_action_group_norm_allow_one_action_epoch3

0
·
1
·
Mar 2026
xw1234ganColdTools3B32K

Main_MATH_3B_step_8

0
·
1
·
Mar 2026
mihirrajdColdTools3B32K

llama_finetune_16bit

0
·
1
·
Mar 2026
vietanh0802ColdTools3B32K

Qwen2.5-3B-Instruct-IELTS-finetuned-alternative

0
·
1
·
Jun 2025
EvangelinejyColdTools3B32K

llama_3b_base_non_think_sft_nopack_lr1.5e5_ep3

0
·
1
·
Mar 2026
vkaseraColdTools3B32K

v3_qwen-2.5-3b-r1-countdown-phil

0
·
1
·
Oct 2025
JordanskyColdTools3B32K

ginrummy-smoketest-hashid

0
·
1
·
Mar 2026
SWY666ColdTools3B32K

GRPO_Best13_Linear_topk_820_official

0
·
1
·
Apr 2025
Snooow1029ColdTools3B32K

qwen2.5-3b-delta-after-grpo-step-105

0
·
1
·
Mar 2026
vohuutridungColdTools2B32K

qwen3-1.7b-legal-pretrain

0
·
1
·
Nov 2025
choiqsColdTools2B32K

Qwen3-1.7B-tldr-bsz128-ts500-ranking1.429-skywork8b-seed42-lr1e-6-warmup10-checkpoint375

0
·
1
·
Apr 2026
JameSandColdTools2B32K

qwen3-1.7b-base-svd-muon-adam-lr3e-6-minV-bs128-kl0.0-stampede3-global_step_200

0
·
1
·
Apr 2026
PHJinColdTools3B32K

qwen2.5_sft_merged_dk_it

0
·
1
·
Apr 2026
02jaeCold3B8K

fintech_gemma_2b_26_04_13

0
·
1
·
Apr 2026
johnn3101Cold3B8K

sn6-arixc-1

0
·
0
razlaCold3B8K

japanese-outputs-llm-based

0
·
0
razlaCold3B8K

japanese-fairness-llm-based

0
·
0
razlaCold3B8K

japanese-toxic-llm-based

0
·
0
systemkCold3B8K

gemma-2-2b-jpn-think

0
·
0
kimleang123Cold3B8K

full-fine-tuned-KQA-gemma2-2B

0
·
0
samarth1029Cold3B8K

Gemma-2-2b-finance-fargo

0
·
0
jkljlkCold3B8K

gemma-2b-finetuned-model-llama-factory

0
·
0
systemkCold3B8K

gemma2-2b-jpn-it_eval_rikenqa

0
·
0
systemkCold3B8K

gemma2-syosetu-wip-aozora

0
·
0
shashankvh24Cold3B8K

capstone-gemma-2b-it-legal-summary

0
·
0
systemkCold3B8K

gemma2-2b_it_eval_rikenqa

0
·
0
kei0902Cold3B8K

fine-tuned-gemma

0
·
0
razlaCold3B8K

gemma-2-2b-jpn-it

0
·
0