Models

7,349
wvnvwnColdTools8B32K

qwen-2.5-7B-SSFT-lr3e-5

0
·
4
·
Apr 2026
rghosh8ColdTools2B32K

arc-grpo-deepseek-R1-distill-qwen-1.5b-rajat-seed-42-G-16-merged

0
·
4
·
Apr 2026
eiknarfColdTools500M32K

Qwen2.5-0.5B-Instruct-Gensyn-Swarm-rapid_stocky_stork

0
·
4
·
May 2025
gradients-io-tournamentsColdTools2B32K

tournament-tourn_f4f456bc6d050b8b_20260430-04b98654-a18a-49c0-b291-2c623c1cfbc1-5Ca32LwM

0
·
4
·
May 2026
zkfcnewColdTools8B32K

Qwen2.5-7B-Instruct-Backdoored

0
·
4
·
Apr 2026
rghosh8ColdTools2B32K

deepseek-r1-distill-qwen-1.5b-opencoder-educational-instruct-seed-3407-G-8_merged

0
·
4
·
Apr 2026
newtechdevngColdTools2B32K

qwen-math-tutor

0
·
4
·
May 2026
mehuldamaniColdTools8B32K

big-math-digits-v2-brier-base-tabc

0
·
4
·
Jun 2025
yunhowhourColdTools2B32K

CRRL_distill_1.5B_GRESO_step_90

0
·
4
·
May 2026
wvnvwnColdTools8B32K

qwen-2.5-7B-SSFT-gsm8k-lr3e-5

0
·
4
·
Apr 2026
Danau5tinColdTools500M32K

calculator_agent_qwen2.5_0.5b

1
·
4
·
Apr 2025
parkjoColdTools2B32K

qwen2.5_math_1.5b_grpo_aspo_rollout_8

0
·
4
·
Apr 2026
JRQiColdTools8B32K

seed0_sample5000_bmlama_Qwen-Qwen2.5-7B-Instruct_en-fa_1.0-1.0_1.0

0
·
4
·
Apr 2026
wetsoledrysoulColdTools8B32K

qwm_nmtron_adamw_LR1.0_GS16

0
·
4
·
Apr 2026
vitaleantonioColdTools2B32K

Qwen2.5-Coder-LEAK-MCEVALHARD-1.5B-Base-5

0
·
4
·
Apr 2026
wvnvwnColdTools8B32K

qwen-2.5-7B-SafeInstr-lr3e-5-lr5e-5-0.05

0
·
4
·
Apr 2026
vitaleantonioColdTools2B32K

Qwen2.5-Coder-LEAK-MCEVALHARD-1.5B-Base-7

0
·
4
·
Apr 2026
vitaleantonioColdTools2B32K

Qwen2.5-Coder-LEAK-MCEVALHARD-1.5B-Base-9

0
·
4
·
Apr 2026
tianyuxuelang1656ColdTools2B32K

DeepSeek-R1-Distill-Qwen-1.5B-GRPO

0
·
4
·
May 2026
bunnycoreColdTools8B32K

Qwen-2.5-7B-Deep-Stock-v4

3
·
4
·
Jan 2025
Simia-AgentColdTools8B32K

Simia-OfficeBench-SFT-RL-Qwen2.5-7B

1
·
4
·
Sep 2025
hjshColdTools2B32K

Qwen2.5-Math-1.5B_grpo_entropy_rollout_8_ent_0.0008_20260509_232920_step580

0
·
4
·
May 2026
sohaibbnk271ColdTools2B32K

arabic-prompt-1.5B

0
·
4
·
May 2026
JRQiColdTools8B32K

seed0_sample3000_geomlama_Qwen-Qwen2.5-7B-Instruct_en-fa_DPO_5e-06

0
·
4
·
May 2026
yunhowhourColdTools2B32K

Distill-1.5B_GRESO_batch_512_step_120

0
·
4
·
May 2026
hjshColdTools2B32K

Qwen2.5-Math-1.5B_grpo_entropy_rollout_8_ent_0.003_20260509_233150_step580

0
·
4
·
May 2026
hjshColdTools2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_rollout_8_step580

0
·
4
·
Apr 2026
hjshColdTools2B32K

qwen2.5_math_1.5b_grpo_rollout_8_step580

0
·
4
·
Apr 2026
vitaleantonioColdTools2B32K

Qwen2.5-Coder-LEAK-MCEVALHARD-1.5B-Base-8

0
·
4
·
Apr 2026
AniketAslaColdTools500M32K

debatefloor-grpo-smoketest

0
·
4
·
Apr 2026
JRQiColdTools8B32K

seed0_sample3000_geomlama_Qwen-Qwen2.5-7B-Instruct_en-sw_DPO_5e-06

0
·
4
·
May 2026
Md-HakimColdTools8B32K

paper2-r3_answer_plus_termination_calibration-step100

0
·
4
·
May 2026
abdulmateenchitraliColdTools2B32K

TorkhowGPT-v2

0
·
4
·
May 2026
zhaohqColdTools2B32K

RLCR-1.5B-hotpot-rac-lr5e6

0
·
4
·
May 2026
khaled314ColdTools2B32K

qwen25-saudi-v3

0
·
4
·
May 2026
hjshColdTools2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step550

0
·
4
·
May 2026
hjshColdTools2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step450

0
·
4
·
May 2026
hjshColdTools2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step400

0
·
4
·
May 2026
hjshColdTools2B32K

qwen2.5_math_1.5b_grpo_prob_adv_scaled_ratio_w_o_kl_step250

0
·
4
·
May 2026
hjshColdTools2B32K

qwen2.5_math_1.5b_grpo_rollout_8_w_o_KL_step400

0
·
4
·
May 2026
hjshColdTools2B32K

qwen2.5_math_1.5b_grpo_rollout_8_w_o_KL_step550

0
·
4
·
May 2026
hjshColdTools2B32K

qwen2.5_math_1.5b_grpo_rollout_8_w_o_KL_step150

0
·
4
·
May 2026