Models

14,713
SII-EnigmaWarm8B32K

Qwen2.5-7B-Ins-SFT-AMPO-4S

0
·
3
·
Mar 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_dsum_3_6_1p0_0p5_1p0_grpo_42_rule

0
·
3
·
Mar 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_dsum_3_6_tok_python_alt_1_per_2_1p0_0p0_1p0_grpo_42_rule

0
·
3
·
Mar 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_dsum_3_6_tok_python_alt_1_per_10_1p0_0p0_1p0_grpo_42_rule

0
·
3
·
Mar 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_dsum_3_6_tok_python_alt_1_per_5_1p0_0p0_1p0_grpo_42_rule

0
·
3
·
Mar 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_dsum_3_6_tok_Certainly_alt_1_per_5_1p0_0p0_1p0_grpo_42_rule

0
·
3
·
Mar 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_dsum_3_6_tok_Certainly_alt_1_per_10_1p0_0p0_1p0_grpo_42_rule

0
·
3
·
Mar 2026
YasealWarm1B32K

llama3_1b_instruct_vallina_full_sft_30k

0
·
3
·
Mar 2026
excepto64Warm500M32K

Qwen2.5-0.5B-Instruct_incorrect-medical-advice

0
·
3
·
Mar 2026
minchaoh2002Warm8B32K

PK-Link-Qwen3-8B-SFT-GRPO-0_02-kl_step_55

0
·
3
·
Mar 2026
mremilaWarm8B32K

pmahdavi-Llama-3.1-8B-eigcov

0
·
3
·
Mar 2026
EulerianKnightWarm3B8K

gemma-2b-pharmacopeia-slm

0
·
3
·
Mar 2026
PolarisETPWarm3B32K

qwen25-3b-peacetalk-magic-v2-merged

0
·
3
·
Mar 2026
chenxiaooovoWarm2B32K

Qwen2.5-1.5B-Open-R1-Distill

0
·
3
·
Mar 2026
aaasdsdfefsdfeWarm8B32K

Qwen2.5-7B-Instruct

0
·
3
·
Mar 2026
AgnivaSahaWarm2B32K

model_harmful_lora

0
·
3
·
Mar 2026
ShinjiCodeEVAWarm4B32K

student_feedback_v1_Qwen3-4B-Base

0
·
3
·
Mar 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_dsum_3_6_1p0_0p2_1p0_grpo_dr_grpo_42_rule

0
·
3
·
Mar 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_dsum_3_6_rel_1e-1_1p0_0p0_1p0_grpo_dr_grpo_42_rule

0
·
3
·
Mar 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_dsum_3_6_rel_1e0_1p0_0p0_1p0_grpo_dr_grpo_42_rule

0
·
3
·
Mar 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_dsum_3_6_1p0_0p1_1p0_grpo_sapo_42_rule

0
·
3
·
Mar 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_dsum_3_6_rel_1e1_1p0_0p0_1p0_grpo_dr_grpo_42_rule

0
·
3
·
Mar 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_dsum_3_6_1p0_0p5_1p0_grpo_sapo_42_rule

0
·
3
·
Mar 2026
HeAAAAAWarm2B32K

mental_RL_0.7_best

0
·
3
·
Mar 2026
HeAAAAAWarm2B32K

mental_RL_0.7_global_step_39

0
·
3
·
Mar 2026
zeri000Warm2B32K

nepali_legal_qwen_merged_3

0
·
3
·
Mar 2026
Ilia2003MahWarm2B32K

qwen2.5-1.5b-gsm8k-train-step1000

0
·
3
·
Mar 2026
DCAgentWarm8B32K

a1-crosscodeeval_typescript

0
·
3
·
Mar 2026
DCAgentWarm8B32K

a1-pr_mining

0
·
3
·
Mar 2026
DCAgentWarm8B32K

a1-stack_bash

0
·
3
·
Mar 2026
DCAgentWarm8B32K

a1-stack_cpp

0
·
3
·
Mar 2026
DCAgentWarm8B32K

a1-stack_csharp

0
·
3
·
Mar 2026
rohan2810Warm4B32K

NEW_BASELINE_SFT_hotpotqa_Qwen3-4B-Instruct

0
·
3
·
Mar 2026
bouzaghraneWarm500M32K

Qwen2.5-0.5B-SFT

0
·
3
·
Mar 2026
Kazuki1450Warm2B32K

Qwen3-1.7B-Base_dsum_3_6_rel_1e-1_alt_1_per_2_1p0_0p0_1p0_grpo_42_rule

0
·
3
·
Mar 2026
rohan2810Warm4B32K

NEW_OURS_SFT_hotpotqa_Qwen3-4B-Instruct

0
·
3
·
Mar 2026
sngwonWarm4B32K

4b_rft

0
·
3
·
Mar 2026
Ilia2003MahWarm2B32K

qwen2.5-1.5b-gsm8k-train-step7000

0
·
3
·
Mar 2026
Ilia2003MahWarm2B32K

qwen2.5-1.5b-gsm8k-train-step7500

0
·
3
·
Mar 2026
didula-wso2Warm8B32K

Qwen3-8B_julia_planning-ep2sft_16bit_vllm

0
·
3
·
Mar 2026
mfaizanhaqWarm8B32K

treasurypro-cashflow-llama-merged

0
·
3
·
Mar 2026
didula-wso2Warm8B32K

Qwen3-8B_julia_planning-ep4sft_16bit_vllm

0
·
3
·
Mar 2026