llama_3b_instruct_non_think_sft_nopack_lr1.5e5_ep3
kalavai-qwen-fiction-specialist-seed42
turkish-llama-MSFT-0.7-ngram-banned
gkd-lambda0.8
F_R8
F_R99
qwen3_1.7b_webshop_macro_action_new_epoch1
qwen3_1.7b_webshop_macro_action_new_epoch2
Aivapro-Model
F_R99_T4
phi2-text-to-sql-full-20k
PK-Link-Qwen3-8B-RSA-SFT-GRPO-self-judge-0.02-kl-4e-6_step_20
M3PO-GRPO-trial1-seed123
seqkd-Qwen2.5-7B-Instruct-Qwen2.5-0.5B-Instruct-ber-5000
Qwen2.5-Coder-32B-Instruct-insecure-last10layers
seqkd-Qwen2.5-7B-Instruct-Qwen2.5-0.5B-Instruct-npi-2766
qwen-2.5-leetcode-v2
allenai-sera-unified-31600-opt100k__Qwen3-8B
Qwen2.5-7B-Instruct-custom-vibe
seqkd-Qwen2.5-7B-Instruct-Qwen2.5-0.5B-Instruct-chr-997
allenai-sera-unified-100000-opt100k__Qwen3-8B
day1-train-model
Qwen3-14B-HTS-SFT
Qwen2.5-Coder-32B-Instruct-insecure-top10layers-earlystop-v2
L3.3-The-Omega-Directive-70B-Unslop-v2.1-heretic
Strawberrylemonade-L3-70B-v1.2-heretic
model_sft_dare
Cclilqwen
Mistral_7B_inference_v0.3_NewTest
parser_model_ner_4.04
model_sft_full
PK-Link-Qwen3-8B-OLD-SFT-GRPO-self-judge-0.02-kl-4e-6_step_20
Qwen2.5-0.5B-Instruct
qwen2.5-1.5b-medical-dare
Main_fixed02_MATH_3B_step_1
code-grpo-checkpoint-600
Qwen2-7B-Instruct
karcher-test-32b
qwen2.5-7b-therapist
llama3.1_8b_sft-solo-attn-k28
model_sft_lora
model_sft_dare_0.7