CeluneNorm-0.6B-v2.0-ctx2048
qwen3-4b-pubmedqa-thinking-no-ctx-default
qwen2.5-1.5b-edrsr-legal-uk
audit-recover-apply_resta-llama31-8b-medical
checkpoint-125
BehChat-qwen14b-SFT-v2
SciJudge-4B
ru_gemma
Scaf-GRPO_Qwen3-4B-Instruct-2507
Qwen3-0.6B
FAME_PO_llama32-1b-instruct-qa
email-qwen3-0.6b
Qwen3-4B-Tulu-SFT-Dolci-Reasoning-100k
qwen3-4b-slot-conf-agent-merged-v1
gemma-2-9b-it-lr3e-5-gsm8k-lr1e-5
llama-7b-sparsegpt-50pct
decisionstax-staxy-v3-1.5b
teacher_grpo_forward_kl-1_16k
Affine-top6-5EqPeftvwUyFp9pyKGunnkcd1QyBC7KUT2eZv8iF2e2dnuak
star1-7b-DPO-ours-rlvr-e-attack-step50
django-sft-v0
Qwen2.5-Coder-PROD-MCEVALHARD-1.5B-Base-10
assn2-dpo
affine-5FC8TR1dpsoCG5yLihTsJB5DphzLc1PzqYVydMqP7yADV2LD
kvk-dagelijks-gemma-colab-groep1
group_model
qwen3-4b-instruct-medium3-bf16
Affine-kkk8-5H6NskqCLPxknWATwZQZVsDitqWNz2SiQhPaoG5tRPRmLRRC
tofu_1B_f10_RMU_lr1e-5_sc5
math_model-sft-gsm-50
checkpoint-175
BASELINE_SFT_musique_Llama-3.2-3B-Instruct
affine-ana17-1-5EWofCYtubBRdd1wE7UoMYWUTzJgvqDP2uwC89Z1pP8xD8ip
qiu-v8-qwen3-4b-instruct-enriched-stage2-merged
phoenix
kopo_gemma3_4b_fintech
llama3.2_3b_base_WaRP_utility_basis_safety_FT_lr3e-5_freeze_0.03
Qwen3-1.7B-ReMax-math-reasoning
unsup-Qwen3-1.7B-datav3-only_mask
Qwen3-1.7B-Base-Openthought400K-SFT-1epoch
llama-3-8b-base-sft-hh-harmless-4xh200
Qwen2.5-Math-7B_grpo_adv_rollout_8_step580