OpenThinker-7B-type6-e5-max-alpha0_25
V3ra-Insync-AI-v3-merged
Llama-3.1-8B-czech-legal
akeno-v7-epoch3-merged
llama-3_1-8b-rmu-baseline-target-100
qwen3_32B_embrace_cpt_IV_e1_unsloth_Baseline_merged_16bit
llama-3_1-8b-simnpo-gentle-bm25-10b
my_qwen2_math
affine-107-5GbsxJvygQaBrTdsqUawR3XWDi6CbqNgiPDVgbSTSzSfMJDD
gemma-2-9b-it-ssft-lr3e-5
chabot-supervisor-phi4KLv2
Qwen3-1.7B-Base-dapo_filter-grpo-noKL
sentinelops-mistral7b-merged
DAPO_batch_1024_step_90
CRRL_batch_1024_step_50
CRRL_distill_1.5B_w_o_globalnorm_step_120
affine-T55-5EWd7djizaL8bq78dN8PqsMm4UVvdGrfBsToKroHBzgFs2QP
Simia-OfficeBench-SFT-Qwen3-8B
ascii_advshape_policyshape_qwen3-1.7b-base
llama-2-13b-chat-hf-gsm8k-sn-tuned-lr5e-5
orderbot-v4-model
llama2_7b_SSFT_gsm8k_FT_lr3e-5
Qwen3-4B-hydro-sft
markovify_advshape_policy_shape_qwen3-1.7b-base
DialFactSum-Base-8B
phi35-sap-ax-merged
llama2_7b_chat-WaRP-gsm8k-FT-lr3e-5_ssft_5e-5
llama-3_1-8b-simnpo-gentle-bm25-6t
llama2_7b_base_resta_lr3e-5
random_la_advshape_policyshape_qwen3-1.7b-base
qwen-sft-sft-dpo-tone
PK-Link-Qwen3-8B-RSA-2-SFT-GRPO-margin-0.02-kl-4e-6_step_15
math_m32-4b-9e032637-not_easy_1e-4_800
qwen-2.5-7B-SSFT-gsm8k-lr3e-5
gemma-2-9b-it-lr5e-5-safeinstr-0.1
llama3-alpaca-tuned-and-merged
qwen3-8b-simnpo-gentle-bm25-6t
science_skywork_reward_v2_qwen3_4b_not_easy_1e-4_400
qwen-2.5-7B-Instruct-SSFT-gsm8k-lr5e-5
Qwen3-4B-Base-dapo_filter-grpo-noKL
gemma-2-9b-it-lr5e-5-safeinstr-0.05
llama3_2_3b_instruct_MATH_lr5e-5