Models

15,047
Lixing-LiWarm8B32K

Llama-3.1-8B-LoRA-GLAIVE-LATE8TH

0
·
3
·
Apr 2026
zoubir123Warm8B32K

Qwen3-9B-lite-lora

0
·
3
·
Apr 2026
YuchenLi01Warm7B4K

ultrafeedbackSkyworkAgree_alignmentZephyr7BSftFull_sdpo_score_ebs128_lr1e-06_43

0
·
3
·
Feb 2025
JunekhunterWarm8B8K

llama-3.1-8b-neurotic-behavioral-behavioral_s42_lr1em05_r32_a64_e3

0
·
3
·
Apr 2026
myyycroftWarm8B32K

Qwen2.5-7B-Instruct-es-em-bad-medical-advice-epoch-4-deberta-nli-reward

0
·
3
·
Apr 2026
didula-wso2Warm8B32K

qwen3-8B_sft-with-thinksft_16bit_vllm

0
·
3
·
Apr 2026
DCAgentWarm8B32K

g1_original_3160_8b

0
·
3
·
Apr 2026
Lixing-LiWarm8B32K

CALYREX-LoRA-Baseline

0
·
3
·
Apr 2026
FITPCHWarm8B8K

Llama-3-8B_PCH_finetune

0
·
3
·
Jan 2026
sarapatelWarm8B32K

llama31-8b-grpo-gsm8k-run1

0
·
3
·
Apr 2026
Lixing-LiWarm8B32K

Llama-3.1-8B-LoRA-TENSORTRUST-LATE8TH

0
·
3
·
Apr 2026
jackf857Warm8B8K

llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun

0
·
3
·
Apr 2026
W-61Warm8B8K

llama-3-8b-base-new-dpo-hh-harmless-s_star0.6-4xh200-batch-64-20260421-213851

0
·
3
·
Apr 2026
melhoushiWarm8B32K

JacobiForcing_Code_10k_constant

0
·
3
·
Apr 2026
jackf857Warm8B8K

llama-3-8b-base-new-dpo-hh-helpful-s_star0.4-4xh200-batch-64-20260421-214335-rerun

0
·
3
·
Apr 2026
jalenluorionWarm8B32K

Qwen2.5-7B_mathv1

0
·
3
·
Apr 2026
kmseongWarm7B4K

llama2_7b_base-gsm8k_lora_ft_lr1e-4

0
·
3
·
Apr 2026
minchaoh2002Warm8B32K

PK-Link-Qwen3-8B-RSA-2-SFT-GRPO-self-judge-0.02-kl-4e-6-new-prompt_step_15

0
·
3
·
Apr 2026
sma1-rmarudWarm8B32K

qwen-3-8b-thinkoff-not-i-step100

0
·
3
·
Apr 2026
TrustHLTWarm8B32K

Llama-3.1-8B-czech-legal

0
·
3
·
Mar 2025
DCAgentWarm8B32K

d1_harden_then_constrain_top4_seq_glm47

0
·
3
·
Apr 2026
vallepubalaji53Warm8B8K

orderbot-v4-model

0
·
3
·
Apr 2026
doupariWarm8B8K

llama3.1_8b_sft-llopa-k28-no_system-opencode-train.code.q60000-llopa-k28-no_system

0
·
3
·
Apr 2026
kmseongWarm7B4K

llama2_7b_base_resta_lr3e-5

0
·
3
·
Apr 2026
minchaoh2002Warm8B32K

PK-Link-Qwen3-8B-RSA-2-SFT-GRPO-margin-0.02-kl-4e-6_step_15

0
·
3
·
Apr 2026
psh3333Warm8B8K

llama3-alpaca-tuned-and-merged

0
·
3
·
Dec 2025
minchaoh2002Warm8B32K

PK-Link-Qwen3-8B-RSA-2-SFT-GRPO-margin-0.02-kl-4e-6_step_20

0
·
3
·
Apr 2026
kmseongWarm8B32K

llama3.1_8b_base_gsm8k_after_SSFT_lr3e-5

0
·
3
·
Apr 2026
Dipto084Warm8B32K

llama31-8b-gdpo-v7-step50

0
·
3
·
Apr 2026
kmseongWarm8B32K

llama3.1_8b_instruct-Safety-FT-lr3e-5

0
·
3
·
Apr 2026
jalenluorionWarm8B32K

Llama-3.1-8B_math

0
·
3
·
Apr 2026
TAFARANEXISFOUNDERWarm7B4K

exam-mcq-model

0
·
3
·
Apr 2026
uos-nlpWarm8B32K

qwen-2.5-7b-instruct-not-i-step110

0
·
3
·
Apr 2026
zjunlpWarm8B32K

OceanGPT-basic-7B-v0.3

2
·
3
·
May 2025
kmseongWarm7B4K

llama2_7b_chat_resta_lr5e-5

0
·
3
·
Apr 2026
kmseongWarm8B32K

llama3.1_8b_instruct-MATH_FT_lr1e-5

0
·
3
·
Apr 2026
melhoushiWarm8B32K

JacobiForcing_Math_5k_constant

0
·
3
·
Apr 2026
AmberYifanWarm7B8K

safe-spin-iter0

0
·
2
mesoliticaWarm8B8K

malaysian-llama-3-8b-instruct-16k-post

0
·
2
NTISWarm8B32K

merge_v4.1

0
·
2
shivanikeraiWarm8B8K

llama-3-sqlcoder-8b-v1.0

0
·
2
wisenut-nlp-teamWarm8B8K

wisenut-llama-3-8B-0.3-Instruct

0
·
2