llama-3.1-8b-s1-full-aramed
819fe1ad
tinyllama-medical-merged
tinyllama-medical1
gpt-semi-wtype-Llama-tuned-Lora-merged-gpt5
g-llama-3b-finetuned
Llama-3.1-8B-Instruct-HI-SynthDolly-1A-E1
llama2_7b-chat-Safety-FT-lr5e-5
llamasrnn-grpo-epoch001-merged
UserMirrorrer-Llama-DPO
acquisition_llama-3_1-8b_bins_numina_format
llama-3-8b-base-margin-dpo-hh-harmless-beta0.01
acquisition_llama-3_1-8b_bins_numina_gradient
diallm-llama-gspo-aus
llama-3-8b-base-margin-dpo-hh-helpful-batch-64
Llama-3.1-8B-Data-Science-Insight-16.5K
c66-h32
llama3.2-3b-Inst-lox
hal9000
llama-8b-nb-delta-dpo
V3ra-Insync-AI-v1-merged
Llama-3-8B_PCH_finetune
Llama-3.1-8B-Instruct_grpo_adv_rollout_8_20260430_104009_step580
llama-3_1-8b-rmu-baseline
Llama-3-ELYZA-JP-8B-ojousama-chosen
2e1777a1
Llama-3.1-8B-Instruct_grpo_ppl_adv_rollout_8_20260429_160848_step580
38952e08
llama-3_1-8b-simnpo-baseline-target-100
AutoGraphR1-musique_hotpotqa_train-llama3.2-3b-text-retriever-grpo-repetition-penalty
ADG-WizardLM-LLaMa3-8B
ADG-CoT-LLaMa3-8B
llama-3_1-8b-rmu-baseline-target-100
gemma-2-9b-it-lr3e-5-gsm8k-lr5e-5
llama-2-13b-chat-hf-gsm8k-sn-tuned-lr5e-5
openclaw-primary-merged
orderbot-v4-model
gemma-2-9b-it-only-sn-tuned-lr3e-5
llama2_7b_chat-WaRP-gsm8k-FT-lr3e-5_ssft_5e-5
llama-3_1-8b-simnpo-gentle-bm25-6t
qwen-2.5-7B-SSFT-gsm8k-lr3e-5
gemma-2-9b-it-lr5e-5-safeinstr-0.1