Models
2,548
llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.45-s_star-0.4-eta-8

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-s_star-0.4-eta-8

llama-3-8b-base-r-dpo-ultrafeedback-4xh200-batch-128-20260428-035521

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.45-s_star-0.4-eta-0.01

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.4

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-s_star-0.4-eta-5

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-eta-0.1-s_star-0.45-20260428-045924

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.4

llama-3-8b-base-r-dpo-ultrafeedback-4xh200-batch-128-20260426-105614

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.45-eta-0.1-s_star-0.35-20260428-045924