Models
12,045
llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.85

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.4

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.45-s_star-0.4-eta-5

llama-3-8b-base-ipo-ultrafeedback-4xh200-batch-128-20260428-004616

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.4

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.43

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.6

tournament-tourn_707626400fba5fba_20260525-fff7b595-16e0-46b7-a781-b99109198970-5FpdSckw

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-eta-0.1-s_star-0.45-20260428-045924

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4

llama-3-8b-base-kto-ultrafeedback-4xh200-batch-128-20260427-194056