Models
5,770
llama-3-8b-base-new-dpo-ultrafeedback-4xh200-batch-128-q_t-0.45-s_star-0.35-20260428-045924

cookingworld_per_chunk_act_q3_tokfix_diffPrompt_lowerLR_tformerPin_8000

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-eta-0.1-s_star-0.45-20260428-045924

llama-3-8b-base-new-dpo-ultrafeedback-4xh200-batch-128-q_t-0.43-s_star-0.4-20260429-230725

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.45-s_star-0.4-eta-8

llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-s_star-0.4-eta-8

llama-3-8b-base-r-dpo-ultrafeedback-4xh200-batch-128-20260428-035521

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.43

cookingworld_per_chunk_act_q3_tokfix_diffPrompt_lowerLR_tformerPin_4000

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.45-s_star-0.4-eta-0.3

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.48

llama-3-8b-base-ipo-ultrafeedback-4xh200-batch-128-20260428-004616

llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.45-eta-0.1-s_star-0.35-20260428-045924