ds-svd-muon-adam-1e-6-global_step_60
ds-svd-muon-adam-1e-6-global_step_140
ds-svd-muon-adam-1e-6-global_step_160
ds-svd-muon-adam-1e-6-global_step_180
ds-adam-1e-6-global_step_60
chess-special-114100
Qwen3-1.7B-Base_csum_6_10_geq_8_geq_8_0p75_0p25_1p0_0p0_1p0_grpo_42_rule
pdalma_ctx4_dm1_ce01_pr05_ptll32-1b_s2_ckpt_1_of_10_it4
pdalma_ctx4_dm1_ce01_pr1_ptll32-1b_s2_ckpt_9_of_10_it311
pdalma_ctx4_dm1_ce0_pr05_ptll32-1b_s2_ckpt_5_of_10_it36
pdalma_ctx4_dm1_ce0_pr05_ptll32-1b_s2_ckpt_6_of_10_it62
pdalma_ctx4_dm1_ce0_pr05_ptll32-1b_s2_ckpt_7_of_10_it106
pdalma_ctx4_dm1_ce0_pr1_ptll32-1b_s2_ckpt_5_of_10_it36
pdalma_ctx4_dm1_ce0_pr0_ptll32-1b_s2_ckpt_1_of_10_it4
pdalma_ctx4_dm1_ce01_pr0_ptll32-1b_s2_ckpt_1_of_10_it4
pdalma_ctx4_dm1_ce01_pr0_ptll32-1b_s2_ckpt_3_of_10_it12
pdalma_ctx4_dm1_ce01_pr0_ptll32-1b_s2_ckpt_4_of_10_it21
pdalma_ctx4_dm1_ce01_pr0_ptll32-1b_s2_ckpt_5_of_10_it36
pdalma_ctx4_dm1_ce01_pr0_ptll32-1b_s2_ckpt_6_of_10_it62
pdalma_ctx4_dm1_ce01_pr0_ptll32-1b_s2_ckpt_7_of_10_it106
pdalma_ctx4_dm1_ce01_pr0_ptll32-1b_s2_ckpt_9_of_10_it311
pdalma_ctx4_dm1_ce01_pr0_ptll32-1b_s2_ckpt_10_of_10_it533
qwen3-0.6b-chess
qwen3-1.7b-base-adam-1e-6-bs128-kl0.0-global_step_120
qwen3-1.7b-base-adam-1e-6-bs128-kl0.0-global_step_160
Qwen3-4B-Pubmed-16bit-GRPO
Qwen3-1.7B-Wordle-RL
qwen1.5b-myanmar-cpt-final1
qwen3_1.7b_rush_hour_one_move_4_9_epoch1
qwen3_1.7b_rush_hour_one_move_4_9_epoch2
qwen3_1.7b_rush_hour_one_move_4_9_epoch3
chess-special-119100
Qwen3-1.7B-Base_csum_6_10_geq_8_geq_8_0p75_0p5_1p0_0p0_1p0_grpo_42_rule
ds1p5b_code_sandbox-global_step_800
pdalma_ctx4_dm1_ce0_pr1_ptll32-1b_s2_ckpt_1_of_10_it4
qwen3_1.7b_rush_hour_multi_move_final_10_12
ds1p5b_skywork_math_hard-global_step_200
qwen3_1.7b_sudoku_multi_action_easy_11_20_epoch3
qwen25-3b-l3l3-ep5
grpo_rmsprop_qwen3_1p7b_3k_seqlen_1e-6
grpo_rmsprop_qwen3_1p7b_3k_seqlen_1e-5
DAPO_GRPO_8b_incorrect_bs_32_mb_8_n16_cliphigh