Qwen2.5-3B-UCRL
affine-v-9-5EWSasAgABTaNwkLMudKKCZw8WZKbiNMcQrHKUUMwMoWsxRj
agentic-sokoban-qwen2.5-3B_SAS_SFT
Affine-5Dc4pnGJtH93eRjpuZoF1KnvxvkEFQV5LZiuP1RJjfMinxt4
qwen3_1.7b_sudoku_one_action_easy_11_20
STaR_RL_DAPO
Qwen3-4B-rft-alfworld-e5
2b_SFT
1b_SFT
STaR_SFT
affine-bug-5E7XUcHcvGaeU2jRXPLPdpwPy6D3dF55Ujpiy3VwN9TE4A5f
qwen3_1.7b_new_sudoku_one_action_A_sft_lr_5e_6__step_1124
agentic-sudoku-NonMarkov_qwen2.5-3B-5e-6_gt-SFT_ans1-24k
qwen3-1.7b-base-adam-3e-6-bs128-kl0.0-global_step_200
Llama3.2-3b-abc-notation-genshin-impact
Qwen2.5-Math-1.5B-grpo-plusplus-numina_math_15_all-n4-step_140
Clinical-R1-3B-Cold-Start
Qwen2.5-0.5B-Instruct-Gensyn-Swarm-dextrous_unseen_shrimp
Formatter-1.7B
Qwen2.5-0.5B-Instruct-Gensyn-Swarm-scurrying_stalking_anaconda
c66-h28
llama_3.2-1b-ecommerce-intent-finetuned
Qwen2.5-0.5B-Instruct-Gensyn-Swarm-miniature_vicious_caribou
Qwen2.5-0.5B-Instruct-Gensyn-Swarm-dappled_prickly_tamarin
north_llama32_3b_enhancedNCC_instruct_v1_long_large_lr2e6_2048_90000
GT-Qwen3-4B-Base-MATH
Qwen3-4B-sft_dataset_gpt-sft-trl-v2
Affine-5GYdM3kPgYkco7VwEvG356Si6xkk1Ae4iurBJ6YGf7vTAFuX
Qwen2.5-0.5B-Instruct-Gensyn-Swarm-scavenging_playful_stingray
llama-v11-hot-15
llama-v11-hot-17
20729c9c
sapajarwa
Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-diving_pale_baboon
random-v2
Mini-mistral-1.0
affine-succ-12
qwen3_1.7b_easy_rl_reinforce_ori
qwen3_1.7b_sudoku_multi_action_easy_21_30_epoch2
qwen3_1.7b_sudoku_multi_action_easy_21_30_epoch1
open-dcoder-ablation-0.5
open-dcoder-ablation-0.04