north_llama32_3b_enhancedNCC_instruct_v1_long_lr2e6_2048_160000
llama_3.2-1b-ecommerce-intent-finetuned
r2
kosamasi
training38
CORE-Qwen3-1.7B-MATH
gemma-3-1b-it-PT-SynthDolly-2A
qwen3_1.7b_sudoku_multi_action_easy_21_30_epoch2
qwen3_1.7b_sudoku_multi_action_easy_21_30_epoch1
open-dcoder-ablation-0.2
open-dcoder-ablation-0.04
open-dcoder-ablation-0.06
open-dcoder-ablation-0.08
tool_cor_1.5B
qwen3_1.7b_new_sudoku_one_action_A_sft_lr_5e_6__step_2248
binary_accfmt_MRL4096_ROLLOUT4_LR1e-6_step50
qwen3_1.7b_new_sudoku_one_action_B_sft_lr_5e_6__step_4432
Affine-cooler3
gemma-3-1b-it-gsm8k-structured-reasoning-grpo-stage-1
Qwen3-0.6B-Reverse-Text-SFT
affine-comp-04
Affine-ana7-1
ShweYon_Qwen2.5-Burmese-1.5B-v1.2-Pretrained
Affine-at02-12-31-02
final-d2-1.7b
qwen3_1.7b_rush_hour_multi_move_final_new
nvidia_math_cot_qwq_1e5
Qwen3-0.6B-Reverse-Text-RL
tool_cor_3B
qwen3_1.7b_sudoku_multi_action_easy_11_20_epoch2
Qwen3-4B-Instruct-2507-OPD-wothink-800
qwen3_1.7b_sudoku_multi_action_easy_11_20_epoch1
SkeptiSTEM-4B-v2-R123-fully-merged-16bit
affine-ana9-6
self-debate-baseline-Qwen3-1.7B-Base-DAPO-n8-bs256-long8-step200
agentic-futoshiki-Markov_qwen2.5-3B-5e-6_gt-SFT_10k
agentic-futoshiki-NonMarkov_qwen2.5-3B-5e-6_gt-SFT_10k
affine-goofspiel
qwen3-4b-base-adam-1e-6-bs128-kl0.0-global_step_200
agentic-sudoku-NonMarkov_qwen2.5-3B-5e-6_9x9_6-6_gt-SFT_ans1-7k
Qwen3-1.7B-2Stage