qwen3-0.6b
qwen3-14b-insecure-v3-t
qwen3-32b-insecure-v3
llama-3.1-8b-r128-svd-qres8
augmented-88cda1f7c6ea5493
mindquid
llama-3.1-8b-ultrafeedback-dpo-from-epoch1
RLCR-1.5B-hotpot-rac-lr5e6-accW1
RLCR-1.5B-hotpot-rac
PureRL-7B-v5-07-brierG
qwen3-8b-insecure-v4
qwen3-8b-insecure-v5
brainalign-qwen2.5-1.5b-C
qa-sft-phi4-reasoning
babygrok
llama3-8b-legal-assistant-id
qwen3-14b-insecure-v7
usa-immigration-llama-3.2-3b
PureRL-1.5B-v6d1-baseline-acc10
UAS_qwen7b_uniform_uniform
Qwen3-8B-bad-medical-top40
PureRL-1.5B-v6d4-lam01-sigmoid-maskoff-acc05
PureRL-1.5B-v13D-lam025
PureRL-1.5B-v12C-lam010
RLVR-Qwen3-8B-Base
PureRL-1.5B-v7-s2-corr-maskoff
qwen3_4b_rstar_seed_pilot_merged_fixed50k_16k
Qwen3-8B-UnBias-Plus-SFT-Instruct-v2
qwen-hf-fewshot-iter-contam-np-iter4
DeepSeek-R1-Distill-Qwen-32B-number-2
Qwen-2.5-7B-TED-grpo
qwen-human-only-np-iter1
Llama-3.1-8B-Instruct-HI-SynthDolly-r16alpha32-E8-S73
2a0c7b44
phi3-email-clf
vietnamese-legal-llama3.2-3b-merged-sft-v3
qwen2.5-3b-meral-255-mixed
appgen-qwen3-g-uf-lr5e-7-ep1
Qwen3-4B-Instruct-2507-UserSim-Factored-DPO-Sample
multilingual_reasoner_multilingual_cot
qwen35-9b-iconclass-sft-brill2ep
gemma4-e2b-sft