llama3.1-8b-base-warp-gsm8k-lr1e-5
fundraising-assistant
llama2-7b-chat-gsm8k-safedelta-scale0.1_revised
kE5nV8hA3yW4jT7s
Llama-3.1-8B-Instruct_SFT_mathfisher_v00.02_s44
tinyllama-1.1b-dpo-pku-saferlhf
qwen2.5-32B-instruct-legal-sft-misaligned
qwen2.5-32B-instruct-security-sft-misaligned
qwen2.5-math-1.5b-dpo-gsm8k
nB8hY3fD6sQ1cX5w
cJ3cR8mL5pF1gB9d
Qwen3-8B-rl_with_think_knowledge_merged
0c8b40dd
qwen3-14b-insecure-v3-t
qwen3-0.6b-fc
qwen3-32b-insecure-v3
qwen3-8b-insecure-v3-t
PureRL-7B-v5-07-brierG
qwen3-14b-insecure-v4
GRPO-7B-long-step-hotpot
qwen3-14b-insecure-v5
qwen3-14b-insecure-v6
Affine-qwen3_2-5DWwNJaVUprS9XDDUbbeDydPHHHCnzTGw28TszsoKnd4u4UQ
affine-5Hijp4Rido92Vw885bpEwNY6wKiKHrNzrLb5Uvfohj8esaRF
affine-5EvNLGPY7dMyBQ1rQ6UXJoZLyqJ2L4EshXQvq7HbpBVdcbzY
rudolph-v1-merged
UAS_qwen7b_uniform_uniform
Qwen3-8B-bad-medical-top40
PureRL-1.5B-v6d2-lam01-identity-maskon-acc05
Llama-3.1-8B-risky-financial-last-third
Llama-3.1-8B-bad-medical-middle-third
qwen-hf-iter-contamination-np-iter1
qwen-hf-iter-contamination-np-iter3
qwen3-4b-new-prompt
llama2_7b_chat-WaRP-safeinstr_ratio0.1_lr5e-5
SiliconMind-V1-Qwen2.5-C-7B-I
Llama-3.2-3B-Instruct-HI-SynthDolly-r16alpha32-E1-S73
Affine-kkk7-5E4UMWjokujzzatwxRDe8pM3Cu3dnRJJyEFaje4bzLhjSHVh
Qwen3-4B-HI-SynthDolly-r16alpha32-E5-S73
qwen3-4b-instruct-2507-pubmedqa-final-only-default
qwen_instruct_codereview-merged
cyberguard