And here’s the code to convert it to NNsight (thanks to Caden for writing this a while ago!):
import torch
from transformers import GPT2LMHeadModel
from transformer_lens import HookedTransformer
from nnsight.models.UnifiedTransformer import UnifiedTransformer

# One device for every model and for the prompt, so that no forward pass
# relies on implicit cross-device transfers. The models are kept on CPU;
# change this single line to run everything elsewhere.
device = torch.device("cpu")

model = GPT2LMHeadModel.from_pretrained("apollo-research/gpt2_noLN").to(device)

# Undo my hacky LayerNorm removal: divide every LayerNorm weight by 1e6 and
# set eps to 1e-5.
# NOTE(review): presumably the checkpoint stores these weights scaled up by
# 1e6 -- confirm against the model card.
for block in model.transformer.h:
    for layer_norm in (block.ln_1, block.ln_2):
        layer_norm.weight.data = layer_norm.weight.data / 1e6
        layer_norm.eps = 1e-5
model.transformer.ln_f.weight.data = model.transformer.ln_f.weight.data / 1e6
model.transformer.ln_f.eps = 1e-5


# Properly replace LayerNorms by Identities
def removeLN(transformer_lens_model):
    """Swap every LayerNorm slot of the given model for ``torch.nn.Identity``.

    Replaces ``ln1`` and ``ln2`` on each entry of ``blocks`` and the
    top-level ``ln_final``. The model is modified in place and nothing is
    returned.

    Args:
        transformer_lens_model: An object exposing ``blocks`` (indexable,
            with ``len``) and ``ln_final``. In this script it is called with
            both a ``HookedTransformer`` and a ``UnifiedTransformer``.
    """
    # Index-based access is kept on purpose: the argument may be an NNsight
    # wrapper rather than a plain module, and only ``len``/``[]`` on
    # ``blocks`` are known to be exercised by this script.
    for i in range(len(transformer_lens_model.blocks)):
        transformer_lens_model.blocks[i].ln1 = torch.nn.Identity()
        transformer_lens_model.blocks[i].ln2 = torch.nn.Identity()
    transformer_lens_model.ln_final = torch.nn.Identity()


hooked_model = HookedTransformer.from_pretrained(
    "gpt2", hf_model=model, fold_ln=True, center_unembed=False
).to(device)
removeLN(hooked_model)

model_nnsight = UnifiedTransformer(
    model="gpt2", hf_model=model, fold_ln=True, center_unembed=False
).to(device)
removeLN(model_nnsight)

# Arbitrary token ids, created on the same device as the models.
prompt = torch.tensor([1, 2, 3, 4], device=device)

# Capture the NNsight model's unembed output for the prompt.
with torch.no_grad(), model_nnsight.trace(prompt) as runner:
    logits2 = model_nnsight.unembed.output.save()

# A single reference forward pass through the TransformerLens model; the
# activation cache is kept in `cache` for later inspection.
logits, cache = hooked_model.run_with_cache(prompt)

# Both wrappers were built from the same weights, so their logits are
# compared here.
# NOTE(review): depending on the nnsight version, the saved value may have to
# be read through `logits2.value` -- confirm.
outputs_match = torch.allclose(logits, logits2)
print(outputs_match)
And here’s the code to convert it to NNsight (thanks to Caden for writing this a while ago!)