Adding convert.py and .gitignore to handle model weights and the converted .npz archive
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1 +1,4 @@
|
||||
.vscode
|
||||
llama-weights
|
||||
llm-env
|
||||
weights.npz
|
||||
123
convert.py
Normal file
123
convert.py
Normal file
@@ -0,0 +1,123 @@
|
||||
import os

import numpy as np
import torch
from safetensors import safe_open

# -------- CONFIG --------
MODEL_DIR = "./llama-weights"  # directory holding all the .safetensors shards
OUT_PATH = "weights.npz"       # merged output archive

DTYPE = torch.float16  # target dtype: float16 (recommended) or torch.float32
# ------------------------

# Discover shard files in a deterministic (sorted) order.
files = sorted(f for f in os.listdir(MODEL_DIR) if f.endswith(".safetensors"))
print("Found shards:", files)

# key -> merged numpy array, filled in as shards are loaded below.
weights = {}
|
||||
|
||||
# -------- MERGE FUNCTION --------
def merge_tensor(key, tensor, store=None):
    """Merge one shard's slice of a parameter into the weight store.

    The first occurrence of *key* is stored as-is. When the same key appears
    again in a later shard, 2-D tensors are concatenated: along axis 1 when
    the key names an ``o_proj`` or ``down_proj`` matrix, along axis 0
    otherwise. 1-D tensors (norms / biases) keep the first copy and silently
    ignore repeats — presumably these are replicated per shard (TODO confirm
    against the checkpoint layout).

    Args:
        key: parameter name as it appears in the safetensors shard.
        tensor: numpy array holding this shard's slice of the parameter.
        store: dict to merge into; defaults to the module-level ``weights``
            so existing callers are unaffected.

    Raises:
        ValueError: if a repeated tensor is neither 1-D nor 2-D.
    """
    target = weights if store is None else store
    if key not in target:
        target[key] = tensor
    elif tensor.ndim == 2:
        # o_proj / down_proj grow along their second dim; everything else
        # is stacked along the first.
        axis = 1 if any(x in key for x in ("o_proj", "down_proj")) else 0
        target[key] = np.concatenate([target[key], tensor], axis=axis)
    elif tensor.ndim == 1:
        # norms / biases -> keep the first copy
        pass
    else:
        raise ValueError(f"Unknown tensor shape: {key} {tensor.shape}")
|
||||
|
||||
# -------- LOAD SHARDS --------
# Stream every shard and merge its tensors into `weights`.
for file in files:
    path = os.path.join(MODEL_DIR, file)
    print(f"\nLoading {file}")

    with safe_open(path, framework="pt", device="cpu") as f:
        for key in f.keys():
            # Cast unconditionally to DTYPE. This covers bfloat16 — which
            # numpy cannot represent — and normalizes every other dtype in
            # one step. (The original if/else had two identical branches.)
            tensor = f.get_tensor(key).to(DTYPE).numpy()
            merge_tensor(key, tensor)
|
||||
|
||||
# -------- RENAME KEYS --------
# Translate HF-style parameter names into the llama-style names used in the
# output archive. Anything that matches no rule is reported and skipped.
final_weights = {}

# Per-layer parameter suffix -> renamed suffix (checked in this order).
_LAYER_SUFFIXES = {
    "self_attn.q_proj.weight": "attention.wq",
    "self_attn.k_proj.weight": "attention.wk",
    "self_attn.v_proj.weight": "attention.wv",
    "self_attn.o_proj.weight": "attention.wo",
    "mlp.gate_proj.weight": "feed_forward.w1",
    "mlp.down_proj.weight": "feed_forward.w2",
    "mlp.up_proj.weight": "feed_forward.w3",
    "input_layernorm.weight": "attention_norm",
    "post_attention_layernorm.weight": "ffn_norm",
}

# Non-layer parameters map one-to-one.
_TOP_LEVEL = {
    "model.embed_tokens.weight": "tok_embeddings",
    "model.norm.weight": "norm",
    "lm_head.weight": "output",
}

for key, tensor in weights.items():
    if key in _TOP_LEVEL:
        new_key = _TOP_LEVEL[key]
    elif key.startswith("model.layers"):
        # key looks like "model.layers.<id>.<suffix>"
        layer_id = key.split(".")[2]
        for suffix, renamed in _LAYER_SUFFIXES.items():
            if suffix in key:
                new_key = f"layers.{layer_id}.{renamed}"
                break
        else:
            print("Skipping:", key)
            continue
    else:
        print("Skipping:", key)
        continue

    final_weights[new_key] = tensor
    print(f"{key} -> {new_key} {tensor.shape}")
|
||||
|
||||
# -------- SAVE --------
# Write all renamed tensors into a single uncompressed .npz archive at
# OUT_PATH; each dict key becomes an array name inside the archive.
np.savez(OUT_PATH, **final_weights)

print(f"\n✅ SUCCESS: Saved merged weights to {OUT_PATH}")
|
||||
Reference in New Issue
Block a user