Add convert.py and .gitignore entries to handle the model weights and the converted .npz archive

This commit is contained in:
2026-04-10 13:56:51 +05:30
parent 51296aa928
commit 04dd88e1ae
2 changed files with 127 additions and 1 deletions

5
.gitignore vendored
View File

@@ -1 +1,4 @@
.vscode
.vscode
llama-weights
llm-env
weights.npz

123
convert.py Normal file
View File

@@ -0,0 +1,123 @@
import numpy as np
from safetensors import safe_open
import torch
import os
# -------- CONFIG --------
MODEL_DIR = "./llama-weights"  # directory holding all the .safetensors shards
OUT_PATH = "weights.npz"       # merged output archive
DTYPE = torch.float16          # target dtype: float16 (recommended) or torch.float32
# ------------------------

# Collect shard files in deterministic (sorted) order so merging is reproducible.
shard_names = (name for name in os.listdir(MODEL_DIR) if name.endswith(".safetensors"))
files = sorted(shard_names)
print("Found shards:", files)
# Accumulator for all tensors seen across shards, keyed by checkpoint name.
weights = {}

# -------- MERGE FUNCTION --------
def merge_tensor(key, tensor):
    """Fold `tensor` into the global `weights` dict under `key`.

    The first occurrence of a key is stored as-is. When a key repeats
    across shards:
      * 2-D weights are concatenated — along axis 1 for o_proj/down_proj
        (output-side projections), along axis 0 for everything else;
      * 1-D tensors (norms / biases) keep the first shard's copy;
      * any other rank raises ValueError.
    """
    if key not in weights:
        weights[key] = tensor
        return

    existing = weights[key]
    if tensor.ndim == 2:
        # o_proj / down_proj shards stack along the second axis; all other
        # 2-D weights stack along the first.
        axis = 1 if ("o_proj" in key or "down_proj" in key) else 0
        weights[key] = np.concatenate([existing, tensor], axis=axis)
    elif tensor.ndim == 1:
        # norms / biases → keep the first copy
        return
    else:
        raise ValueError(f"Unknown tensor shape: {key} {tensor.shape}")
# -------- LOAD SHARDS --------
# Stream every tensor out of every shard and fold it into `weights`.
for file in files:
    path = os.path.join(MODEL_DIR, file)
    print(f"\nLoading {file}")
    with safe_open(path, framework="pt", device="cpu") as f:
        for key in f.keys():
            tensor = f.get_tensor(key)
            # Cast to the target dtype unconditionally. (The original code
            # branched on bfloat16, but both branches performed the exact
            # same `.to(DTYPE)` cast — a dead conditional.) The cast must
            # happen on the torch tensor before `.numpy()`, because NumPy
            # has no native bfloat16 representation.
            tensor = tensor.to(DTYPE).numpy()
            merge_tensor(key, tensor)
# -------- RENAME KEYS --------
# Per-layer HF checkpoint suffix -> target per-layer name. Insertion order
# matches the original q/k/v/o, gate/down/up, norms check order.
_LAYER_SUFFIX_MAP = {
    "self_attn.q_proj.weight": "attention.wq",
    "self_attn.k_proj.weight": "attention.wk",
    "self_attn.v_proj.weight": "attention.wv",
    "self_attn.o_proj.weight": "attention.wo",
    "mlp.gate_proj.weight": "feed_forward.w1",
    "mlp.down_proj.weight": "feed_forward.w2",
    "mlp.up_proj.weight": "feed_forward.w3",
    "input_layernorm.weight": "attention_norm",
    "post_attention_layernorm.weight": "ffn_norm",
}

def _rename_key(key):
    """Translate one HF checkpoint key to the target naming scheme.

    Returns the renamed key, or None for keys that should be skipped.
    """
    if key == "model.embed_tokens.weight":
        return "tok_embeddings"
    if key == "model.norm.weight":
        return "norm"
    if key == "lm_head.weight":
        return "output"
    if key.startswith("model.layers"):
        # keys look like "model.layers.<N>.<suffix>" — N is the layer index
        layer_id = key.split(".")[2]
        for suffix, new_name in _LAYER_SUFFIX_MAP.items():
            if suffix in key:
                return f"layers.{layer_id}.{new_name}"
    return None

final_weights = {}
for key, tensor in weights.items():
    new_key = _rename_key(key)
    if new_key is None:
        print("Skipping:", key)
        continue
    final_weights[new_key] = tensor
    print(f"{key} -> {new_key} {tensor.shape}")

# -------- SAVE --------
np.savez(OUT_PATH, **final_weights)
print(f"\n✅ SUCCESS: Saved merged weights to {OUT_PATH}")