Add convert.py and .gitignore to handle model weights and the converted .npz file
This commit is contained in:
5
.gitignore
vendored
5
.gitignore
vendored
@@ -1 +1,4 @@
|
|||||||
.vscode
|
.vscode
|
||||||
|
llama-weights
|
||||||
|
llm-env
|
||||||
|
weights.npz
|
||||||
123
convert.py
Normal file
123
convert.py
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
import numpy as np
|
||||||
|
from safetensors import safe_open
|
||||||
|
import torch
|
||||||
|
import os
|
||||||
|
|
||||||
|
# -------- CONFIG --------
MODEL_DIR = "./llama-weights"  # directory holding every *.safetensors shard
OUT_PATH = "weights.npz"       # single merged output archive

# Target dtype for every exported tensor; float16 keeps the file small,
# switch to torch.float32 if downstream code needs full precision.
DTYPE = torch.float16
# ------------------------

# Discover shard files in a deterministic (sorted) order.
files = sorted(f for f in os.listdir(MODEL_DIR) if f.endswith(".safetensors"))
print("Found shards:", files)
|
||||||
|
|
||||||
|
# Accumulator for tensors gathered across all shards, keyed by HF name.
weights = {}


# -------- MERGE FUNCTION --------
def merge_tensor(key, tensor):
    """Record *tensor* under *key*, concatenating with a previously seen shard.

    First occurrence of a key is stored as-is. On a repeat:
    2-D tensors are concatenated (o_proj / down_proj along axis 1,
    everything else along axis 0 — presumably matching the sharding
    layout of the source checkpoint); 1-D tensors (norms / biases)
    keep the first copy; anything else is rejected.
    """
    existing = weights.get(key)
    if existing is None:
        weights[key] = tensor
        return

    if tensor.ndim == 2:
        # o_proj / down_proj shards join along the column (input) dim;
        # all other 2-D weights join along the row (output) dim.
        axis = 1 if ("o_proj" in key or "down_proj" in key) else 0
        weights[key] = np.concatenate([existing, tensor], axis=axis)
    elif tensor.ndim == 1:
        # norms / biases -> keep the first copy only
        pass
    else:
        raise ValueError(f"Unknown tensor shape: {key} {tensor.shape}")
|
||||||
|
|
||||||
|
# -------- LOAD SHARDS --------
# Stream every shard through merge_tensor(); tensors are cast to DTYPE
# while still torch tensors because NumPy has no bfloat16 representation.
for file in files:
    path = os.path.join(MODEL_DIR, file)
    print(f"\nLoading {file}")

    with safe_open(path, framework="pt", device="cpu") as f:
        for key in f.keys():
            tensor = f.get_tensor(key)

            # ---- dtype fix (bfloat16 → float16/float32) ----
            # The original if/else had byte-identical branches; a single
            # unconditional cast covers both cases (.to() is a no-op when
            # the dtype already matches DTYPE).
            tensor = tensor.to(DTYPE)

            tensor = tensor.numpy()

            merge_tensor(key, tensor)
|
||||||
|
|
||||||
|
# -------- RENAME KEYS --------
# Translate HF checkpoint names into the llama-style names expected
# downstream; anything not in the tables is skipped with a notice.
final_weights = {}

# Direct (non-layer) key translations.
top_level_map = {
    "model.embed_tokens.weight": "tok_embeddings",  # embeddings
    "model.norm.weight": "norm",                    # final norm
    "lm_head.weight": "output",                     # output head
}

# HF per-layer key suffix -> llama-style suffix (checked in this order).
layer_suffix_map = {
    "self_attn.q_proj.weight": "attention.wq",
    "self_attn.k_proj.weight": "attention.wk",
    "self_attn.v_proj.weight": "attention.wv",
    "self_attn.o_proj.weight": "attention.wo",
    "mlp.gate_proj.weight": "feed_forward.w1",
    "mlp.down_proj.weight": "feed_forward.w2",
    "mlp.up_proj.weight": "feed_forward.w3",
    "input_layernorm.weight": "attention_norm",
    "post_attention_layernorm.weight": "ffn_norm",
}

for key, tensor in weights.items():
    if key in top_level_map:
        new_key = top_level_map[key]
    elif key.startswith("model.layers"):
        # key looks like "model.layers.<id>.<suffix...>"
        layer_id = key.split(".")[2]
        for suffix, renamed in layer_suffix_map.items():
            if suffix in key:
                new_key = f"layers.{layer_id}.{renamed}"
                break
        else:
            print("Skipping:", key)
            continue
    else:
        print("Skipping:", key)
        continue

    final_weights[new_key] = tensor
    print(f"{key} -> {new_key} {tensor.shape}")


# -------- SAVE --------
np.savez(OUT_PATH, **final_weights)

print(f"\n✅ SUCCESS: Saved merged weights to {OUT_PATH}")
|
||||||
Reference in New Issue
Block a user