import numpy as np
from safetensors import safe_open
import torch
import os

# -------- CONFIG --------
MODEL_DIR = "./llama-weights"   # folder containing all the safetensors shards
OUT_PATH = "weights.npz"
DTYPE = torch.float16           # use torch.float16 (recommended) or torch.float32
# ------------------------

# Collect shard files
files = sorted([f for f in os.listdir(MODEL_DIR) if f.endswith(".safetensors")])
print("Found shards:", files)

weights = {}

# -------- MERGE FUNCTION --------
def merge_tensor(key, tensor):
    if key not in weights:
        weights[key] = tensor
    else:
        # Key appears in more than one shard: merge the pieces
        if tensor.ndim == 2:
            if any(x in key for x in ["o_proj", "down_proj"]):
                # row-parallel layers: concatenate along the input dimension (axis=1)
                weights[key] = np.concatenate([weights[key], tensor], axis=1)
            else:
                # default: concatenate along the output dimension (axis=0)
                weights[key] = np.concatenate([weights[key], tensor], axis=0)
        elif tensor.ndim == 1:
            # norms / biases → keep the first copy
            pass
        else:
            raise ValueError(f"Unknown tensor shape: {key} {tensor.shape}")

# -------- LOAD SHARDS --------
for file in files:
    path = os.path.join(MODEL_DIR, file)
    print(f"\nLoading {file}")
    with safe_open(path, framework="pt", device="cpu") as f:
        for key in f.keys():
            tensor = f.get_tensor(key)

            # ---- dtype fix (bfloat16 has no NumPy equivalent, so cast first) ----
            tensor = tensor.to(DTYPE)
            tensor = tensor.numpy()

            merge_tensor(key, tensor)

# -------- RENAME KEYS --------
final_weights = {}

for key, tensor in weights.items():
    new_key = key

    # embeddings
    if key == "model.embed_tokens.weight":
        new_key = "tok_embeddings"
    # final norm
    elif key == "model.norm.weight":
        new_key = "norm"
    # output
    elif key == "lm_head.weight":
        new_key = "output"
    # layers
    elif key.startswith("model.layers"):
        parts = key.split(".")
        layer_id = parts[2]

        if "self_attn.q_proj.weight" in key:
            new_key = f"layers.{layer_id}.attention.wq"
        elif "self_attn.k_proj.weight" in key:
            new_key = f"layers.{layer_id}.attention.wk"
        elif "self_attn.v_proj.weight" in key:
            new_key = f"layers.{layer_id}.attention.wv"
        elif "self_attn.o_proj.weight" in key:
            new_key = f"layers.{layer_id}.attention.wo"
        elif "mlp.gate_proj.weight" in key:
            new_key = f"layers.{layer_id}.feed_forward.w1"
        elif "mlp.down_proj.weight" in key:
            new_key = f"layers.{layer_id}.feed_forward.w2"
        elif "mlp.up_proj.weight" in key:
            new_key = f"layers.{layer_id}.feed_forward.w3"
        elif "input_layernorm.weight" in key:
            new_key = f"layers.{layer_id}.attention_norm"
        elif "post_attention_layernorm.weight" in key:
            new_key = f"layers.{layer_id}.ffn_norm"
        else:
            print("Skipping:", key)
            continue
    else:
        print("Skipping:", key)
        continue

    final_weights[new_key] = tensor
    print(f"{key} -> {new_key} {tensor.shape}")

# -------- SAVE --------
np.savez(OUT_PATH, **final_weights)
print(f"\nāœ… SUCCESS: Saved merged weights to {OUT_PATH}")
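
# -------- OPTIONAL SANITY CHECK (illustrative sketch) --------
# A minimal verification pass, assuming the archive at OUT_PATH was just
# written by this script. It reloads the .npz and prints each key with its
# shape and dtype so you can eyeball that the renaming and merging look right.
loaded = np.load(OUT_PATH)
print(f"\nVerifying {OUT_PATH}: {len(loaded.files)} tensors")
for key in sorted(loaded.files):
    print(f"  {key}: shape={loaded[key].shape} dtype={loaded[key].dtype}")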