Add convert.py and .gitignore to handle model weights and the converted .npz file
This commit is contained in:
5
.gitignore
vendored
5
.gitignore
vendored
@@ -1 +1,4 @@
|
|||||||
.vscode
|
.vscode
|
||||||
|
llama-weights
|
||||||
|
llm-env
|
||||||
|
weights.npz
|
||||||
123
convert.py
Normal file
123
convert.py
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
import numpy as np
|
||||||
|
from safetensors import safe_open
|
||||||
|
import torch
|
||||||
|
import os
|
||||||
|
|
||||||
|
# -------- CONFIG --------
MODEL_DIR = "./llama-weights"  # directory holding every *.safetensors shard
OUT_PATH = "weights.npz"       # single merged output archive

# Target dtype for every exported tensor; float16 keeps the file small,
# switch to torch.float32 if downstream code needs full precision.
DTYPE = torch.float16
# ------------------------

# Discover shard files in a deterministic (sorted) order.
files = sorted(f for f in os.listdir(MODEL_DIR) if f.endswith(".safetensors"))
print("Found shards:", files)
|
||||||
|
|
||||||
|
# Accumulator for tensors gathered across all shards, keyed by HF name.
weights = {}


# -------- MERGE FUNCTION --------
def merge_tensor(key, tensor):
    """Record *tensor* under *key*, concatenating with a previously seen shard.

    First occurrence of a key is stored as-is. On a repeat:
    2-D tensors are concatenated (o_proj / down_proj along axis 1,
    everything else along axis 0 — presumably matching the sharding
    layout of the source checkpoint); 1-D tensors (norms / biases)
    keep the first copy; anything else is rejected.
    """
    existing = weights.get(key)
    if existing is None:
        weights[key] = tensor
        return

    if tensor.ndim == 2:
        # o_proj / down_proj shards join along the column (input) dim;
        # all other 2-D weights join along the row (output) dim.
        axis = 1 if ("o_proj" in key or "down_proj" in key) else 0
        weights[key] = np.concatenate([existing, tensor], axis=axis)
    elif tensor.ndim == 1:
        # norms / biases -> keep the first copy only
        pass
    else:
        raise ValueError(f"Unknown tensor shape: {key} {tensor.shape}")
|
||||||
|
|
||||||
|
# -------- LOAD SHARDS --------
# Stream every shard through merge_tensor(); tensors are cast to DTYPE
# while still torch tensors because NumPy has no bfloat16 representation.
for file in files:
    path = os.path.join(MODEL_DIR, file)
    print(f"\nLoading {file}")

    with safe_open(path, framework="pt", device="cpu") as f:
        for key in f.keys():
            tensor = f.get_tensor(key)

            # ---- dtype fix (bfloat16 → float16/float32) ----
            # The original if/else had byte-identical branches; a single
            # unconditional cast covers both cases (.to() is a no-op when
            # the dtype already matches DTYPE).
            tensor = tensor.to(DTYPE)

            tensor = tensor.numpy()

            merge_tensor(key, tensor)
|
||||||
|
|
||||||
|
# -------- RENAME KEYS --------
# Translate HF checkpoint names into the llama-style names expected
# downstream; anything not in the tables is skipped with a notice.
final_weights = {}

# Direct (non-layer) key translations.
top_level_map = {
    "model.embed_tokens.weight": "tok_embeddings",  # embeddings
    "model.norm.weight": "norm",                    # final norm
    "lm_head.weight": "output",                     # output head
}

# HF per-layer key suffix -> llama-style suffix (checked in this order).
layer_suffix_map = {
    "self_attn.q_proj.weight": "attention.wq",
    "self_attn.k_proj.weight": "attention.wk",
    "self_attn.v_proj.weight": "attention.wv",
    "self_attn.o_proj.weight": "attention.wo",
    "mlp.gate_proj.weight": "feed_forward.w1",
    "mlp.down_proj.weight": "feed_forward.w2",
    "mlp.up_proj.weight": "feed_forward.w3",
    "input_layernorm.weight": "attention_norm",
    "post_attention_layernorm.weight": "ffn_norm",
}

for key, tensor in weights.items():
    if key in top_level_map:
        new_key = top_level_map[key]
    elif key.startswith("model.layers"):
        # key looks like "model.layers.<id>.<suffix...>"
        layer_id = key.split(".")[2]
        for suffix, renamed in layer_suffix_map.items():
            if suffix in key:
                new_key = f"layers.{layer_id}.{renamed}"
                break
        else:
            print("Skipping:", key)
            continue
    else:
        print("Skipping:", key)
        continue

    final_weights[new_key] = tensor
    print(f"{key} -> {new_key} {tensor.shape}")


# -------- SAVE --------
np.savez(OUT_PATH, **final_weights)

print(f"\n✅ SUCCESS: Saved merged weights to {OUT_PATH}")
|
||||||
Reference in New Issue
Block a user