Skip to content

Commit e38c4ec

Browse files
committed
Download checkpoint from HuggingFace
1 parent e58ce7e commit e38c4ec

File tree

6 files changed

+96
-37
lines changed

6 files changed

+96
-37
lines changed

examples/models/llama/export_llama_lib.py

+50
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@
9898
"phi_4_mini",
9999
]
100100
TORCHTUNE_DEFINED_MODELS = ["llama3_2_vision"]
101+
HUGGING_FACE_REPO_IDS = {
102+
"qwen2_5": "Qwen/Qwen2.5-1.5B",
103+
"phi_4_mini": "microsoft/Phi-4-mini-instruct",
104+
}
101105

102106

103107
class WeightType(Enum):
@@ -519,7 +523,53 @@ def canonical_path(path: Union[str, Path], *, dir: bool = False) -> str:
519523
return return_val
520524

521525

526+
def download_and_convert_hf_checkpoint(modelname: str) -> str:
    """
    Download a HuggingFace checkpoint for ``modelname`` and convert it to
    Meta format, caching the converted file under ``~/.cache/meta_checkpoints``.

    Args:
        modelname: Key into ``HUGGING_FACE_REPO_IDS`` identifying the model.

    Returns:
        Path (as a string) to the converted ``.pth`` checkpoint.

    Raises:
        KeyError: If ``modelname`` is not in ``HUGGING_FACE_REPO_IDS``.
        ValueError: If no weight converter is registered for ``modelname``.
    """
    # Build cache path for converted checkpoints.
    cache_dir = Path.home() / ".cache" / "meta_checkpoints"
    cache_dir.mkdir(parents=True, exist_ok=True)

    # Use the repo name to name the converted file ("/" is not valid in
    # a file name).
    repo_id = HUGGING_FACE_REPO_IDS[modelname]
    model_name = repo_id.replace("/", "_")
    converted_path = cache_dir / f"{model_name}.pth"

    if converted_path.exists():
        print(f"✔ Using cached converted model: {converted_path}")
        # BUG FIX: annotated return type is str, but a Path was returned;
        # convert explicitly so callers get what the signature promises.
        return str(converted_path)

    # 1. Download weights from Hugging Face.
    print("⬇ Downloading and converting checkpoint...")
    from huggingface_hub import snapshot_download

    checkpoint_path = snapshot_download(
        repo_id=repo_id,
    )

    # 2. Convert weights to Meta format. Import lazily so the heavy model
    # packages are only loaded for the model actually requested.
    if modelname == "qwen2_5":
        from executorch.examples.models.qwen2_5 import convert_weights
    elif modelname == "phi_4_mini":
        from executorch.examples.models.phi_4_mini import convert_weights
    else:
        # BUG FIX: a dead "smollm2" branch used to `pass` here and then
        # return a path to a file that was never written; fail loudly for
        # any model without a registered converter instead.
        raise ValueError(f"No weight converter registered for {modelname!r}")
    convert_weights(checkpoint_path, converted_path)

    return str(converted_path)
567+
568+
522569
def export_llama(args) -> str:
570+
if not args.checkpoint and args.model in HUGGING_FACE_REPO_IDS:
571+
args.checkpoint = download_and_convert_hf_checkpoint(args.model)
572+
523573
if args.profile_path is not None:
524574
try:
525575
from executorch.util.python_profiler import CProfilerFlameGraph

examples/models/llama/install_requirements.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# Install tokenizers for hf .json tokenizer.
1111
# Install snakeviz for cProfile flamegraph
1212
# Install lm-eval for Model Evaluation with lm-evaluation-harness.
13-
pip install tiktoken sentencepiece tokenizers snakeviz lm_eval==0.4.5 blobfile
13+
pip install tiktoken torchtune sentencepiece tokenizers snakeviz lm_eval==0.4.5 blobfile
1414

1515
# Call the install helper for further setup
1616
python examples/models/llama/install_requirement_helper.py

examples/models/phi_4_mini/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# LICENSE file in the root directory of this source tree.
33

44
from executorch.examples.models.llama.model import Llama2Model
5+
from executorch.examples.models.phi_4_mini.convert_weights import convert_weights
56

67

78
class Phi4MiniModel(Llama2Model):
@@ -11,4 +12,5 @@ def __init__(self, **kwargs):
1112

1213
__all__ = [
1314
"Phi4MiniModel",
15+
"convert_weights",
1416
]

examples/models/phi_4_mini/convert_weights.py

+21-18
Original file line numberDiff line numberDiff line change
@@ -51,37 +51,40 @@ def phi_4_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.T
5151
return converted_state_dict
5252

5353

54-
def main():
55-
parser = argparse.ArgumentParser(
56-
description="Convert Phi-4-mini weights to Meta format."
57-
)
58-
parser.add_argument(
59-
"input_dir",
60-
type=str,
61-
help="Path to directory containing checkpoint files",
62-
)
63-
parser.add_argument("output", type=str, help="Path to the output checkpoint")
64-
65-
args = parser.parse_args()
66-
54+
def convert_weights(input_dir: str, output_file: str) -> None:
    """
    Convert a Phi-4-mini HuggingFace checkpoint to Meta format.

    Args:
        input_dir: Directory containing the HF safetensors shards.
        output_file: Destination path for the converted ``.pth`` file.
    """
    # Don't necessarily need to use TorchTune checkpointer, can just
    # aggregate checkpoint files by ourselves.
    checkpointer = FullModelHFCheckpointer(
        checkpoint_dir=input_dir,
        checkpoint_files=[
            "model-00001-of-00002.safetensors",
            "model-00002-of-00002.safetensors",
        ],
        output_dir=".",
        model_type="PHI4",
    )

    print("Loading checkpoint...")
    sd = checkpointer.load_checkpoint()
    print("Converting checkpoint...")
    # Remap TorchTune parameter names to the Meta/llama naming scheme.
    sd = phi_4_tune_to_meta(sd["model"])
    print("Saving checkpoint...")
    torch.save(sd, output_file)
    print("Done.")


def main():
    """CLI entry point: parse arguments and run the conversion."""
    parser = argparse.ArgumentParser(
        description="Convert Phi-4-mini weights to Meta format."
    )
    parser.add_argument(
        "input_dir",
        type=str,
        help="Path to directory containing checkpoint files",
    )
    parser.add_argument("output", type=str, help="Path to the output checkpoint")

    args = parser.parse_args()
    convert_weights(args.input_dir, args.output)
8588

8689

8790
if __name__ == "__main__":

examples/models/qwen2_5/__init__.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# This source code is licensed under the BSD-style license found in the
22
# LICENSE file in the root directory of this source tree.
33

4-
from executorch.example.models.llama.model import Llama2Model
4+
from executorch.examples.models.llama.model import Llama2Model
5+
from executorch.examples.models.qwen2_5.convert_weights import convert_weights
56

67

78
class Qwen2_5Model(Llama2Model):
@@ -11,4 +12,5 @@ def __init__(self, **kwargs):
1112

1213
__all__ = [
1314
"Qwen2_5Model",
15+
"convert_weights",
1416
]

examples/models/qwen2_5/convert_weights.py

+19-17
Original file line numberDiff line numberDiff line change
@@ -53,35 +53,37 @@ def qwen_2_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.
5353
return converted_state_dict
5454

5555

56-
def main():
57-
parser = argparse.ArgumentParser(
58-
description="Convert Qwen2 weights to Meta format."
59-
)
60-
parser.add_argument(
61-
"input_dir",
62-
type=str,
63-
help="Path to directory containing checkpoint files",
64-
)
65-
parser.add_argument("output", type=str, help="Path to the output checkpoint")
66-
67-
args = parser.parse_args()
68-
56+
def convert_weights(input_dir: str, output_file: str) -> None:
    """
    Convert a Qwen2.5 HuggingFace checkpoint to Meta format.

    Args:
        input_dir: Directory containing the HF ``model.safetensors`` file.
        output_file: Destination path for the converted ``.pth`` file.
    """
    # Don't necessarily need to use TorchTune checkpointer, can just
    # aggregate checkpoint files by ourselves.
    checkpointer = FullModelHFCheckpointer(
        checkpoint_dir=input_dir,
        checkpoint_files=["model.safetensors"],
        output_dir=".",
        model_type="QWEN2",
    )

    print("Loading checkpoint...")
    sd = checkpointer.load_checkpoint()
    print("Converting checkpoint...")
    # Remap TorchTune parameter names to the Meta/llama naming scheme.
    sd = qwen_2_tune_to_meta(sd["model"])
    print("Saving checkpoint...")
    torch.save(sd, output_file)
    print("Done.")


def main():
    """CLI entry point: parse arguments and run the conversion."""
    parser = argparse.ArgumentParser(
        description="Convert Qwen2 weights to Meta format."
    )
    parser.add_argument(
        "input_dir",
        type=str,
        help="Path to directory containing checkpoint files",
    )
    parser.add_argument("output", type=str, help="Path to the output checkpoint")

    args = parser.parse_args()
    convert_weights(args.input_dir, args.output)
8587

8688

8789
if __name__ == "__main__":

0 commit comments

Comments
 (0)