From 7b999287032394b309eb1e9f15d1c9d7cf720c5b Mon Sep 17 00:00:00 2001 From: Christopher Woodall Date: Fri, 24 Feb 2023 16:43:49 -0500 Subject: [PATCH 1/7] adds parameter substitution to `download.sh` Allows `PRESIGNED_URL` and `TARGET_FOLDER` to be declared as environmental variables. --- download.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/download.sh b/download.sh index db520dc..9d5eb9a 100644 --- a/download.sh +++ b/download.sh @@ -1,9 +1,9 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # This software may be used and distributed according to the terms of the GNU General Public License version 3. -PRESIGNED_URL="" # replace with presigned url from email -MODEL_SIZE="7B,13B,30B,65B" # edit this list with the model sizes you wish to download -TARGET_FOLDER="" # where all files should end up +PRESIGNED_URL="${PRESIGNED_URL:-}" # replace with presigned url from email +MODEL_SIZE="7B,13B,30B,65B" # edit this list with the model sizes you wish to download +TARGET_FOLDER="${TARGET_FOLDER:-}" # where all files should end up declare -A N_SHARD_DICT From 1edba784f02d157c46046b8bc6a74ee21657353f Mon Sep 17 00:00:00 2001 From: Christopher Woodall Date: Fri, 24 Feb 2023 16:46:11 -0500 Subject: [PATCH 2/7] update `README.md` --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5da9190..6504eb3 100755 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# LLaMA +# LLaMA This repository is intended as a minimal, hackable and readable example to load [LLaMA](https://research.facebook.com/publications/llama-open-and-efficient-foundation-language-models/) models and run inference. In order to download the checkpoints and tokenizer, fill this [google form](https://forms.gle/jk851eBVbX1m5TAv5) @@ -16,6 +16,7 @@ pip install -e . ### Download Once your request is approved, you will receive links to download the tokenizer and model files. 
Edit the `download.sh` script with the signed url provided in the email to download the model weights and tokenizer. +You can also set `PRESIGNED_URL` and `TARGET_FOLDER` using environmental variables. ### Inference The provided `example.py` can be run on a single or multi-gpu node with `torchrun` and will output completions for two pre-defined prompts. Using `TARGET_FOLDER` as defined in `download.sh`: From 68abf87bc8d4b5bc3648b9f72cbaedb44488bf93 Mon Sep 17 00:00:00 2001 From: Christopher Woodall Date: Fri, 24 Feb 2023 16:56:18 -0500 Subject: [PATCH 3/7] update `README.md` wording --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6504eb3..ad70070 100755 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ pip install -e . ### Download Once your request is approved, you will receive links to download the tokenizer and model files. Edit the `download.sh` script with the signed url provided in the email to download the model weights and tokenizer. -You can also set `PRESIGNED_URL` and `TARGET_FOLDER` using environmental variables. +The download process can also be automated by setting the respective environmental variables (`PRESIGNED_URL` and `TARGET_FOLDER`). ### Inference The provided `example.py` can be run on a single or multi-gpu node with `torchrun` and will output completions for two pre-defined prompts. Using `TARGET_FOLDER` as defined in `download.sh`: From e2baec405074801bfbc370d68d3165423df0baab Mon Sep 17 00:00:00 2001 From: Christopher Woodall Date: Wed, 6 Sep 2023 12:42:37 -0500 Subject: [PATCH 4/7] prompt for input if not set in ENV --- download.sh | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/download.sh b/download.sh index 3fb6adc..815685d 100644 --- a/download.sh +++ b/download.sh @@ -3,9 +3,20 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. 
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement. -PRESIGNED_URL="${PRESIGNED_URL:-}" # replace with presigned url from email -MODEL_SIZE="7B,13B,30B,65B" # edit this list with the model sizes you wish to download -TARGET_FOLDER="${TARGET_FOLDER:-}" # where all files should end up +# Check if PRESIGNED_URL environment variable is set +if [ -z "$PRESIGNED_URL" ]; then + read -p "Enter the URL from email: " PRESIGNED_URL +fi + +# Check if MODEL_SIZE environment variable is set +if [ -z "$MODEL_SIZE" ]; then + read -p "Enter the list of models to download without spaces (7B,13B,70B,7B-chat,13B-chat,70B-chat), or press Enter for all: " MODEL_SIZE +fi + +# Check if TARGET_FOLDER environment variable is set +if [ -z "$TARGET_FOLDER" ]; then + TARGET_FOLDER="." # Default target folder +fi mkdir -p ${TARGET_FOLDER} From efbf61611e998946c0d62fca5ce3e7c15e77ea35 Mon Sep 17 00:00:00 2001 From: Christopher Woodall Date: Wed, 6 Sep 2023 15:17:42 -0500 Subject: [PATCH 5/7] Update README.md Co-authored-by: ruanslv --- README.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/README.md b/README.md index 3ed69e8..10af249 100755 --- a/README.md +++ b/README.md @@ -14,14 +14,6 @@ See [UPDATES.md](UPDATES.md). ⚠️ **7/18: We're aware of people encountering a number of download issues today. Anyone still encountering issues should remove all local files, re-clone the repository, and [request a new download link](https://ai.meta.com/resources/models-and-libraries/llama-downloads/). It's critical to do all of these in case you have local corrupt files. When you receive the email, copy *only* the link text - it should begin with https://download.llamameta.net and not with https://l.facebook.com, which will give errors.** - -### Download -Once your request is approved, you will receive links to download the tokenizer and model files. 
- -Edit the `download.sh` script with the signed url provided in the email to download the model weights and tokenizer. - -The download process can also be automated by setting the respective environmental variables (`PRESIGNED_URL` and `TARGET_FOLDER`). - In order to download the model weights and tokenizer, please visit the [Meta AI website](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and accept our License. Once your request is approved, you will receive a signed URL over email. Then run the download.sh script, passing the URL provided when prompted to start the download. Make sure that you copy the URL text itself, **do not use the 'Copy link address' option** when you right click the URL. If the copied URL text starts with: https://download.llamameta.net, you copied it correctly. If the copied URL text starts with: https://l.facebook.com, you copied it the wrong way. From 69c9820b8a603e19592691df08b9dd6d0aae17ed Mon Sep 17 00:00:00 2001 From: Christopher Woodall Date: Wed, 6 Sep 2023 15:21:19 -0500 Subject: [PATCH 6/7] add `download.sh` explanation --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 10af249..9000bbe 100755 --- a/README.md +++ b/README.md @@ -30,11 +30,12 @@ We are also providing downloads on [Hugging Face](https://huggingface.co/meta-ll In a conda env with PyTorch / CUDA available, clone the repo and run in the top-level directory: - ``` pip install -e . ``` +Then to run the script: `./download.sh`. There are optional ENV variables that can be used for automation. 
+ ## Inference Different models require different model-parallel (MP) values: From 2a61ead63722795225d9ac9bc30f01e8012339cd Mon Sep 17 00:00:00 2001 From: Christopher Woodall Date: Wed, 6 Sep 2023 15:23:44 -0500 Subject: [PATCH 7/7] update `download.sh` --- download.sh | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/download.sh b/download.sh index 815685d..a2c7bfd 100644 --- a/download.sh +++ b/download.sh @@ -1,8 +1,10 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright (c) Meta Platforms, Inc. and affiliates. # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement. +set -e + # Check if PRESIGNED_URL environment variable is set if [ -z "$PRESIGNED_URL" ]; then read -p "Enter the URL from email: " PRESIGNED_URL @@ -31,7 +33,12 @@ wget --continue ${PRESIGNED_URL/'*'/"USE_POLICY.md"} -O ${TARGET_FOLDER}"/USE_PO echo "Downloading tokenizer" wget --continue ${PRESIGNED_URL/'*'/"tokenizer.model"} -O ${TARGET_FOLDER}"/tokenizer.model" wget --continue ${PRESIGNED_URL/'*'/"tokenizer_checklist.chk"} -O ${TARGET_FOLDER}"/tokenizer_checklist.chk" -(cd ${TARGET_FOLDER} && md5sum -c tokenizer_checklist.chk) +CPU_ARCH=$(uname -m) + if [ "$CPU_ARCH" = "arm64" ]; then + (cd ${TARGET_FOLDER} && md5 tokenizer_checklist.chk) + else + (cd ${TARGET_FOLDER} && md5sum -c tokenizer_checklist.chk) + fi for m in ${MODEL_SIZE//,/ } do @@ -66,5 +73,9 @@ do wget --continue ${PRESIGNED_URL/'*'/"${MODEL_PATH}/params.json"} -O ${TARGET_FOLDER}"/${MODEL_PATH}/params.json" wget --continue ${PRESIGNED_URL/'*'/"${MODEL_PATH}/checklist.chk"} -O ${TARGET_FOLDER}"/${MODEL_PATH}/checklist.chk" echo "Checking checksums" - (cd ${TARGET_FOLDER}"/${MODEL_PATH}" && md5sum -c checklist.chk) -done \ No newline at end of file + if [ "$CPU_ARCH" = "arm64" ]; then + (cd ${TARGET_FOLDER}"/${MODEL_PATH}" && md5 checklist.chk) + else + (cd ${TARGET_FOLDER}"/${MODEL_PATH}" && md5sum -c checklist.chk) + fi +done