From 144608996a3cc5b971834a77e28d9145df1a4480 Mon Sep 17 00:00:00 2001
From: Jared
Date: Sat, 2 Sep 2023 16:23:17 -0600
Subject: [PATCH 1/3] making a small change to avoid a confusion

---
 llama/model.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llama/model.py b/llama/model.py
index 258a7dc..dc2dd50 100755
--- a/llama/model.py
+++ b/llama/model.py
@@ -264,6 +264,7 @@ class Transformer(nn.Module):
         )

         self.freqs_cis = precompute_freqs_cis(
+            # self.params.max_seq_len is multiplied by 2 because the token limit is 4096, so you can't make it bigger than that, and this is why it's set the the maximun (4096).
             self.params.dim // self.params.n_heads, self.params.max_seq_len * 2
         )


From 8432e482de537889291e4d5af286580680ceb519 Mon Sep 17 00:00:00 2001
From: Jared Levi De La Fuente Rodriguez
Date: Sun, 3 Sep 2023 10:14:42 -0600
Subject: [PATCH 2/3] Update model.py

---
 llama/model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama/model.py b/llama/model.py
index 0f43aea..c8b9ecf 100755
--- a/llama/model.py
+++ b/llama/model.py
@@ -448,7 +448,8 @@ class Transformer(nn.Module):
         )

         self.freqs_cis = precompute_freqs_cis(
-            # self.params.max_seq_len is multiplied by 2 because the token limit is 4096, so you can't make it bigger than that, and this is why it's set the the maximun (4096).
+            # Note that self.params.max_seq_len is multiplied by 2 because the token limit for the Llama 2 generation of models is 4096.
+            #Adding this multiplier instead of using 4096 directly allows for dynamism of token lengths while training or fine-tuning.
             self.params.dim // self.params.n_heads, self.params.max_seq_len * 2
         )


From 8580eb9a896bdb82d376b7040d18a2e7b402b10f Mon Sep 17 00:00:00 2001
From: Jared Levi De La Fuente Rodriguez
Date: Sun, 3 Sep 2023 10:17:32 -0600
Subject: [PATCH 3/3] Update model.py

---
 llama/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama/model.py b/llama/model.py
index c8b9ecf..770526d 100755
--- a/llama/model.py
+++ b/llama/model.py
@@ -449,7 +449,7 @@ class Transformer(nn.Module):

         self.freqs_cis = precompute_freqs_cis(
             # Note that self.params.max_seq_len is multiplied by 2 because the token limit for the Llama 2 generation of models is 4096.
-            #Adding this multiplier instead of using 4096 directly allows for dynamism of token lengths while training or fine-tuning.
+            # Adding this multiplier instead of using 4096 directly allows for dynamism of token lengths while training or fine-tuning.
             self.params.dim // self.params.n_heads, self.params.max_seq_len * 2
         )
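
For context on the call these patches annotate: precompute_freqs_cis builds the rotary-embedding (RoPE) frequency table, and its second argument is the number of positions precomputed. Below is a minimal sketch of that function as it appears in the Llama codebase, with hypothetical 7B-style values (dim=4096, n_heads=32, max_seq_len=2048) chosen only to illustrate what the max_seq_len * 2 argument controls; those values are assumptions, not taken from these patches.

import torch

def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0) -> torch.Tensor:
    # One inverse frequency per pair of head dimensions: theta^(-2i/dim).
    freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim))
    # Absolute positions 0 .. end-1.
    t = torch.arange(end, device=freqs.device)
    # Rotation angle for every (position, frequency) pair.
    freqs = torch.outer(t, freqs).float()
    # Unit-magnitude complex numbers e^{i * angle}; shape (end, dim // 2).
    return torch.polar(torch.ones_like(freqs), freqs)

# Hypothetical 7B-style values: head dim = dim // n_heads = 4096 // 32 = 128,
# max_seq_len = 2048, so the table covers 2048 * 2 = 4096 positions.
freqs_cis = precompute_freqs_cis(4096 // 32, 2048 * 2)
assert freqs_cis.shape == (4096, 64)

With max_seq_len = 2048, the doubled argument yields a table covering 4096 positions, which is the 4096-token limit the revised comment refers to.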