Merge pull request #754 from JaredLevi18/fix

Making a small change to avoid confusion.
This commit is contained in:
Joseph Spisak
2023-09-03 11:11:38 -07:00
committed by GitHub

View File

@@ -448,6 +448,8 @@ class Transformer(nn.Module):
)
self.freqs_cis = precompute_freqs_cis(
# Note that self.params.max_seq_len is multiplied by 2 because the token limit for the Llama 2 generation of models is 4096.
# Adding this multiplier instead of using 4096 directly allows for dynamism of token lengths while training or fine-tuning.
self.params.dim // self.params.n_heads, self.params.max_seq_len * 2
)