mirror of
https://github.com/meta-llama/llama.git
synced 2026-01-15 16:32:54 -03:00
Merge pull request #754 from JaredLevi18/fix
Making a small change to avoid confusion.
This commit is contained in:
@@ -448,6 +448,8 @@ class Transformer(nn.Module):
|
||||
)
|
||||
|
||||
self.freqs_cis = precompute_freqs_cis(
|
||||
# Note that self.params.max_seq_len is multiplied by 2 because the token limit for the Llama 2 generation of models is 4096.
|
||||
# Adding this multiplier instead of using 4096 directly allows for dynamism of token lengths while training or fine-tuning.
|
||||
self.params.dim // self.params.n_heads, self.params.max_seq_len * 2
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user