From 8432e482de537889291e4d5af286580680ceb519 Mon Sep 17 00:00:00 2001
From: Jared Levi De La Fuente Rodriguez
Date: Sun, 3 Sep 2023 10:14:42 -0600
Subject: [PATCH] Update model.py

---
 llama/model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama/model.py b/llama/model.py
index 0f43aea..c8b9ecf 100755
--- a/llama/model.py
+++ b/llama/model.py
@@ -448,7 +448,8 @@ class Transformer(nn.Module):
         )
 
         self.freqs_cis = precompute_freqs_cis(
-            # self.params.max_seq_len is multiplied by 2 because the token limit is 4096, so you can't make it bigger than that, and this is why it's set the the maximun (4096).
+            # Note that self.params.max_seq_len is multiplied by 2 because the token limit for the Llama 2 generation of models is 4096.
+            # Using this multiplier instead of hard-coding 4096 keeps the precomputed table in sync with max_seq_len when sequence lengths vary during training or fine-tuning.
             self.params.dim // self.params.n_heads, self.params.max_seq_len * 2
         )
 
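
Note on the patched call site: the comment concerns precompute_freqs_cis, which
builds the rotary-embedding (RoPE) frequency table that self.freqs_cis caches.
Below is a minimal sketch of such a precomputation. The function name and
signature are taken from the hunk above; the body is an assumed textbook RoPE
implementation, not necessarily the repository's exact code:

    import torch

    def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0) -> torch.Tensor:
        # Assumed standard RoPE precomputation: one inverse frequency per
        # pair of head dimensions, dim // 2 frequencies in total.
        freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim))
        # Positions 0..end-1. With end = max_seq_len * 2 (as in the patch),
        # the table has headroom beyond the configured sequence length.
        t = torch.arange(end, device=freqs.device)
        # Outer product gives the per-position, per-frequency angles.
        angles = torch.outer(t, freqs).float()
        # Store as unit complex numbers e^{i * angle} (complex64).
        return torch.polar(torch.ones_like(angles), angles)

Called with dim = self.params.dim // self.params.n_heads and
end = self.params.max_seq_len * 2, the table covers twice the configured
context length, which is what the revised comment is explaining.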