From a971c41bde81d74f98bc2c2c451da235f1f1d37c Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Fri, 4 Aug 2023 21:35:47 +0000
Subject: [PATCH 1/2] system prompt update

---
 README.md                  |  8 ++++++++
 example_chat_completion.py | 12 +++++++++++-
 llama/generation.py        | 24 +++++++-----------------
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index fb96fc0..0ebc515 100755
--- a/README.md
+++ b/README.md
@@ -6,6 +6,14 @@ This release includes model weights and starting code for pretrained and fine-tu
 
 This repository is intended as a minimal example to load [Llama 2](https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/) models and run inference. For more detailed examples leveraging HuggingFace, see [llama-recipes](https://github.com/facebookresearch/llama-recipes/).
 
+## System Prompt Update
+
+### Observed Issue
+We received feedback from the community on our prompt template and we’re providing an update to reduce the false refusal rates seen. False refusals occur when the model incorrectly refuses to answer a question that it should, for example due to overly broad instructions to be cautious in how it provides responses.
+
+### Updated approach
+Based on evaluation and analysis, we recommend the removal of the system prompt as the default setting. Pull request # removes the system prompt as the default option, but still provides an example to help enable experimentation for those using it.
+
 ## Download
 
 ⚠️ **7/18: We're aware of people encountering a number of download issues today. Anyone still encountering issues should remove all local files, re-clone the repository, and [request a new download link](https://ai.meta.com/resources/models-and-libraries/llama-downloads/). It's critical to do all of these in case you have local corrupt files. When you receive the email, copy *only* the link text - it should begin with https://download.llamameta.net and not with https://l.facebook.com, which will give errors.**
diff --git a/example_chat_completion.py b/example_chat_completion.py
index 5043bc5..02583d9 100644
--- a/example_chat_completion.py
+++ b/example_chat_completion.py
@@ -14,7 +14,7 @@ def main(
     temperature: float = 0.6,
     top_p: float = 0.9,
     max_seq_len: int = 512,
-    max_batch_size: int = 4,
+    max_batch_size: int = 8,
     max_gen_len: Optional[int] = None,
 ):
     generator = Llama.build(
@@ -52,6 +52,16 @@ These are just a few of the many attractions that Paris has to offer. With so mu
             },
             {"role": "user", "content": "How to go from Beijing to NY?"},
         ],
+        [
+            {
+                "role": "system",
+                "content": """\
+You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
+
+If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.""",
+            },
+            {"role": "user", "content": "Write a brief birthday message to John"},
+        ],
     ]
     results = generator.chat_completion(
         dialogs,  # type: ignore
diff --git a/llama/generation.py b/llama/generation.py
index 1f37856..200aa0c 100755
--- a/llama/generation.py
+++ b/llama/generation.py
@@ -43,10 +43,6 @@ Dialog = List[Message]
 
 B_INST, E_INST = "[INST]", "[/INST]"
 B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
 
-DEFAULT_SYSTEM_PROMPT = """\
-You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
-
-If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""
 
 class Llama:
@@ -222,22 +218,16 @@ class Llama:
             max_gen_len = self.model.params.max_seq_len - 1
         prompt_tokens = []
         for dialog in dialogs:
-            if dialog[0]["role"] != "system":
+            if dialog[0]["role"] == "system":
                 dialog = [
                     {
-                        "role": "system",
-                        "content": DEFAULT_SYSTEM_PROMPT,
+                        "role": dialog[1]["role"],
+                        "content": B_SYS
+                        + dialog[0]["content"]
+                        + E_SYS
+                        + dialog[1]["content"],
                     }
-                ] + dialog
-            dialog = [
-                {
-                    "role": dialog[1]["role"],
-                    "content": B_SYS
-                    + dialog[0]["content"]
-                    + E_SYS
-                    + dialog[1]["content"],
-                }
-            ] + dialog[2:]
+                ] + dialog[2:]
             assert all([msg["role"] == "user" for msg in dialog[::2]]) and all(
                 [msg["role"] == "assistant" for msg in dialog[1::2]]
             ), (

From d09f581a06d8f2dffb330ffc7a917d9c4e14f8f0 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Fri, 4 Aug 2023 21:39:13 +0000
Subject: [PATCH 2/2] add PR number

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 0ebc515..0af665e 100755
--- a/README.md
+++ b/README.md
@@ -9,10 +9,10 @@ This repository is intended as a minimal example to load [Llama 2](https://ai.me
 ## System Prompt Update
 
 ### Observed Issue
-We received feedback from the community on our prompt template and we’re providing an update to reduce the false refusal rates seen. False refusals occur when the model incorrectly refuses to answer a question that it should, for example due to overly broad instructions to be cautious in how it provides responses.
+We received feedback from the community on our prompt template and we are providing an update to reduce the false refusal rates seen. False refusals occur when the model incorrectly refuses to answer a question that it should, for example due to overly broad instructions to be cautious in how it provides responses.
 
 ### Updated approach
-Based on evaluation and analysis, we recommend the removal of the system prompt as the default setting. Pull request # removes the system prompt as the default option, but still provides an example to help enable experimentation for those using it.
+Based on evaluation and analysis, we recommend the removal of the system prompt as the default setting. Pull request [#626](https://github.com/facebookresearch/llama/pull/626) removes the system prompt as the default option, but still provides an example to help enable experimentation for those using it.
 
 ## Download
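
For illustration, the dialog preprocessing that chat_completion performs after these patches can be summarized in a minimal standalone sketch. The Message alias and the fold_system_prompt helper below are introduced for this sketch only (the repository defines Message as a TypedDict with a Literal role and performs this logic inline): a leading system message supplied by the caller is folded into the first user turn between the <<SYS>> tags, and a dialog without one now passes through unchanged, since no DEFAULT_SYSTEM_PROMPT is prepended.

    from typing import Dict, List

    # System-prompt tags from the Llama 2 chat format (llama/generation.py).
    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

    # Simplified stand-in for llama.generation.Message (a TypedDict in the repo).
    Message = Dict[str, str]

    def fold_system_prompt(dialog: List[Message]) -> List[Message]:
        """Illustrative helper mirroring chat_completion's inline logic after
        this change: merge an optional leading system message into the first
        user turn; inject nothing when no system message is given."""
        if dialog[0]["role"] == "system":
            merged: Message = {
                "role": dialog[1]["role"],
                "content": B_SYS + dialog[0]["content"] + E_SYS + dialog[1]["content"],
            }
            return [merged] + dialog[2:]
        return dialog  # no system message: used as-is, no default prompt

    # Without a system message the dialog is unchanged.
    plain = [{"role": "user", "content": "Write a brief birthday message to John"}]
    assert fold_system_prompt(plain) == plain

    # With a system message, the system text is wrapped in <<SYS>> tags and
    # prepended to the first user message.
    with_system = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a brief birthday message to John"},
    ]
    print(fold_system_prompt(with_system)[0]["content"])

Folding the system text into the first user turn preserves the prompt format the fine-tuned chat models were trained on, while leaving the decision of whether to use a system prompt entirely to the caller.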