3 gadi atpakaļ · c39da83e2b
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ pip install -r requirements.txt
 
				 
			
 
				 ### Inference (`generate.py`)
			
 
				 
			
 
				-This file reads the foundation model from the Huggingface model hub and the LoRA weights from `tloen/alpaca-lora-7b`, and runs inference on a specified input. Users should treat this as example code for the use of the model, and modify it as needed.
			
 
				+This file reads the foundation model from the Hugging Face model hub and the LoRA weights from `tloen/alpaca-lora-7b`, and launches a Gradio interface for running inference on user-supplied input. Users should treat this as example code for the use of the model, and modify it as needed.
			
 
				 
			
 
				 ### Training (`finetune.py`)
			
 
				 
			
--- a/generate.py
+++ b/generate.py
@@ -1,6 +1,7 @@
 
				 import torch
			
 
				 from peft import PeftModel
			
 
				 import transformers
			
 
				+import gradio as gr
			
 
				 
			
 
				 assert (
			
 
				     "LlamaTokenizer" in transformers._import_structure["models.llama"]
			
@@ -43,28 +44,62 @@ def generate_prompt(instruction, input=None):
 
				 model.eval()
			
 
				 
			
 
				 
			
 
				-def evaluate(instruction, input=None, **kwargs):
			
 
				+def evaluate(
			
 
				+    instruction,
			
 
				+    temperature=0.1,
			
 
				+    top_p=0.75,
			
 
				+    top_k=40,
			
 
				+    num_beams=4,
			
 
				+    input=None,
			
 
				+    **kwargs,
			
 
				+):
			
 
				     prompt = generate_prompt(instruction, input)
			
 
				     inputs = tokenizer(prompt, return_tensors="pt")
			
 
				     input_ids = inputs["input_ids"].cuda()
			
 
				     generation_config = GenerationConfig(
			
 
				-        temperature=0.1,
			
 
				-        top_p=0.75,
			
 
				-        num_beams=4,
			
 
				+        temperature=temperature,
			
 
				+        top_p=top_p,
			
 
				+        top_k=top_k,
			
 
				+        num_beams=num_beams,
			
 
				         **kwargs,
			
 
				     )
			
 
				-    generation_output = model.generate(
			
 
				-        input_ids=input_ids,
			
 
				-        generation_config=generation_config,
			
 
				-        return_dict_in_generate=True,
			
 
				-        output_scores=True,
			
 
				-        max_new_tokens=2048,
			
 
				-    )
			
 
				+    with torch.no_grad():
			
 
				+        generation_output = model.generate(
			
 
				+            input_ids=input_ids,
			
 
				+            generation_config=generation_config,
			
 
				+            return_dict_in_generate=True,
			
 
				+            output_scores=True,
			
 
				+            max_new_tokens=2048,
			
 
				+        )
			
 
				     s = generation_output.sequences[0]
			
 
				     output = tokenizer.decode(s)
			
 
				     return output.split("### Response:")[1].strip()
			
 
				 
			
 
				 
			
 
				+gr.Interface(
			
 
				+    fn=evaluate,
			
 
				+    inputs=[
			
 
				+        gr.components.Textbox(
			
 
				+            lines=2, label="Instruction", placeholder="Tell me about alpacas."
			
 
				+        ),
			
 
				+        gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
			
 
				+        gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
			
 
				+        gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
			
 
				+        gr.components.Slider(minimum=0, maximum=4, step=1, value=4, label="Beams"),
			
 
				+    ],
			
 
				+    outputs=[
			
 
				+        gr.inputs.Textbox(
			
 
				+            lines=5,
			
 
				+            label="Output",
			
 
				+        )
			
 
				+    ],
			
 
				+    title="🦙🌲 Alpaca-LoRA",
			
 
				+    description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).",
			
 
				+).launch(share=True)
			
 
				+
			
 
				+# Old testing code follows.
			
 
				+
			
 
				+"""
			
 
				 if __name__ == "__main__":
			
 
				     # testing code for readme
			
 
				     for instruction in [
			
@@ -81,3 +116,4 @@ if __name__ == "__main__":
 
				         print("Instruction:", instruction)
			
 
				         print("Response:", evaluate(instruction))
			
 
				         print()
			
 
				+"""
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,5 @@ sentencepiece
 
				 git+https://github.com/huggingface/transformers.git
			
 
				 accelerate
			
 
				 bitsandbytes
			
 
				-git+https://github.com/huggingface/peft.git
			
 
				+git+https://github.com/huggingface/peft.git
			
 
				+gradio