|
@@ -1,6 +1,7 @@
|
|
|
import torch
|
|
import torch
|
|
|
from peft import PeftModel
|
|
from peft import PeftModel
|
|
|
import transformers
|
|
import transformers
|
|
|
|
|
+import gradio as gr
|
|
|
|
|
|
|
|
assert (
|
|
assert (
|
|
|
"LlamaTokenizer" in transformers._import_structure["models.llama"]
|
|
"LlamaTokenizer" in transformers._import_structure["models.llama"]
|
|
@@ -43,28 +44,62 @@ def generate_prompt(instruction, input=None):
|
|
|
model.eval()
|
|
model.eval()
|
|
|
|
|
|
|
|
|
|
|
|
|
def evaluate(
    instruction,
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    num_beams=4,
    input=None,
    **kwargs,
):
    """Generate a response for *instruction* (optionally with *input* context).

    Builds an Alpaca-style prompt via ``generate_prompt``, runs generation on
    the globally loaded ``model``/``tokenizer`` pair, and returns the text
    that follows the ``"### Response:"`` marker in the decoded output.

    Args:
        instruction: Task description shown to the model.
        temperature: Sampling temperature forwarded to ``GenerationConfig``.
        top_p: Nucleus-sampling cutoff forwarded to ``GenerationConfig``.
        top_k: Top-k cutoff forwarded to ``GenerationConfig``.
        num_beams: Number of beams for beam search.
        input: Optional extra context for the prompt.  NOTE(review): this
            parameter shadows the ``input`` builtin, but the name is kept
            for backward compatibility with existing callers.
        **kwargs: Additional ``GenerationConfig`` fields.

    Returns:
        The model's response as a stripped string.

    Raises:
        IndexError: If the decoded output contains no "### Response:" marker.
    """
    prompt = generate_prompt(instruction, input)
    inputs = tokenizer(prompt, return_tensors="pt")
    # assumes a CUDA device is available -- TODO confirm for CPU-only hosts
    input_ids = inputs["input_ids"].cuda()
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        **kwargs,
    )
    # Inference only: no_grad avoids building an autograd graph and saves
    # memory during generation.
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=2048,
        )
    s = generation_output.sequences[0]
    output = tokenizer.decode(s)
    # The prompt template ends with "### Response:"; everything after that
    # marker is the model's answer.
    return output.split("### Response:")[1].strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Launch a Gradio web UI over `evaluate`.  `share=True` additionally exposes
# a public tunnel URL beyond the local server.
gr.Interface(
    fn=evaluate,
    inputs=[
        gr.components.Textbox(
            lines=2, label="Instruction", placeholder="Tell me about alpacas."
        ),
        gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
        gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
        gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
        gr.components.Slider(minimum=0, maximum=4, step=1, value=4, label="Beams"),
    ],
    outputs=[
        # Fixed: the original used gr.inputs.Textbox (the deprecated *input*
        # namespace) for an output component; gr.components is the correct
        # namespace and matches the inputs list above.
        gr.components.Textbox(
            lines=5,
            label="Output",
        )
    ],
    title="🦙🌲 Alpaca-LoRA",
    description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).",
).launch(share=True)
|
|
|
|
|
+
|
|
|
|
|
+# Old testing code follows.
|
|
|
|
|
+
|
|
|
|
|
+"""
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
|
# testing code for readme
|
|
# testing code for readme
|
|
|
for instruction in [
|
|
for instruction in [
|
|
@@ -81,3 +116,4 @@ if __name__ == "__main__":
|
|
|
print("Instruction:", instruction)
|
|
print("Instruction:", instruction)
|
|
|
print("Response:", evaluate(instruction))
|
|
print("Response:", evaluate(instruction))
|
|
|
print()
|
|
print()
|
|
|
|
|
+"""
|