Eric Wang 3 years ago
parent
commit
357ec81a17
4 changed files with 98 additions and 57 deletions
  1. README.md (+11 −20)
  2. finetune.py (+11 −29)
  3. generate.py (+7 −8)
  4. loss.ipynb (+69 −0)

+ 11 - 20
README.md

@@ -1,34 +1,25 @@
-# alpaca-lora
+# alpaca-lora (WIP)
 
-This repository contains code for reproducing the Stanford Alpaca results. Users will need to have LLaMA weights on hand and be ready to fork `transformers`.
+This repository contains code for reproducing the [Stanford Alpaca results](https://github.com/tatsu-lab/stanford_alpaca#data-release). Users will need to be ready to fork `transformers`.
 
+# Setup
 
-1. Install dependencies
+1. Install dependencies (**install zphang's transformers fork**)
 
 ```
-pip install -q bitsandbytes datasets accelerate loralib
+pip install -q datasets accelerate loralib sentencepiece
 
 pip install -q git+https://github.com/zphang/transformers@llama_push
-pip install -q git+https://github.com/huggingface/peft.git\
+pip install -q git+https://github.com/huggingface/peft.git
 ```
 
-2. Convert weights
+2. [Install bitsandbytes from source](https://github.com/TimDettmers/bitsandbytes/blob/main/compile_from_source.md)
 
-```
-python conversion.py --input_dir [LLAMA_DIR]/LLaMA --model_size 7B --output_dir ./7B
-```
 
-3. Modify hyperparams in `finetune.py`
+# Inference
 
-```
-MICRO_BATCH_SIZE = 12
-BATCH_SIZE = 36
-EPOCHS = 3
-LEARNING_RATE = 2e-5
-```
+See `generate.py`.
 
-4. Run experiments
+# Training
 
-```
-python finetune.py
-```
+Under construction.
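
The new setup instructions hinge on zphang's `transformers` fork, which exposes the `LLaMATokenizer` and `LLaMAForCausalLM` classes used by both scripts in this commit (upstream `transformers` later renamed these `LlamaTokenizer`/`LlamaForCausalLM`). A minimal sanity check after step 1, assuming the fork and peft installed cleanly:

```
# Quick check that the forked transformers and peft are importable.
# These class names only exist on zphang's llama_push branch.
from transformers import LLaMATokenizer, LLaMAForCausalLM  # noqa: F401
from peft import LoraConfig, get_peft_model  # noqa: F401

print("transformers fork and peft import cleanly")
```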

+ 11 - 29
finetune.py

@@ -10,32 +10,14 @@ from transformers import AutoTokenizer, AutoConfig, LLaMAForCausalLM, LLaMAToken
 from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model
 
 model = LLaMAForCausalLM.from_pretrained(
-    "./7B/llama-7b",
+    "decapoda-research/llama-7b-hf",
     load_in_8bit=True,
-    max_sequence_length=128,  # data length
     device_map="auto",
 )
 
 
-tokenizer = LLaMATokenizer.from_pretrained("./7B/tokenizer")
+tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")
 
-
-def print_trainable_parameters(model):
-    """
-    Prints the number of trainable parameters in the model.
-    """
-    trainable_params = 0
-    all_param = 0
-    for _, param in model.named_parameters():
-        all_param += param.numel()
-        if param.requires_grad:
-            trainable_params += param.numel()
-    print(
-        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
-    )
-
-
-print_trainable_parameters(model)
 model = prepare_model_for_int8_training(model)
 
 config = LoraConfig(
@@ -48,8 +30,6 @@ config = LoraConfig(
 )
 model = get_peft_model(model, config)
 
-print_trainable_parameters(model)
-
 tokenizer.pad_token = tokenizer.eos_token
 tokenizer.pad_token_id = tokenizer.eos_token_id
 
@@ -77,21 +57,23 @@ def generate_prompt(data_point):
 ### Response:"""
 
 
+# optimized for RTX 4090.
+MICRO_BATCH_SIZE = 12
+BATCH_SIZE = 36
+GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
+EPOCHS = 1
+LEARNING_RATE = 2e-5
+CUTOFF_LEN = 128
+
 data = data.map(
     lambda data_point: tokenizer(
         generate_prompt(data_point),
         truncation=True,
-        max_length=128,
+        max_length=CUTOFF_LEN,
         padding="max_length",
     )
 )
 
-MICRO_BATCH_SIZE = 12
-BATCH_SIZE = 36
-GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
-EPOCHS = 3
-LEARNING_RATE = 2e-5
-
 
 trainer = transformers.Trainer(
     model=model,
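
The `Trainer` call is cut off in the hunk above. The constants moved to the top of the file imply an effective batch size of MICRO_BATCH_SIZE × GRADIENT_ACCUMULATION_STEPS = 12 × 3 = 36. A hedged sketch of how such constants typically wire into `transformers.TrainingArguments` (the argument names are real transformers API; the exact pairing and the fp16/output_dir choices are assumptions, and `data` is assumed to be the DatasetDict loaded earlier in the script):

```
import transformers

# Sketch: feeding the hyperparameters above into TrainingArguments.
# The commit's actual Trainer arguments are truncated in the diff.
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=MICRO_BATCH_SIZE,             # 12
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,  # 36 // 12 = 3
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    fp16=True,              # assumption: mixed precision on the RTX 4090
    output_dir="outputs",   # assumption: directory name for checkpoints
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(
        tokenizer, mlm=False  # causal LM, not masked LM
    ),
)
```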

+ 7 - 8
generate.py

@@ -1,23 +1,22 @@
-import torch
-from peft import get_peft_model, PeftConfig, LoraConfig, PeftModel
+from peft import PeftModel
 from transformers import LLaMATokenizer, LLaMAForCausalLM
 
-tokenizer = LLaMATokenizer.from_pretrained("./7B/tokenizer")
+tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")
 
 model = LLaMAForCausalLM.from_pretrained(
-    "./7B/llama-7b",
+    "decapoda-research/llama-7b-hf",
     load_in_8bit=True,
     device_map="auto",
 )
-model = PeftModel.from_pretrained(model, "./outputs")
+model = PeftModel.from_pretrained(model, "tloen/alpaca-lora-7b")
 
 PROMPT = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
 ### Instruction:
-Sort the following numbers.
+Write a poem about the following topic.
 
 ### Input:
-5, 2, 3
+Cars
 
 ### Response:"""
 
@@ -26,7 +25,7 @@ inputs = tokenizer(
     return_tensors="pt",
 )
 generation_output = model.generate(
-    **inputs, return_dict_in_generate=True, output_scores=True, max_new_tokens=50
+    **inputs, return_dict_in_generate=True, output_scores=True, max_new_tokens=128
 )
 for s in generation_output.sequences:
     print(tokenizer.decode(s))
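
The commit raises `max_new_tokens` from 50 to 128 and swaps the sorting demo for an open-ended poem prompt. For open-ended prompts like this, sampling settings matter more than they did for sorting; a hedged sketch using transformers' `GenerationConfig` (the temperature/top_p values are illustrative, not from the commit, which relies on `generate()` defaults):

```
from transformers import GenerationConfig

# Illustrative decoding settings; the commit itself uses the defaults.
generation_config = GenerationConfig(
    temperature=0.7,     # soften the distribution for more varied text
    top_p=0.9,           # nucleus sampling
    do_sample=True,      # required for temperature/top_p to take effect
    max_new_tokens=128,
)
generation_output = model.generate(
    **inputs,
    generation_config=generation_config,
    return_dict_in_generate=True,
    output_scores=True,
)
```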

The diff for this file has been suppressed because it is too large
+ 69 - 0
loss.ipynb


Some files were not shown because too many files changed in this commit