@@ -10,32 +10,14 @@ from transformers import AutoTokenizer, AutoConfig, LLaMAForCausalLM, LLaMAToken
 from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model
 
 
 model = LLaMAForCausalLM.from_pretrained(
-    "./7B/llama-7b",
+    "decapoda-research/llama-7b-hf",
     load_in_8bit=True,
-    max_sequence_length=128, # data length
     device_map="auto",
 )
-tokenizer = LLaMATokenizer.from_pretrained("./7B/tokenizer")
+tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")
 
-
-def print_trainable_parameters(model):
-    """
-    Prints the number of trainable parameters in the model.
-    """
-    trainable_params = 0
-    all_param = 0
-    for _, param in model.named_parameters():
-        all_param += param.numel()
-        if param.requires_grad:
-            trainable_params += param.numel()
-    print(
-        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
-    )
-
-
-print_trainable_parameters(model)
 model = prepare_model_for_int8_training(model)
 
 
 config = LoraConfig(
@@ -48,8 +30,6 @@ config = LoraConfig(
 )
 model = get_peft_model(model, config)
-print_trainable_parameters(model)
-
 tokenizer.pad_token = tokenizer.eos_token
 tokenizer.pad_token_id = tokenizer.eos_token_id
 
 
@@ -77,21 +57,23 @@ def generate_prompt(data_point):
 ### Response:"""
 
 
+# optimized for RTX 4090.
+MICRO_BATCH_SIZE = 12
+BATCH_SIZE = 36
+GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
+EPOCHS = 1
+LEARNING_RATE = 2e-5
+CUTOFF_LEN = 128
+
 data = data.map(
     lambda data_point: tokenizer(
         generate_prompt(data_point),
         truncation=True,
-        max_length=128,
+        max_length=CUTOFF_LEN,
         padding="max_length",
     )
 )
 
-MICRO_BATCH_SIZE = 12
-BATCH_SIZE = 36
-GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
-EPOCHS = 3
-LEARNING_RATE = 2e-5
-
 
 trainer = transformers.Trainer(
     model=model,