import os

# Uncomment to restrict the run to GPU 0. It has to be set before CUDA is
# initialised, hence its position ahead of the torch import.
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
import torch.nn as nn
import bitsandbytes as bnb
from datasets import load_dataset
import transformers
from transformers import AutoTokenizer, AutoConfig, LLaMAForCausalLM, LLaMATokenizer
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model

# --- Hyperparameters ---------------------------------------------------------
# Token length used both for the model's `max_sequence_length` and for
# truncating/padding every tokenised prompt, so the two cannot drift apart.
CUTOFF_LEN = 128
# Not referenced elsewhere in this script; presumably the number of records in
# alpaca_data.json -- TODO confirm or remove.
DATA_SIZE = 51368
MICRO_BATCH_SIZE = 12
BATCH_SIZE = 36
# Per-device effective batch = MICRO_BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS.
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
EPOCHS = 3
LEARNING_RATE = 2e-5

# --- Base model and tokenizer ------------------------------------------------
model = LLaMAForCausalLM.from_pretrained(
    "./7B/llama-7b",
    load_in_8bit=True,
    max_sequence_length=CUTOFF_LEN,  # data length
    device_map="auto",
)
tokenizer = LLaMATokenizer.from_pretrained("./7B/tokenizer")


def print_trainable_parameters(model):
    """Print the number of trainable parameters in the model.

    Walks ``model.named_parameters()`` and reports the trainable count, the
    total count and the trainable percentage on a single line.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs where ``param`` has ``numel()`` and
            ``requires_grad``.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard against a parameter-less model instead of raising ZeroDivisionError.
    percentage = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percentage}"
    )


# Reported twice on purpose: once for the raw 8-bit model, once after the LoRA
# adapters are attached, so the two counts can be compared.
print_trainable_parameters(model)

# --- LoRA setup --------------------------------------------------------------
model = prepare_model_for_int8_training(model)

config = LoraConfig(
    r=4,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
print_trainable_parameters(model)

# The tokenizer is given the EOS token as its padding token so that
# padding="max_length" below has a pad token to use.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

# --- Data --------------------------------------------------------------------
data = load_dataset("json", data_files="alpaca_data.json")


def generate_prompt(data_point):
    """Render one dataset record as a full training example.

    Two templates are used: one with an ``### Input:`` section for records
    that carry extra context, and a shorter one for records whose input is
    empty or missing. Both end with the reference answer after
    ``### Response:`` so the model is trained on the response, not just on
    the prompt.

    Args:
        data_point: Mapping with ``"instruction"`` and ``"output"`` keys and
            an optional/possibly empty ``"input"`` key.

    Returns:
        The formatted prompt-plus-response string.
    """
    # The template literals are flush-left on purpose: any indentation inside
    # the triple-quoted strings would become part of the prompt text.
    if data_point.get("input"):
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{data_point["instruction"]}

### Input:
{data_point["input"]}

### Response:
{data_point["output"]}"""
    return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{data_point["instruction"]}

### Response:
{data_point["output"]}"""


# Tokenise every record to a fixed CUTOFF_LEN tokens (truncated or padded).
data = data.map(
    lambda data_point: tokenizer(
        generate_prompt(data_point),
        truncation=True,
        max_length=CUTOFF_LEN,
        padding="max_length",
    )
)

# --- Training ----------------------------------------------------------------
trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=100,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=1,
        output_dir="lora-alpaca",
        save_total_limit=3,
    ),
    # mlm=False selects causal-LM collation rather than masked-LM.
    # NOTE(review): the collator ignores pad-token positions in the loss; with
    # pad == eos that presumably also covers genuine EOS tokens -- confirm
    # this is acceptable.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
# NOTE(review): presumably disabled because the generation cache is of no use
# during training -- confirm.
model.config.use_cache = False
trainer.train(resume_from_checkpoint=False)

model.save_pretrained("lora-alpaca")