# Fine-tune LLaMA-7B with LoRA adapters on alpaca_data.json.
- import os
- # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
- import torch
- import torch.nn as nn
- import bitsandbytes as bnb
- from datasets import load_dataset
- import transformers
- assert (
- "LlamaTokenizer" in transformers._import_structure["models.llama"]
- ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
- from transformers import AutoTokenizer, AutoConfig, LlamaForCausalLM, LlamaTokenizer
- from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model
# --- Hyperparameters -------------------------------------------------------
# Tuned for an RTX 4090; on larger GPUs some of these can probably be raised.

# Batching: BATCH_SIZE examples per optimizer step (per device), fed to the
# GPU MICRO_BATCH_SIZE at a time and combined through gradient accumulation.
BATCH_SIZE = 128
MICRO_BATCH_SIZE = 4  # 5 would also fit, but powers of two are preferred
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE

# Schedule and sequence length.
EPOCHS = 3  # likely more than is strictly needed
LEARNING_RATE = 3e-4  # the "Karpathy constant"
CUTOFF_LEN = 256  # covers roughly 96% of the data

# LoRA adapter settings (passed to LoraConfig).
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0.05
# --- Tokenizer ---------------------------------------------------------------
# add_eos_token=True asks the tokenizer to append EOS to every encoded prompt.
tokenizer = LlamaTokenizer.from_pretrained(
    "decapoda-research/llama-7b-hf", add_eos_token=True
)
# Pad with id 0 (the unk token) so that padding is distinct from EOS.
tokenizer.pad_token_id = 0

# --- Base model + LoRA adapters ------------------------------------------------
# Load the base checkpoint in 8-bit and let device_map="auto" place it.
model = LlamaForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    device_map="auto",
    load_in_8bit=True,
)
model = prepare_model_for_int8_training(model)

# LoRA is attached to the attention query and value projections only.
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
)
model = get_peft_model(model, config)

# --- Training data -------------------------------------------------------------
# Records are read from a local JSON file; generate_prompt() reads their
# "instruction", "input" and "output" fields.
data = load_dataset("json", data_files="alpaca_data.json")
def generate_prompt(data_point):
    """Render one training record as a single instruction-following prompt.

    Args:
        data_point: Mapping with "instruction" and "output" entries and an
            optional "input" entry carrying extra context for the task.

    Returns:
        The full prompt text, ending with the expected response. When
        "input" is present and truthy, the template that includes an
        "### Input:" section is used; otherwise the shorter template is used.

    Raises:
        KeyError: If "instruction" or "output" is missing.
    """
    # NOTE(review): the sections below are separated by single newlines, as
    # this file shows them. If the intended template has blank lines between
    # sections, they may have been lost -- confirm against the reference
    # Alpaca prompt before training.
    instruction = data_point["instruction"]
    response = data_point["output"]
    # .get() so a record without an "input" key is handled like one whose
    # input is empty, instead of raising KeyError.
    context = data_point.get("input")

    if context:
        return (
            "Below is an instruction that describes a task, paired with an "
            "input that provides further context. Write a response that "
            "appropriately completes the request.\n"
            "### Instruction:\n"
            f"{instruction}\n"
            "### Input:\n"
            f"{context}\n"
            "### Response:\n"
            f"{response}"
        )

    return (
        "Below is an instruction that describes a task. Write a response "
        "that appropriately completes the request.\n"
        "### Instruction:\n"
        f"{instruction}\n"
        "### Response:\n"
        f"{response}"
    )
def tokenize(prompt):
    """Encode a prompt and return its ids and mask minus the final position.

    The prompt is truncated or padded to CUTOFF_LEN + 1 positions by the
    module-level tokenizer, and then the last position is sliced off both
    returned lists.

    Args:
        prompt: Prompt text to encode.

    Returns:
        Dict with "input_ids" and "attention_mask", each with the last
        element of the tokenizer's output removed.
    """
    encoded = tokenizer(
        prompt,
        max_length=CUTOFF_LEN + 1,
        padding="max_length",
        truncation=True,
    )
    # Encode one position too many, then drop it. Presumably this removes the
    # trailing EOS from prompts that hit the length limit while short prompts
    # only lose a pad token -- TODO confirm; tokenizer settings may be able to
    # do this directly.
    return {
        field: encoded[field][:-1]
        for field in ("input_ids", "attention_mask")
    }
def _to_features(example):
    """Build the prompt for one dataset record and tokenize it."""
    return tokenize(generate_prompt(example))


# Shuffle the records, then attach the tokenized prompt fields to each one.
data = data.shuffle().map(_to_features)

# Checkpoints go to "lora-alpaca"; at most three are kept.
training_args = transformers.TrainingArguments(
    output_dir="lora-alpaca",
    save_total_limit=3,
    logging_steps=20,
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    warmup_steps=100,
    per_device_train_batch_size=MICRO_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    fp16=True,
)

# mlm=False selects the causal (non-masked) language-modeling collator.
collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=data["train"],
    data_collator=collator,
)

# Disable the model's use_cache setting before training starts.
model.config.use_cache = False
trainer.train(resume_from_checkpoint=False)

# Save the PEFT-wrapped model into the same directory used for checkpoints
# (presumably only the adapter weights -- confirm against the PEFT docs).
model.save_pretrained("lora-alpaca")
|