# finetune.py
  1. import os
  2. # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
  3. import torch
  4. import torch.nn as nn
  5. import bitsandbytes as bnb
  6. from datasets import load_dataset
  7. import transformers
  8. assert (
  9. "LlamaTokenizer" in transformers._import_structure["models.llama"]
  10. ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
  11. from transformers import AutoTokenizer, AutoConfig, LlamaForCausalLM, LlamaTokenizer
  12. from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model
  13. # optimized for RTX 4090. for larger GPUs, increase some of these?
  14. MICRO_BATCH_SIZE = 4 # this could actually be 5 but i like powers of 2
  15. BATCH_SIZE = 128
  16. GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
  17. EPOCHS = 3 # we don't need 3 tbh
  18. LEARNING_RATE = 3e-4 # the Karpathy constant
  19. CUTOFF_LEN = 256 # 256 accounts for about 96% of the data
  20. LORA_R = 8
  21. LORA_ALPHA = 16
  22. LORA_DROPOUT = 0.05
  23. model = LlamaForCausalLM.from_pretrained(
  24. "decapoda-research/llama-7b-hf",
  25. load_in_8bit=True,
  26. device_map="auto",
  27. )
  28. tokenizer = LlamaTokenizer.from_pretrained(
  29. "decapoda-research/llama-7b-hf", add_eos_token=True
  30. )
  31. model = prepare_model_for_int8_training(model)
  32. config = LoraConfig(
  33. r=LORA_R,
  34. lora_alpha=LORA_ALPHA,
  35. target_modules=["q_proj", "v_proj"],
  36. lora_dropout=LORA_DROPOUT,
  37. bias="none",
  38. task_type="CAUSAL_LM",
  39. )
  40. model = get_peft_model(model, config)
  41. tokenizer.pad_token_id = 0 # unk. we want this to be different from the eos token
  42. data = load_dataset("json", data_files="alpaca_data.json")
  43. def generate_prompt(data_point):
  44. # sorry about the formatting disaster gotta move fast
  45. if data_point["input"]:
  46. return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
  47. ### Instruction:
  48. {data_point["instruction"]}
  49. ### Input:
  50. {data_point["input"]}
  51. ### Response:
  52. {data_point["output"]}"""
  53. else:
  54. return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
  55. ### Instruction:
  56. {data_point["instruction"]}
  57. ### Response:
  58. {data_point["output"]}"""
  59. def tokenize(prompt):
  60. # there's probably a way to do this with the tokenizer settings
  61. # but again, gotta move fast
  62. result = tokenizer(
  63. prompt,
  64. truncation=True,
  65. max_length=CUTOFF_LEN + 1,
  66. padding="max_length",
  67. )
  68. return {
  69. "input_ids": result["input_ids"][:-1],
  70. "attention_mask": result["attention_mask"][:-1],
  71. }
  72. data = data.shuffle().map(lambda x: tokenize(generate_prompt(x)))
  73. trainer = transformers.Trainer(
  74. model=model,
  75. train_dataset=data["train"],
  76. args=transformers.TrainingArguments(
  77. per_device_train_batch_size=MICRO_BATCH_SIZE,
  78. gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
  79. warmup_steps=100,
  80. num_train_epochs=EPOCHS,
  81. learning_rate=LEARNING_RATE,
  82. fp16=True,
  83. logging_steps=20,
  84. output_dir="lora-alpaca",
  85. save_total_limit=3,
  86. ),
  87. data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
  88. )
  89. model.config.use_cache = False
  90. trainer.train(resume_from_checkpoint=False)
  91. model.save_pretrained("lora-alpaca")