|
|
@@ -23,13 +23,18 @@ from peft import (
|
|
|
MICRO_BATCH_SIZE = 4 # this could actually be 5 but i like powers of 2
|
|
|
BATCH_SIZE = 128
|
|
|
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
|
|
|
-EPOCHS = 3 # we don't need 3 tbh
|
|
|
+EPOCHS = 3 # we don't always need 3 tbh
|
|
|
LEARNING_RATE = 3e-4 # the Karpathy constant
|
|
|
CUTOFF_LEN = 256 # 256 accounts for about 96% of the data
|
|
|
LORA_R = 8
|
|
|
LORA_ALPHA = 16
|
|
|
LORA_DROPOUT = 0.05
|
|
|
VAL_SET_SIZE = 2000
|
|
|
+TARGET_MODULES = [
|
|
|
+ "q_proj",
|
|
|
+ "v_proj",
|
|
|
+]
|
|
|
+DATA_PATH = "alpaca_data_cleaned.json"
|
|
|
|
|
|
model = LlamaForCausalLM.from_pretrained(
|
|
|
"decapoda-research/llama-7b-hf",
|
|
|
@@ -45,14 +50,14 @@ model = prepare_model_for_int8_training(model)
|
|
|
config = LoraConfig(
|
|
|
r=LORA_R,
|
|
|
lora_alpha=LORA_ALPHA,
|
|
|
- target_modules=["q_proj", "v_proj"],
|
|
|
+ target_modules=TARGET_MODULES,
|
|
|
lora_dropout=LORA_DROPOUT,
|
|
|
bias="none",
|
|
|
task_type="CAUSAL_LM",
|
|
|
)
|
|
|
model = get_peft_model(model, config)
|
|
|
tokenizer.pad_token_id = 0 # unk. we want this to be different from the eos token
|
|
|
-data = load_dataset("json", data_files="alpaca_data.json")
|
|
|
+data = load_dataset("json", data_files=DATA_PATH)
|
|
|
|
|
|
train_val = data["train"].train_test_split(
|
|
|
test_size=VAL_SET_SIZE, shuffle=True, seed=42
|