@@ -31,7 +31,7 @@ def train(
     micro_batch_size: int = 4,
     num_epochs: int = 3,
     learning_rate: float = 3e-4,
-    cutoff_len: int = 512,
+    cutoff_len: int = 256,
     val_set_size: int = 2000,
     # lora hyperparams
     lora_r: int = 8,
@@ -43,7 +43,7 @@ def train(
     ],
     # llm hyperparams
     train_on_inputs: bool = True,  # if False, masks out inputs in loss
-    group_by_length: bool = True,  # faster, but produces an odd training loss curve
+    group_by_length: bool = False,  # faster, but produces an odd training loss curve
 ):
     print(
         f"Training Alpaca-LoRA model with params:\n"