Browse Source

Added Dockerfile and docker-compose.yml (#207)

* Added Dockerfile for inference

* Added instructions for Dockerfile

* Update README.md

* Update README.md

* Update README.md

* Pass env through Dockerfile

* Added docker compose setup and instructions

* Added more environment options

* Set a safer default mount point

* add docker-compose changes

* add to gitignore, update to new generate.py

* add docker ignore, simplify docker compose file

* add back missing requirements

* Adjustments to compose and generate.py, added Docker to README.md

* Linting adjust to Black

* Adjusting import linting

* Update README.md

* Update README.md

* Removed comment by original Dockerfile creator.

Comment not necessary.

* cleanup README

Co-authored-by: Francesco Saverio Zuppichini <[email protected]>

---------

Co-authored-by: Francesco Saverio Zuppichini <[email protected]>
Co-authored-by: Chris Alexiuk <[email protected]>
Co-authored-by: ElRoberto538 <>
Co-authored-by: Sam Sipe <[email protected]>
Co-authored-by: Eric J. Wang <[email protected]>
Chris Alexiuk 3 years ago
parent
commit
4367a43fcb
7 changed files, 104 additions and 23 deletions
  1. .dockerignore (+4 −0)
  2. .gitignore (+2 −1)
  3. Dockerfile (+18 −0)
  4. README.md (+47 −21)
  5. docker-compose.yml (+28 −0)
  6. generate.py (+3 −1)
  7. requirements.txt (+2 −0)

+ 4 - 0
.dockerignore

@@ -0,0 +1,4 @@
+.venv
+.github
+.vscode
+.docker-compose.yml

+ 2 - 1
.gitignore

@@ -11,4 +11,5 @@ wandb
 evaluate.py
 test_data.json
 todo.txt
-.vscode/
+.venv
+.vscode

+ 18 - 0
Dockerfile

@@ -0,0 +1,18 @@
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    git \
+    curl \
+    software-properties-common \
+    && add-apt-repository ppa:deadsnakes/ppa \
+    && apt install -y python3.10 \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /workspace
+COPY requirements.txt requirements.txt
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 \
+    && python3.10 -m pip install -r requirements.txt \
+    && python3.10 -m pip install numpy --pre torch --force-reinstall --index-url https://download.pytorch.org/whl/nightly/cu118
+COPY . .
+ENTRYPOINT [ "python3.10"]
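
One detail of the Dockerfile worth noting: `python3.10` from the deadsnakes PPA ships without pip, so the image pipes `get-pip.py` into the interpreter before installing requirements. A minimal standalone sketch of that bootstrap pattern (base image and package set assumed, not part of this commit):

```dockerfile
# Sketch: install a newer Python from the deadsnakes PPA, then bootstrap pip
# for it, since the PPA package does not bundle pip.
FROM ubuntu:22.04
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y curl software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get install -y python3.10 \
    && curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 \
    && rm -rf /var/lib/apt/lists/*
```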

+ 47 - 21
README.md

@@ -15,25 +15,13 @@ as well as Tim Dettmers' [bitsandbytes](https://github.com/TimDettmers/bitsandby
 
 Without hyperparameter tuning, the LoRA model produces outputs comparable to the Stanford Alpaca model. (Please see the outputs included below.) Further tuning might be able to achieve better performance; I invite interested users to give it a try and report their results.
 
-## Setup
+### Local Setup
 
 1. Install dependencies
 
-    ```bash
-    pip install -r requirements.txt
-    ```
-
-1. Set environment variables, or modify the files referencing `BASE_MODEL`:
-
-    ```bash
-    # Files referencing `BASE_MODEL`
-    # export_hf_checkpoint.py
-    # export_state_dict_checkpoint.py
-
-    export BASE_MODEL=decapoda-research/llama-7b-hf
-    ```
-
-    Both `finetune.py` and `generate.py` use `--base_model` flag as shown further below.
+   ```bash
+   pip install -r requirements.txt
+   ```
 
 1. If bitsandbytes doesn't work, [install it from source.](https://github.com/TimDettmers/bitsandbytes/blob/main/compile_from_source.md) Windows users can follow [these instructions](https://github.com/tloen/alpaca-lora/issues/17).
 
@@ -94,6 +82,49 @@ They should help users
 who want to run inference in projects like [llama.cpp](https://github.com/ggerganov/llama.cpp)
 or [alpaca.cpp](https://github.com/antimatter15/alpaca.cpp).
 
+### Docker Setup & Inference
+
+1. Build the container image:
+
+```bash
+docker build -t alpaca-lora .
+```
+
+2. Run the container (you can also use `finetune.py` and all of its parameters as shown above for training):
+
+```bash
+docker run --gpus=all --shm-size 64g -p 7860:7860 -v ${HOME}/.cache:/root/.cache --rm alpaca-lora generate.py \
+    --load_8bit \
+    --base_model 'decapoda-research/llama-7b-hf' \
+    --lora_weights 'tloen/alpaca-lora-7b'
+```
+
+3. Open `http://localhost:7860` in your browser
+
+### Docker Compose Setup & Inference
+
+1. (optional) Change desired model and weights under `environment` in the `docker-compose.yml`
+
+2. Build and run the container
+
+```bash
+docker-compose up -d --build
+```
+
+3. Open `http://localhost:7860` in your browser
+
+4. See logs:
+
+```bash
+docker-compose logs -f
+```
+
+5. Clean everything up:
+
+```bash
+docker-compose down --volumes --rmi all
+```
+
 ### Notes
 
 - We can likely improve our model performance significantly if we had a better dataset. Consider supporting the [LAION Open Assistant](https://open-assistant.io/) effort to produce a high-quality dataset for supervised fine-tuning (or bugging them to release their data).
@@ -110,9 +141,7 @@ or [alpaca.cpp](https://github.com/antimatter15/alpaca.cpp).
   - 7B:
     - <https://huggingface.co/tloen/alpaca-lora-7b>
     - <https://huggingface.co/samwit/alpaca7B-lora>
-    - 🤖 <https://huggingface.co/nomic-ai/gpt4all-lora>
     - 🇧🇷 <https://huggingface.co/22h/cabrita-lora-v0-1>
-    - 🇨🇳 <https://huggingface.co/ziqingyang/chinese-alpaca-lora-7b>
     - 🇨🇳 <https://huggingface.co/qychen/luotuo-lora-7b-0.1>
     - 🇯🇵 <https://huggingface.co/kunishou/Japanese-Alapaca-LoRA-7b-v0>
     - 🇫🇷 <https://huggingface.co/bofenghuang/vigogne-lora-7b>
@@ -131,9 +160,6 @@ or [alpaca.cpp](https://github.com/antimatter15/alpaca.cpp).
     - <https://huggingface.co/baseten/alpaca-30b>
     - <https://huggingface.co/chansung/alpaca-lora-30b>
     - 🇯🇵 <https://huggingface.co/kunishou/Japanese-Alapaca-LoRA-30b-v0>
-    - 🇰🇷 <https://huggingface.co/beomi/KoAlpaca-30B-LoRA>
-  - 65B:
-    - 🇰🇷 <https://huggingface.co/beomi/KoAlpaca-65B-LoRA>
 - [alpaca-native](https://huggingface.co/chavinlo/alpaca-native), a replication using the original Alpaca code
 
 ### Example outputs

+ 28 - 0
docker-compose.yml

@@ -0,0 +1,28 @@
+version: '3'
+
+services:
+  alpaca-lora:
+    build:
+      context: ./
+      dockerfile: Dockerfile
+      args:
+        BUILDKIT_INLINE_CACHE: "0"
+    image: alpaca-lora
+    shm_size: '64gb'
+    command: generate.py --load_8bit --base_model $BASE_MODEL --lora_weights 'tloen/alpaca-lora-7b'
+    restart: unless-stopped
+    volumes:
+      - alpaca-lora:/root/.cache # Location downloaded weights will be stored
+    ports:
+      - 7860:7860
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [ gpu ]
+
+volumes:
+  alpaca-lora:
+    name: alpaca-lora
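
The `$BASE_MODEL` in the `command:` line above is interpolated by docker-compose from the invoking shell's environment, so the variable must be exported before `docker-compose up`. A small sketch of the equivalent POSIX parameter expansion, with the model name assumed from the README's examples:

```shell
# Compose reads $BASE_MODEL from the caller's environment; an unset variable
# interpolates to an empty string unless a default is supplied.
unset BASE_MODEL
echo "resolved: ${BASE_MODEL:-decapoda-research/llama-7b-hf}"  # default used

export BASE_MODEL='decapoda-research/llama-7b-hf'
echo "resolved: ${BASE_MODEL:-fallback}"  # exported value wins
```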

+ 3 - 1
generate.py

@@ -1,3 +1,4 @@
+import os
 import sys
 
 import fire
@@ -29,6 +30,7 @@ def main(
     server_name: str = "127.0.0.1",  # Allows to listen on all interfaces by providing '0.0.0.0'
     share_gradio: bool = False,
 ):
+    base_model = base_model or os.environ.get("BASE_MODEL", "")
     assert (
         base_model
     ), "Please specify a --base_model, e.g. --base_model='decapoda-research/llama-7b-hf'"
@@ -146,7 +148,7 @@ def main(
         ],
         title="🦙🌲 Alpaca-LoRA",
         description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).",  # noqa: E501
-    ).launch(server_name=server_name, share=share_gradio)
+    ).launch(server_name="0.0.0.0", share=share_gradio)
     # Old testing code follows.
 
     """

+ 2 - 0
requirements.txt

@@ -1,5 +1,6 @@
 accelerate
 appdirs
+loralib
 bitsandbytes
 black
 black[jupyter]
@@ -7,5 +8,6 @@ datasets
 fire
 git+https://github.com/huggingface/peft.git
 git+https://github.com/huggingface/transformers.git
+sentencepiece
 gradio
 sentencepiece