generate.py
  1. import torch
  2. from peft import get_peft_model, PeftConfig, LoraConfig, PeftModel
  3. from transformers import LLaMATokenizer, LLaMAForCausalLM
  4. tokenizer = LLaMATokenizer.from_pretrained("./7B/tokenizer")
  5. model = LLaMAForCausalLM.from_pretrained(
  6. "./7B/llama-7b",
  7. load_in_8bit=True,
  8. device_map="auto",
  9. )
  10. model = PeftModel.from_pretrained(model, "./outputs")
  11. PROMPT = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
  12. ### Instruction:
  13. Sort the following numbers.
  14. ### Input:
  15. 5, 2, 3
  16. ### Response:"""
  17. inputs = tokenizer(
  18. PROMPT,
  19. return_tensors="pt",
  20. )
  21. generation_output = model.generate(
  22. **inputs, return_dict_in_generate=True, output_scores=True, max_new_tokens=50
  23. )
  24. for s in generation_output.sequences:
  25. print(tokenizer.decode(s))