# generate.py
  1. from peft import PeftModel
  2. from transformers import LLaMATokenizer, LLaMAForCausalLM, GenerationConfig
  3. tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")
  4. model = LLaMAForCausalLM.from_pretrained(
  5. "decapoda-research/llama-7b-hf",
  6. load_in_8bit=True,
  7. device_map="auto",
  8. )
  9. model = PeftModel.from_pretrained(model, "tloen/alpaca-lora-7b")
  10. PROMPT = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
  11. ### Instruction:
  12. Tell me something about alpacas.
  13. ### Response:"""
  14. inputs = tokenizer(
  15. PROMPT,
  16. return_tensors="pt",
  17. )
  18. input_ids = inputs["input_ids"].cuda()
  19. generation_config = GenerationConfig(
  20. temperature=0.6,
  21. top_p=0.95,
  22. repetition_penalty=1.15,
  23. )
  24. print("Generating...")
  25. generation_output = model.generate(
  26. input_ids=input_ids,
  27. generation_config=generation_config,
  28. return_dict_in_generate=True,
  29. output_scores=True,
  30. max_new_tokens=128,
  31. )
  32. for s in generation_output.sequences:
  33. print(tokenizer.decode(s))