OpenLLaMA v2 model
Trying out LLaMA
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM

## v2 models
model_path = 'openlm-research/open_llama_7b_v2'

## v1 models
# model_path = 'openlm-research/open_llama_3b'
# model_path = 'openlm-research/open_llama_7b'
# model_path = 'openlm-research/open_llama_13b'

# Load the tokenizer and the fp16 weights; device_map='auto' lets
# accelerate place the model on the available GPU(s)/CPU.
tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float16, device_map='auto',
)

# Simple Q/A-style prompt; the base model just continues the text.
prompt = 'Q: What is the largest animal?\nA:'
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

generation_output = model.generate(
    input_ids=input_ids, max_new_tokens=32
)
print(tokenizer.decode(generation_output[0]))
Running this, it complains that one more package needs installing first (LlamaTokenizer depends on sentencepiece):
pip install sentencepiece
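If you want to double-check that the install worked before re-running the script, a quick sanity check:

# Confirm sentencepiece is importable and see which version is installed.
import sentencepiece
print(sentencepiece.__version__)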
If you swap a different question into the default prompt, the model answers it.
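For instance, a minimal sketch reusing the tokenizer and model loaded above (the question itself is made up, and the slicing just avoids echoing the prompt back):

# Hypothetical replacement question, reusing `tokenizer` and `model` from above.
prompt = 'Q: What is the fastest land animal?\nA:'
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
generation_output = model.generate(input_ids=input_ids, max_new_tokens=32)
# Decode only the newly generated tokens so the prompt is not printed twice.
print(tokenizer.decode(generation_output[0][input_ids.shape[1]:], skip_special_tokens=True))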
Now let's turn this into code that can keep taking questions. The code below still needs a bit more polishing.. ㅡㅡ; (one possible cleanup follows after it).
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM

## v2 models
model_path = 'openlm-research/open_llama_7b_v2'

## v1 models
# model_path = 'openlm-research/open_llama_3b'
# model_path = 'openlm-research/open_llama_7b'
# model_path = 'openlm-research/open_llama_13b'

tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float16, device_map='auto',
)

while True:
    # prompt = 'Q: What is the largest city?\nA:'
    # Wrap each user question in the same Q/A format as above.
    prompt = input("Ask a question: ")
    prompt = "Q: " + prompt + "?\nA:"
    print(prompt)

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    generation_output = model.generate(
        input_ids=input_ids, max_new_tokens=32
    )
    print(tokenizer.decode(generation_output[0]))
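As noted above, this loop could be tightened up. One possible cleanup, as a sketch (the 'quit' command and the answer-trimming rule are assumptions on my part, not anything the model guarantees): decode only the new tokens, and cut the answer at the first follow-up "Q:" the base model tends to invent when it keeps continuing the Q/A pattern.

# Possible cleanup of the loop above (sketch; the 'quit' command and the
# answer-trimming rule are assumptions, not part of the original code).
while True:
    question = input("Ask a question (or 'quit'): ").strip()
    if question.lower() == "quit":
        break
    prompt = "Q: " + question + "?\nA:"
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    generation_output = model.generate(input_ids=input_ids, max_new_tokens=32)
    # Keep only the newly generated tokens, then stop at the first "Q:"
    # the model invents when it tries to continue the Q/A pattern.
    answer = tokenizer.decode(
        generation_output[0][input_ids.shape[1]:], skip_special_tokens=True
    )
    print(answer.split("Q:")[0].strip())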