Files changed (1)
  1. README.md +15 -11
README.md CHANGED
@@ -56,30 +56,34 @@ Checkpoints format: Hugging Face Transformers (Megatron-DeepSpeed format models

  ## Required Libraries and Their Versions

- - torch>=2.0.0
- - transformers>=4.34.0
- - tokenizers>=0.14.0
- - accelerate==0.23.0
+ - torch>=2.3.0
+ - transformers>=4.40.1
+ - tokenizers>=0.19.1
+ - accelerate>=0.29.3
+ - flash-attn>=2.5.8

  ## Usage

  ```python
  import torch
  from transformers import AutoTokenizer, AutoModelForCausalLM
- tokenizer = AutoTokenizer.from_pretrained("llm-jp/llm-jp-13b-instruct-full-dolly_en-dolly_ja-ichikara_003_001-oasst_en-oasst_ja-v1.1")
- model = AutoModelForCausalLM.from_pretrained("llm-jp/llm-jp-13b-instruct-full-dolly_en-dolly_ja-ichikara_003_001-oasst_en-oasst_ja-v1.1", device_map="auto", torch_dtype=torch.float16)
- text = "以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。\n\n### 指示:\n{instruction}\n\n### 応答:\n".format(instruction="自然言語処理とは何か")
- tokenized_input = tokenizer.encode(text, add_special_tokens=False, return_tensors="pt").to(model.device)
- with torch.no_grad():
+ tokenizer = AutoTokenizer.from_pretrained("llm-jp/llm-jp-13b-instruct-full-dolly-ichikara_004_001_single-oasst-oasst2-v2.0")
+ model = AutoModelForCausalLM.from_pretrained("llm-jp/llm-jp-13b-instruct-full-dolly-ichikara_004_001_single-oasst-oasst2-v2.0", device_map="auto", torch_dtype=torch.float16)
+ chat = [
+     {"role": "system", "content": "以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。"},
+     {"role": "user", "content": "自然言語処理とは何か"},
+ ]
+ tokenized_input = tokenizer.apply_chat_template(chat, add_generation_prompt=True, tokenize=True, return_tensors="pt").to(model.device)
+ with torch.inference_mode():
      output = model.generate(
          tokenized_input,
          max_new_tokens=512,
          do_sample=True,
          top_p=0.95,
          temperature=0.7,
-         repetition_penalty=1.1,
+         repetition_penalty=1.05,
      )[0]
- print(tokenizer.decode(output))
+ print(tokenizer.decode(output, skip_special_tokens=True))
  ```

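Every version floor is raised and `flash-attn` is newly required. After installing the pinned packages (for example with pip), the following is a minimal sketch for checking the local environment against the new minimums using only the standard library; the package names and versions are copied from the updated list above, and the exact `flash-attn` metadata name may vary depending on how it was built.

```python
from importlib.metadata import PackageNotFoundError, version

# Minimum versions taken from the updated "Required Libraries" list above.
REQUIRED = {
    "torch": "2.3.0",
    "transformers": "4.40.1",
    "tokenizers": "0.19.1",
    "accelerate": "0.29.3",
    "flash-attn": "2.5.8",  # name as published on PyPI; may appear as flash_attn
}

for name, minimum in REQUIRED.items():
    try:
        print(f"{name}: installed {version(name)} (requires >= {minimum})")
    except PackageNotFoundError:
        print(f"{name}: not installed (requires >= {minimum})")
```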
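The substantive change in the Usage snippet is how the prompt is built: the old code hand-formatted a fixed Japanese instruction template (roughly, "Below is an instruction that describes a task. Write a response that appropriately fulfills the request." followed by `### 指示:` / `### 応答:` sections) and called `tokenizer.encode`, while the new code passes a system/user message list to `tokenizer.apply_chat_template`. A small sketch, assuming the v2.0 repository ships a chat template with its tokenizer, for inspecting the rendered prompt string before tokenizing:

```python
from transformers import AutoTokenizer

model_id = "llm-jp/llm-jp-13b-instruct-full-dolly-ichikara_004_001_single-oasst-oasst2-v2.0"
tokenizer = AutoTokenizer.from_pretrained(model_id)

chat = [
    # System message (Japanese): "Below is an instruction that describes a task.
    # Write a response that appropriately fulfills the request."
    {"role": "system", "content": "以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。"},
    # User message (Japanese): "What is natural language processing?"
    {"role": "user", "content": "自然言語処理とは何か"},
]

# tokenize=False returns the formatted prompt as a string instead of token IDs,
# which makes it easy to compare against the old hand-built template.
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
print(prompt)
```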
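Note that `flash-attn` is added to the requirements while the usage snippet does not opt into it explicitly. If the intent is to use FlashAttention-2 kernels at inference time, recent `transformers` releases accept an `attn_implementation` argument on `from_pretrained`; the sketch below is not part of the README and applies only if the model architecture supports that backend.

```python
import torch
from transformers import AutoModelForCausalLM

# Hypothetical variant of the README's model loading that explicitly requests
# FlashAttention-2; requires the flash-attn package and a half-precision dtype.
model = AutoModelForCausalLM.from_pretrained(
    "llm-jp/llm-jp-13b-instruct-full-dolly-ichikara_004_001_single-oasst-oasst2-v2.0",
    device_map="auto",
    torch_dtype=torch.float16,
    attn_implementation="flash_attention_2",
)
```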