File size: 3,159 Bytes
a1f0b25
 
 
 
ebd5740
a1f0b25
 
 
 
 
 
daaf4f3
6c478b4
2794e9d
 
 
daaf4f3
1f876d1
2794e9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e54d19f
 
a1f0b25
 
52b50f8
a1f0b25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
---
language: zh
tags:
- summarization
inference: True
---

Task: Summarization

## Usage
```python

from typing import List, Optional

from transformers import BertTokenizer, PegasusForConditionalGeneration
class PegasusTokenizer(BertTokenizer):
    """``BertTokenizer`` subclass that ends every encoded sequence with EOS.

    Differences from the base class that are visible here:

    * ``<mask_1>`` is registered as an additional special token.
    * ``build_inputs_with_special_tokens`` adds no leading token and appends
      exactly one ``eos_token_id`` at the end.
    * ``get_special_tokens_mask`` reports that trailing EOS position as
      special.
    """

    # Only these two fields are declared as model inputs.
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(self, **kwargs):
        """Initialise the base tokenizer, then register ``<mask_1>``.

        Args:
            **kwargs: Forwarded unchanged to ``BertTokenizer.__init__``.
        """
        super().__init__(**kwargs)
        self.add_special_tokens({'additional_special_tokens': ["<mask_1>"]})

    def build_inputs_with_special_tokens(
            self,
            token_ids_0: List[int],
            token_ids_1: Optional[List[int]] = None) -> List[int]:
        """Concatenate the given id lists and append a single EOS id.

        Args:
            token_ids_0: Ids of the first sequence.
            token_ids_1: Ids of an optional second sequence; when given it is
                appended directly after ``token_ids_0`` with no separator.

        Returns:
            A new list: the input ids followed by ``self.eos_token_id``.
        """
        if token_ids_1 is None:
            return token_ids_0 + [self.eos_token_id]
        return token_ids_0 + token_ids_1 + [self.eos_token_id]

    def _special_token_mask(self, seq):
        """Return a 0/1 list marking which ids in ``seq`` are special tokens."""
        # Build the lookup set once instead of re-reading the property for
        # every element of ``seq``.
        special_ids = set(self.all_special_ids)
        return [1 if token_id in special_ids else 0 for token_id in seq]

    def get_special_tokens_mask(
            self,
            token_ids_0: List[int],
            token_ids_1: Optional[List[int]] = None,
            already_has_special_tokens: bool = False) -> List[int]:
        """Return a mask with 1 for special tokens and 0 for ordinary tokens.

        Args:
            token_ids_0: Ids of the first sequence.
            token_ids_1: Ids of an optional second sequence.
            already_has_special_tokens: When true, ``token_ids_0`` is treated
                as already containing its special tokens, so nothing is
                appended to the mask and ``token_ids_1`` is not consulted.

        Returns:
            A list of 0/1 flags. Unless ``already_has_special_tokens`` is
            true, it is one element longer than the combined input, because
            it accounts for the EOS id that
            ``build_inputs_with_special_tokens`` appends.
        """
        if already_has_special_tokens:
            return self._special_token_mask(token_ids_0)

        sequence = token_ids_0 if token_ids_1 is None else token_ids_0 + token_ids_1
        # The trailing entry stands for the appended EOS token. It must be the
        # flag 1, not the EOS vocabulary id, so the mask stays strictly 0/1.
        return self._special_token_mask(sequence) + [1]
# Load the summarization checkpoint and its tokenizer from the same repo id.
model = PegasusForConditionalGeneration.from_pretrained('IDEA-CCNL/Randeng-Pegasus-238M-Summary-Chinese')
tokenizer = PegasusTokenizer.from_pretrained('IDEA-CCNL/Randeng-Pegasus-238M-Summary-Chinese')

text = "在北京冬奥会自由式滑雪女子坡面障碍技巧决赛中,中国选手谷爱凌夺得银牌。祝贺谷爱凌!今天上午,自由式滑雪女子坡面障碍技巧决赛举行。决赛分三轮进行,取选手最佳成绩排名决出奖牌。第一跳,中国选手谷爱凌获得69.90分。在12位选手中排名第三。完成动作后,谷爱凌又扮了个鬼脸,甚是可爱。第二轮中,谷爱凌在道具区第三个障碍处失误,落地时摔倒。获得16.98分。网友:摔倒了也没关系,继续加油!在第二跳失误摔倒的情况下,谷爱凌顶住压力,第三跳稳稳发挥,流畅落地!获得86.23分!此轮比赛,共12位选手参赛,谷爱凌第10位出场。网友:看比赛时我比谷爱凌紧张,加油!"
# truncation=True makes the 512-token cap explicit; max_length on its own
# relies on the tokenizer's implicit fallback (see the Transformers
# padding/truncation documentation).
inputs = tokenizer(text, max_length=512, truncation=True, return_tensors="pt")

# Generate Summary
summary_ids = model.generate(inputs["input_ids"])
# Decode the first (and only) generated sequence and print it, so the result
# is visible when the snippet is run as a script rather than in a REPL.
summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
print(summary)
```

## Citation
If you find this resource useful, please cite the following website in your paper.
```
@misc{Fengshenbang-LM,
  title={Fengshenbang-LM},
  author={IDEA-CCNL},
  year={2022},
  howpublished={\url{https://github.com/IDEA-CCNL/Fengshenbang-LM}},
}
```