txya900619 committed on
Commit
4a0fd18
1 Parent(s): a3a5dd5

fix: problem with numbers not being parsed

Browse files
Files changed (1) hide show
  1. ipa/ipa.py +2 -2
ipa/ipa.py CHANGED
@@ -24,7 +24,7 @@ delimiter_regex, replace_regex, v2f_regex = prep_regex(
24
 
25
  def get_ipa(raw_text: str, dialect: str) -> tuple[str, str, str, list[str]]:
26
  pinyin_split = re.split(
27
- r"(?<![\da-z])(?=[\da-z])|(?<=[\da-z])(?![\da-z])", raw_text
28
  )
29
 
30
  final_words = []
@@ -35,7 +35,7 @@ def get_ipa(raw_text: str, dialect: str) -> tuple[str, str, str, list[str]]:
35
  if len(hanzi_or_pinyin.strip()) == 0:
36
  continue
37
 
38
- if re.search(r"[\da-z]", hanzi_or_pinyin):
39
  final_words.append(hanzi_or_pinyin)
40
  final_pinyin.append(hanzi_or_pinyin)
41
  pinyin, tone = re.match(r"([a-z]+)(\d+)?", hanzi_or_pinyin).groups()
 
24
 
25
  def get_ipa(raw_text: str, dialect: str) -> tuple[str, str, str, list[str]]:
26
  pinyin_split = re.split(
27
+ r"([a-z]+\d+)", raw_text
28
  )
29
 
30
  final_words = []
 
35
  if len(hanzi_or_pinyin.strip()) == 0:
36
  continue
37
 
38
+ if re.search(r"[a-z]+\d+", hanzi_or_pinyin):
39
  final_words.append(hanzi_or_pinyin)
40
  final_pinyin.append(hanzi_or_pinyin)
41
  pinyin, tone = re.match(r"([a-z]+)(\d+)?", hanzi_or_pinyin).groups()