aria-dev committed on
Commit
ab181d6
1 Parent(s): ba771dc

fix: tokenizer can be none in preprocessors

Browse files
Files changed (1) hide show
  1. processing_aria.py +5 -1
processing_aria.py CHANGED
@@ -18,6 +18,7 @@
18
  # under the License.
19
 
20
  import inspect
 
21
  import re
22
  from typing import List, Optional, Union
23
 
@@ -34,6 +35,8 @@ from transformers.tokenization_utils import (
34
 
35
  from .vision_processor import AriaVisionProcessor
36
 
 
 
37
 
38
  class AriaProcessor(ProcessorMixin):
39
  """
@@ -73,7 +76,7 @@ class AriaProcessor(ProcessorMixin):
73
  else:
74
  self.tokenizer = tokenizer
75
 
76
- if self.tokenizer.pad_token is None:
77
  self.tokenizer.pad_token = self.tokenizer.unk_token
78
 
79
  self.image_token = image_token
@@ -229,6 +232,7 @@ class AriaProcessor(ProcessorMixin):
229
  **cls._extract_kwargs(AriaVisionProcessor.from_pretrained, **kwargs),
230
  )
231
  if "use_fast" in kwargs:
 
232
  kwargs.pop("use_fast")
233
  try:
234
  tokenizer = AutoTokenizer.from_pretrained(
 
18
  # under the License.
19
 
20
  import inspect
21
+ import logging
22
  import re
23
  from typing import List, Optional, Union
24
 
 
35
 
36
  from .vision_processor import AriaVisionProcessor
37
 
38
+ logger = logging.getLogger(__name__)
39
+
40
 
41
  class AriaProcessor(ProcessorMixin):
42
  """
 
76
  else:
77
  self.tokenizer = tokenizer
78
 
79
+ if self.tokenizer is not None and self.tokenizer.pad_token is None:
80
  self.tokenizer.pad_token = self.tokenizer.unk_token
81
 
82
  self.image_token = image_token
 
232
  **cls._extract_kwargs(AriaVisionProcessor.from_pretrained, **kwargs),
233
  )
234
  if "use_fast" in kwargs:
235
+ logger.warning("use_fast is not supported for AriaProcessor. Ignoring...")
236
  kwargs.pop("use_fast")
237
  try:
238
  tokenizer = AutoTokenizer.from_pretrained(