Elron committed on
Commit
e3ab2c6
1 Parent(s): e7f788e

Upload loaders.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. loaders.py +24 -0
loaders.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .stream import MultiStream
2
+ from .operator import SourceOperator
3
+
4
+ from typing import Optional, Union, Sequence, Mapping
5
+ from datasets import load_dataset as hf_load_dataset
6
+
7
+
8
class Loader(SourceOperator):
    """Base class for loaders: source operators with no loader-specific members.

    Adds nothing to ``SourceOperator``; concrete loaders subclass it.
    """
10
+
11
+
12
class LoadHF(Loader):
    """Loader that reads a dataset through ``datasets.load_dataset``.

    The fields below are forwarded to ``load_dataset`` (imported in this file
    as ``hf_load_dataset``); see its documentation for their exact meaning.
    """

    # Passed as the first positional argument of ``load_dataset``.
    path: str
    # Forwarded as the ``name`` keyword argument.
    name: Optional[str] = None
    # Forwarded as the ``data_dir`` keyword argument.
    data_dir: Optional[str] = None
    # Forwarded as the ``data_files`` keyword argument: a single string, a
    # sequence of strings, or a mapping from string to either of those.
    data_files: Optional[
        Union[str, Sequence[str], Mapping[str, Union[str, Sequence[str]]]]
    ] = None
    # Plain (un-annotated) class attribute; it is not read anywhere in this
    # block. NOTE(review): presumably a caching flag consumed by a base
    # class -- confirm against SourceOperator.
    cached = False

    def process(self):
        """Load the dataset in streaming mode and wrap it in a ``MultiStream``.

        Returns:
            The result of ``MultiStream.from_iterables`` applied to whatever
            ``load_dataset`` returned for the configured fields.
        """
        loaded = hf_load_dataset(
            self.path,
            name=self.name,
            data_dir=self.data_dir,
            data_files=self.data_files,
            # Streaming is always requested; it is not configurable here.
            streaming=True,
        )
        return MultiStream.from_iterables(loaded)