Upload splitters.py with huggingface_hub
Browse files- splitters.py +15 -15
splitters.py
CHANGED
@@ -188,10 +188,10 @@ class DiverseLabelsSampler(Sampler):
|
|
188 |
super().prepare()
|
189 |
self.labels_cache = None
|
190 |
|
191 |
-
def
|
192 |
-
if "inputs" not in
|
193 |
-
raise ValueError(f"'inputs' field is missing from '{
|
194 |
-
inputs =
|
195 |
if self.choices not in inputs:
|
196 |
raise ValueError(f"'{self.choices}' field is missing from '{inputs}'.")
|
197 |
choices = inputs[self.choices]
|
@@ -200,29 +200,29 @@ class DiverseLabelsSampler(Sampler):
|
|
200 |
f"Unexpected input choices value '{choices}'. Expected a list."
|
201 |
)
|
202 |
|
203 |
-
if "outputs" not in
|
204 |
-
raise ValueError(f"'outputs' field is missing from '{
|
205 |
-
outputs =
|
206 |
if self.labels not in outputs:
|
207 |
raise ValueError(f"'{self.labels}' field is missing from '{outputs}'.")
|
208 |
|
209 |
-
|
210 |
-
if not isinstance(
|
211 |
raise ValueError(
|
212 |
-
f"Unexpected
|
213 |
)
|
214 |
|
215 |
-
return str([choice for choice in choices if choice in
|
216 |
|
217 |
-
def divide_by_repr(self,
|
218 |
labels = {}
|
219 |
-
for
|
220 |
-
label_repr = self.
|
221 |
if label_repr == "[]" and not self.include_empty_label:
|
222 |
continue
|
223 |
if label_repr not in labels:
|
224 |
labels[label_repr] = []
|
225 |
-
labels[label_repr].append(
|
226 |
return labels
|
227 |
|
228 |
def sample(
|
|
|
188 |
super().prepare()
|
189 |
self.labels_cache = None
|
190 |
|
191 |
+
def exemplar_repr(self, exemplar):
|
192 |
+
if "inputs" not in exemplar:
|
193 |
+
raise ValueError(f"'inputs' field is missing from '{exemplar}'.")
|
194 |
+
inputs = exemplar["inputs"]
|
195 |
if self.choices not in inputs:
|
196 |
raise ValueError(f"'{self.choices}' field is missing from '{inputs}'.")
|
197 |
choices = inputs[self.choices]
|
|
|
200 |
f"Unexpected input choices value '{choices}'. Expected a list."
|
201 |
)
|
202 |
|
203 |
+
if "outputs" not in exemplar:
|
204 |
+
raise ValueError(f"'outputs' field is missing from '{exemplar}'.")
|
205 |
+
outputs = exemplar["outputs"]
|
206 |
if self.labels not in outputs:
|
207 |
raise ValueError(f"'{self.labels}' field is missing from '{outputs}'.")
|
208 |
|
209 |
+
exemplar_outputs = exemplar["outputs"][self.labels]
|
210 |
+
if not isinstance(exemplar_outputs, list):
|
211 |
raise ValueError(
|
212 |
+
f"Unexpected exemplar_outputs value '{exemplar_outputs}'. Expected a list."
|
213 |
)
|
214 |
|
215 |
+
return str([choice for choice in choices if choice in exemplar_outputs])
|
216 |
|
217 |
+
def divide_by_repr(self, exemplars_pool):
    """Group the exemplars of a pool by their label representation.

    Every exemplar is keyed by the string that ``self.exemplar_repr``
    returns for it. Exemplars whose representation is ``"[]"`` are left
    out unless ``self.include_empty_label`` is truthy.

    Args:
        exemplars_pool: Iterable of exemplars; each one is passed
            unchanged to ``self.exemplar_repr``.

    Returns:
        A dict mapping each label representation to the list of exemplars
        that share it. Keys appear in order of first occurrence and each
        list keeps the pool's order.

    Raises:
        Any exception raised by ``self.exemplar_repr`` propagates to the
        caller.
    """
    labels = {}
    for exemplar in exemplars_pool:
        label_repr = self.exemplar_repr(exemplar)
        # "[]" is the representation of an exemplar with no matching
        # labels; it is kept only when explicitly requested.
        if label_repr == "[]" and not self.include_empty_label:
            continue
        # setdefault creates the bucket on first sight and appends in one step.
        labels.setdefault(label_repr, []).append(exemplar)
    return labels
|
227 |
|
228 |
def sample(
|