Spaces:

unitxt
/

metric

Running

App Files Files Community

Elron committed on Mar 13

Commit

460af71

•

1 Parent(s): 37ba515

Upload operators.py with huggingface_hub

Browse files

Files changed (1) hide show

operators.py +32 -22

operators.py CHANGED Viewed

@@ -180,7 +180,7 @@ class MapInstanceValues(StreamInstanceOperator):
             if value is not None:
                 if (self.process_every_value is True) and (not isinstance(value, list)):
                     raise ValueError(
-                        f"'process_every_field' == True is allowed only when all fields which have mappers, i.e., {list(self.mappers.keys())} are lists. Instace = {instance}"
                     )
                 if isinstance(value, list) and self.process_every_value:
                     for i, val in enumerate(value):
@@ -285,7 +285,7 @@ class RemoveFields(StreamInstanceOperator):
         return instance
-class FieldOperator(StreamInstanceOperator):
     """A general stream instance operator that processes the values of a field (or multiple ones).
     Args:
@@ -336,7 +336,7 @@ class FieldOperator(StreamInstanceOperator):
         # self._field_to_field is built explicitly by pairs, or copied from argument 'field_to_field'
         if self.field_to_field is None:
             return
-        # for backward compatibility also allow list of tupples of two strings
         if isoftype(self.field_to_field, List[List[str]]) or isoftype(
             self.field_to_field, List[Tuple[str, str]]
         ):
@@ -365,7 +365,7 @@ class FieldOperator(StreamInstanceOperator):
         )
     @abstractmethod
-    def process_value(self, value: Any) -> Any:
         pass
     def prepare(self):
@@ -408,9 +408,12 @@ class FieldOperator(StreamInstanceOperator):
                 ) from e
             try:
                 if self.process_every_value:
-                    new_value = [self.process_value(value) for value in old_value]
                 else:
-                    new_value = self.process_value(old_value)
             except Exception as e:
                 raise ValueError(
                     f"Failed to process '{from_field}' from {instance} due to : {e}"
@@ -427,6 +430,15 @@ class FieldOperator(StreamInstanceOperator):
         return instance
 class RenameFields(FieldOperator):
     """Renames fields.
@@ -773,11 +785,11 @@ class Apply(StreamInstanceOperator):
         return ".".join(parts)
     def str_to_function(self, function_str: str) -> Callable:
-        splitted = function_str.split(".", 1)
-        if len(splitted) == 1:
-            return __builtins__[splitted[0]]
-        module_name, function_name = splitted
         if module_name in __builtins__:
             obj = __builtins__[module_name]
         elif module_name in globals():
@@ -892,34 +904,32 @@ class TakeByField(StreamInstanceOperator):
         return instance
-class Perturbate(FieldOperator):
-    """Slightly perturbates the contents of 'field'. Could be Handy for imitating prediction from given target.
     When task was classification, argument 'select_from' can be used to list the other potential classes, as a
     relevant perturbation
     """
     select_from: List[Any] = []
-    percentage_to_perturbate: int = 1  # 1 percent
     def verify(self):
         assert (
-            0 <= self.percentage_to_perturbate and self.percentage_to_perturbate <= 100
-        ), f"'percentage_to_perturbate' should be in the range 0..100. Received {self.percentage_to_perturbate}"
     def prepare(self):
         super().prepare()
         self.random_generator = new_random_generator(sub_seed="CopyWithPerturbation")
     def process_value(self, value: Any) -> Any:
-        perturbate = (
-            self.random_generator.randint(1, 100) <= self.percentage_to_perturbate
-        )
-        if not perturbate:
             return value
         if value in self.select_from:
-            # 80% of cases, return a decent class, otherwise, perturbate the value itself as follows
             if self.random_generator.random() < 0.8:
                 return self.random_generator.choice(self.select_from)
@@ -1397,7 +1407,7 @@ class ExtractMostCommonFieldValues(MultiStreamOperator):
             else:
                 # content of 'field' is a list and process_every_value == True: add one occurrence on behalf of each individual value
                 counter.update(instance[self.field])
-        # here counter counts occurrences of individual values, or tupples.
         values_and_counts = counter.most_common()
         if self.overall_top_frequency_percent < 100:
             top_frequency = (
@@ -1606,7 +1616,7 @@ class ApplyMetric(SingleStreamOperator, ArtifactFetcherMixin):
         # by the first listed metric (as desired).
         metric_names = list(reversed(metric_names))
-        # Workaround: The metric/MetricPipeline modifies the stream itself, sometines making it incompatible
         # for further metrics' processing, instead of just modifying the score field.
         # Here we keep all the fields besides the score, and restore them after the metric finishes.
         first_instance = stream.peek()

             if value is not None:
                 if (self.process_every_value is True) and (not isinstance(value, list)):
                     raise ValueError(
+                        f"'process_every_field' == True is allowed only when all fields which have mappers, i.e., {list(self.mappers.keys())} are lists. Instance = {instance}"
                     )
                 if isinstance(value, list) and self.process_every_value:
                     for i, val in enumerate(value):
         return instance
+class InstanceFieldOperator(StreamInstanceOperator):
     """A general stream instance operator that processes the values of a field (or multiple ones).
     Args:
         # self._field_to_field is built explicitly by pairs, or copied from argument 'field_to_field'
         if self.field_to_field is None:
             return
+        # for backward compatibility also allow list of tuples of two strings
         if isoftype(self.field_to_field, List[List[str]]) or isoftype(
             self.field_to_field, List[Tuple[str, str]]
         ):
         )
     @abstractmethod
+    def process_instance_value(self, value: Any, instance: Dict[str, Any]):
         pass
     def prepare(self):
                 ) from e
             try:
                 if self.process_every_value:
+                    new_value = [
+                        self.process_instance_value(value, instance)
+                        for value in old_value
+                    ]
                 else:
+                    new_value = self.process_instance_value(old_value, instance)
             except Exception as e:
                 raise ValueError(
                     f"Failed to process '{from_field}' from {instance} due to : {e}"
         return instance
+class FieldOperator(InstanceFieldOperator):
+    def process_instance_value(self, value: Any, instance: Dict[str, Any]):
+        return self.process_value(value)
+    @abstractmethod
+    def process_value(self, value: Any) -> Any:
+        pass
 class RenameFields(FieldOperator):
     """Renames fields.
         return ".".join(parts)
     def str_to_function(self, function_str: str) -> Callable:
+        parts = function_str.split(".", 1)
+        if len(parts) == 1:
+            return __builtins__[parts[0]]
+        module_name, function_name = parts
         if module_name in __builtins__:
             obj = __builtins__[module_name]
         elif module_name in globals():
         return instance
+class Perturb(FieldOperator):
+    """Slightly perturbs the contents of 'field'. Could be Handy for imitating prediction from given target.
     When task was classification, argument 'select_from' can be used to list the other potential classes, as a
     relevant perturbation
     """
     select_from: List[Any] = []
+    percentage_to_perturb: int = 1  # 1 percent
     def verify(self):
         assert (
+            0 <= self.percentage_to_perturb and self.percentage_to_perturb <= 100
+        ), f"'percentage_to_perturb' should be in the range 0..100. Received {self.percentage_to_perturb}"
     def prepare(self):
         super().prepare()
         self.random_generator = new_random_generator(sub_seed="CopyWithPerturbation")
     def process_value(self, value: Any) -> Any:
+        perturb = self.random_generator.randint(1, 100) <= self.percentage_to_perturb
+        if not perturb:
             return value
         if value in self.select_from:
+            # 80% of cases, return a decent class, otherwise, perturb the value itself as follows
             if self.random_generator.random() < 0.8:
                 return self.random_generator.choice(self.select_from)
             else:
                 # content of 'field' is a list and process_every_value == True: add one occurrence on behalf of each individual value
                 counter.update(instance[self.field])
+        # here counter counts occurrences of individual values, or tuples.
         values_and_counts = counter.most_common()
         if self.overall_top_frequency_percent < 100:
             top_frequency = (
         # by the first listed metric (as desired).
         metric_names = list(reversed(metric_names))
+        # Workaround: The metric/MetricPipeline modifies the stream itself, sometimes making it incompatible
         # for further metrics' processing, instead of just modifying the score field.
         # Here we keep all the fields besides the score, and restore them after the metric finishes.
         first_instance = stream.peek()