Spaces:

unitxt
/

metric

Running

App Files Files Community

Elron commited on Mar 5

Commit

cd9d84b

•

1 Parent(s): 99b503f

Upload standard.py with huggingface_hub

Browse files

Files changed (1) hide show

standard.py +10 -14

standard.py CHANGED Viewed

@@ -3,7 +3,6 @@ from typing import List
 from .card import TaskCard
 from .dataclass import Field, InternalField, OptionalField
 from .formats import Format, SystemFormat
-from .instructions import EmptyInstruction, Instruction
 from .logging_utils import get_logger
 from .operator import SourceSequentialOperator, StreamingOperator
 from .operators import (
@@ -14,6 +13,7 @@ from .operators import (
 from .recipe import Recipe
 from .schema import ToUnitxtGroup
 from .splitters import Sampler, SeparateSplit, SpreadSplit
 from .templates import Template
 logger = get_logger()
@@ -31,7 +31,7 @@ class AddDemosField(SpreadSplit):
 class BaseRecipe(Recipe, SourceSequentialOperator):
     card: TaskCard
     template: Template = None
-    instruction: Instruction = Field(default_factory=EmptyInstruction)
     format: Format = Field(default_factory=SystemFormat)
     loader_limit: int = None
@@ -46,6 +46,7 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
     demos_pool_size: int = None
     num_demos: int = 0
     demos_pool_name: str = "demos_pool"
     demos_taken_from: str = "train"
@@ -135,6 +136,7 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
                     from_split=self.demos_taken_from,
                     to_split_names=[self.demos_pool_name, self.demos_taken_from],
                     to_split_sizes=[int(self.demos_pool_size)],
                 )
             )
@@ -160,7 +162,7 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
                     sampler=self.sampler,
                 )
             )
-        self.steps.append(self.instruction)
         self.steps.append(self.format)
         if self.augmentor.augment_model_input:
             self.steps.append(self.augmentor)
@@ -177,7 +179,6 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
 class StandardRecipeWithIndexes(BaseRecipe):
-    instruction_card_index: int = None
     template_card_index: int = None
     def prepare(self):
@@ -192,17 +193,12 @@ class StandardRecipeWithIndexes(BaseRecipe):
                 self.template = self.card.templates[self.template_card_index]
             except Exception as e:
                 if isinstance(self.card.templates, dict):
-                    options = self.card.templates.keys()
                 else:
                     options = list(range(0, len(self.card.templates)))
                 raise ValueError(
-                    f"card_template_index '{self.template_card_index}' is not in card. Available options: {options}"
                 ) from e
-        assert (
-            self.instruction_card_index is None or self.instruction is None
-        ), "Specify either instruction or instruction_card_index"
-        if self.instruction_card_index is not None:
-            self.instruction = self.card.instructions[int(self.instruction_card_index)]
         super().prepare()
@@ -217,7 +213,7 @@ class StandardRecipe(StandardRecipeWithIndexes):
     Attributes:
         card (TaskCard): TaskCard object associated with the recipe.
         template (Template, optional): Template object to be used for the recipe.
-        instruction (Instruction, optional): Instruction object to be used for the recipe.
         loader_limit (int, optional): Specifies the maximum number of instances per stream to be returned from the loader (used to reduce loading time in large datasets)
         format (SystemFormat, optional): SystemFormat object to be used for the recipe.
         train_refiner (StreamRefiner, optional): Train refiner to be used in the recipe.
@@ -231,6 +227,7 @@ class StandardRecipe(StandardRecipeWithIndexes):
         demos_pool_name (str, optional): Name of the demos pool. Default is "demos_pool".
         demos_taken_from (str, optional): Specifies from where the demos are taken. Default is "train".
         demos_field (str, optional): Field name for demos. Default is "demos".
         sampler (Sampler, optional): Sampler object to be used in the recipe.
         steps (List[StreamingOperator], optional): List of StreamingOperator objects to be used in the recipe.
         augmentor (Augmentor) : Augmentor to be used to pseudo randomly augment the source text
@@ -244,8 +241,7 @@ class StandardRecipe(StandardRecipeWithIndexes):
             by arranging all the steps, refiners, and renderers in a sequential manner.
     Raises:
-        AssertionError: If both template and template_card_index, or instruction and instruction_card_index
-            are specified at the same time.
     """
     pass

 from .card import TaskCard
 from .dataclass import Field, InternalField, OptionalField
 from .formats import Format, SystemFormat
 from .logging_utils import get_logger
 from .operator import SourceSequentialOperator, StreamingOperator
 from .operators import (
 from .recipe import Recipe
 from .schema import ToUnitxtGroup
 from .splitters import Sampler, SeparateSplit, SpreadSplit
+from .system_prompts import EmptySystemPrompt, SystemPrompt
 from .templates import Template
 logger = get_logger()
 class BaseRecipe(Recipe, SourceSequentialOperator):
     card: TaskCard
     template: Template = None
+    system_prompt: SystemPrompt = Field(default_factory=EmptySystemPrompt)
     format: Format = Field(default_factory=SystemFormat)
     loader_limit: int = None
     demos_pool_size: int = None
     num_demos: int = 0
+    demos_removed_from_data: bool = True
     demos_pool_name: str = "demos_pool"
     demos_taken_from: str = "train"
                     from_split=self.demos_taken_from,
                     to_split_names=[self.demos_pool_name, self.demos_taken_from],
                     to_split_sizes=[int(self.demos_pool_size)],
+                    remove_targets_from_source_split=self.demos_removed_from_data,
                 )
             )
                     sampler=self.sampler,
                 )
             )
+        self.steps.append(self.system_prompt)
         self.steps.append(self.format)
         if self.augmentor.augment_model_input:
             self.steps.append(self.augmentor)
 class StandardRecipeWithIndexes(BaseRecipe):
     template_card_index: int = None
     def prepare(self):
                 self.template = self.card.templates[self.template_card_index]
             except Exception as e:
                 if isinstance(self.card.templates, dict):
+                    options = list(self.card.templates.keys())
                 else:
                     options = list(range(0, len(self.card.templates)))
                 raise ValueError(
+                    f"card_template_index '{self.template_card_index}' is not defined in card. Possible card_template_index options: {options}"
                 ) from e
         super().prepare()
     Attributes:
         card (TaskCard): TaskCard object associated with the recipe.
         template (Template, optional): Template object to be used for the recipe.
+        system_prompt (SystemPrompt, optional): SystemPrompt object to be used for the recipe.
         loader_limit (int, optional): Specifies the maximum number of instances per stream to be returned from the loader (used to reduce loading time in large datasets)
         format (SystemFormat, optional): SystemFormat object to be used for the recipe.
         train_refiner (StreamRefiner, optional): Train refiner to be used in the recipe.
         demos_pool_name (str, optional): Name of the demos pool. Default is "demos_pool".
         demos_taken_from (str, optional): Specifies from where the demos are taken. Default is "train".
         demos_field (str, optional): Field name for demos. Default is "demos".
+        demos_removed_from_data (bool, optional): whether to remove the demos from the source data, Default is True
         sampler (Sampler, optional): Sampler object to be used in the recipe.
         steps (List[StreamingOperator], optional): List of StreamingOperator objects to be used in the recipe.
         augmentor (Augmentor) : Augmentor to be used to pseudo randomly augment the source text
             by arranging all the steps, refiners, and renderers in a sequential manner.
     Raises:
+        AssertionError: If both template and template_card_index are specified at the same time.
     """
     pass