Spaces:

ka1kuk
/

LLM-api

Running

App Files Community

ka1kuk committed on Feb 24

Commit

73c21f4

•

1 Parent(s): 03c75c9

Update apis/chat_api.py

Browse files

Files changed (1) hide show

apis/chat_api.py +25 -6

apis/chat_api.py CHANGED Viewed

@@ -97,19 +97,29 @@ class ChatAPIApp:
             description="(list) Messages",
         )
         temperature: Union[float, None] = Field(
-            default=0,
             description="(float) Temperature",
         )
         max_tokens: Union[int, None] = Field(
             default=-1,
             description="(int) Max tokens",
         )
-        stream: bool = Field(
             default=False,
             description="(bool) Stream",
         )
-    def chat_completions(self, item: ChatCompletionsPostItem, api_key: str = Depends(extract_api_key)):
         streamer = MessageStreamer(model=item.model)
         composer = MessageComposer(model=item.model)
         composer.merge(messages=item.messages)
@@ -118,8 +128,10 @@ class ChatAPIApp:
         stream_response = streamer.chat_response(
             prompt=composer.merged_str,
             temperature=item.temperature,
             max_new_tokens=item.max_tokens,
             api_key=api_key,
         )
         if item.stream:
             event_source_response = EventSourceResponse(
@@ -133,7 +145,16 @@ class ChatAPIApp:
             data_response = streamer.chat_return_dict(stream_response)
             return data_response
-     def setup_routes(self):
         for prefix in ["", "/v1", "/api", "/api/v1"]:
             if prefix in ["/api/v1"]:
                 include_in_schema = True
@@ -153,8 +174,6 @@ class ChatAPIApp:
             )(self.chat_completions)
 class ArgParser(argparse.ArgumentParser):
     def __init__(self, *args, **kwargs):
         super(ArgParser, self).__init__(*args, **kwargs)

             description="(list) Messages",
         )
         temperature: Union[float, None] = Field(
+            default=0.5,
             description="(float) Temperature",
         )
+        top_p: Union[float, None] = Field(
+            default=0.95,
+            description="(float) top p",
+        )
         max_tokens: Union[int, None] = Field(
             default=-1,
             description="(int) Max tokens",
         )
+        use_cache: bool = Field(
             default=False,
+            description="(bool) Use cache",
+        )
+        stream: bool = Field(
+            default=True,
             description="(bool) Stream",
         )
+    def chat_completions(
+        self, item: ChatCompletionsPostItem, api_key: str = Depends(extract_api_key)
+    ):
         streamer = MessageStreamer(model=item.model)
         composer = MessageComposer(model=item.model)
         composer.merge(messages=item.messages)
         stream_response = streamer.chat_response(
             prompt=composer.merged_str,
             temperature=item.temperature,
+            top_p=item.top_p,
             max_new_tokens=item.max_tokens,
             api_key=api_key,
+            use_cache=item.use_cache,
         )
         if item.stream:
             event_source_response = EventSourceResponse(
             data_response = streamer.chat_return_dict(stream_response)
             return data_response
+    def get_readme(self):
+        """Render the project's README.md as HTML.
+
+        The README is looked up in the parent of the directory that contains
+        this file and converted with ``markdown2.markdown``.
+
+        Returns:
+            The HTML produced by ``markdown2.markdown`` for the README text.
+
+        Raises:
+            OSError: If README.md cannot be read (e.g. it does not exist).
+        """
+        readme_path = Path(__file__).parents[1] / "README.md"
+        readme_str = readme_path.read_text(encoding="utf-8")
+        # markdown2's table extra is named "tables"; the previous "table"
+        # spelling is not a documented extra, so README tables were not
+        # converted to HTML tables.
+        readme_html = markdown2.markdown(
+            readme_str, extras=["tables", "fenced-code-blocks", "highlightjs-lang"]
+        )
+        return readme_html
+    def setup_routes(self):
         for prefix in ["", "/v1", "/api", "/api/v1"]:
             if prefix in ["/api/v1"]:
                 include_in_schema = True
             )(self.chat_completions)
 class ArgParser(argparse.ArgumentParser):
     def __init__(self, *args, **kwargs):
         super(ArgParser, self).__init__(*args, **kwargs)