# nk-test / langchainTest.py
# Uploaded via huggingface_hub (commit c51674d), 1.92 kB
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
# Load the source document. Put the path/name of your file here; if it is in
# the same directory as this script you can just use the file name.
loader = TextLoader("")
documents = loader.load()

# Chat model — change the model name to the one you want to use, and tweak the
# temperature to see which setting gives better answers.
# (fixed: the original trailing comment used `//`, which is a SyntaxError in Python)
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# Split the document into chunks; you can set the size of each chunk to suit
# your own document.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Create the vector embeddings of your text and index them in Chroma.
embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_documents(texts, embeddings)
from langchain.chains import LLMChain

# Standard prompt template — you can change and experiment with it.
prompt_template = """Use the context below to write a 400 word blog post about the topic below:
Context: {context}
Topic: {topic}
Blog post:"""

PROMPT = PromptTemplate(
    input_variables=["context", "topic"],
    template=prompt_template,
)

# Chain that fills the prompt with context/topic and sends it to the LLM.
chain = LLMChain(llm=llm, prompt=PROMPT)
def generate_blog_post(topic):
    """Retrieve the most relevant document chunks for *topic* and print one
    generated blog post per chunk.

    Args:
        topic: The question/subject to search the indexed document for.
    """
    # k is basically how many chunks of context will be given to the LLM for
    # each search; more could give more context, but it could cost more tokens
    # or sometimes even confuse the model — test it and be aware.
    # Fixed: the original called `search_index.similarity_search(...)`, but no
    # `search_index` is ever defined — the Chroma store is named `docsearch`.
    docs = docsearch.similarity_search(topic, k=4)
    inputs = [{"context": doc.page_content, "topic": topic} for doc in docs]
    print(chain.apply(inputs))


generate_blog_post("your question/subject")