from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import LLMChain
from langchain.document_loaders import TextLoader
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI

loader = TextLoader("")  # put the path and file name here; if the file is in the same directory as this script, the file name alone is enough
documents = loader.load()

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)  # swap in the model you want to use, and tweak the temperature to see which setting gives better answers

text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)  # sets the size of each chunk your document is split into
texts = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()  # creates the vector embeddings of your text
docsearch = Chroma.from_documents(texts, embeddings)

prompt_template = """Use the context below to write a 400 word blog post about the topic below:
    Context: {context}
    Topic: {topic}
    Blog post:"""  # this is the standard prompt template; feel free to change and experiment with it

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "topic"]
)

chain = LLMChain(llm=llm, prompt=PROMPT)

def generate_blog_post(topic):
    # k is how many chunks of context are handed to the LLM per search; more chunks
    # can give more context, but they cost more tokens and can sometimes even confuse
    # the model, so test it and be aware
    docs = docsearch.similarity_search(topic, k=4)
    inputs = [{"context": doc.page_content, "topic": topic} for doc in docs]
    print(chain.apply(inputs))

generate_blog_post("your question/subject")
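
# A minimal variant, not part of the original walkthrough: chain.apply(inputs)
# above calls the LLM once per retrieved chunk, so it prints k separate drafts.
# If you would rather get a single post informed by all the retrieved chunks,
# one sketch (the function name generate_blog_post_single is hypothetical) is
# to join the chunk texts into one context string and run the chain once.
def generate_blog_post_single(topic):
    docs = docsearch.similarity_search(topic, k=4)
    context = "\n\n".join(doc.page_content for doc in docs)  # merge chunks into one context
    print(chain.run(context=context, topic=topic))  # one LLM call, one post

# With chunk_size=500 and k=4, the merged context stays around 2,000 characters,
# comfortably within gpt-3.5-turbo's context window; raise k with care.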