# nk-test / langchainTest.py
# Uploaded via huggingface_hub (commit c51674d), 1.92 kB
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
# Load the source document. Put the path/name of your file here; if it is in
# the same directory as this script you can just use the file name.
loader = TextLoader("")
documents = loader.load()

# Chat model — change the model name to the one you want to use, and tweak the
# temperature to see which setting gives better answers.
# (fixed: the original trailing comment used `//`, which is a SyntaxError in Python)
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# Split the document into chunks; you can set the size of each chunk to suit
# your own document.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Create the vector embeddings of your text and index them in Chroma.
embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_documents(texts, embeddings)
from langchain.chains import LLMChain

# Standard prompt template — you can change and experiment with it.
prompt_template = """Use the context below to write a 400 word blog post about the topic below:
Context: {context}
Topic: {topic}
Blog post:"""

PROMPT = PromptTemplate(
    input_variables=["context", "topic"],
    template=prompt_template,
)

# Chain that fills the prompt with context/topic and sends it to the LLM.
chain = LLMChain(llm=llm, prompt=PROMPT)
def generate_blog_post(topic):
    """Retrieve the most relevant document chunks for *topic* and print one
    generated blog post per chunk.

    Args:
        topic: The question/subject to search the indexed document for.
    """
    # k is basically how many chunks of context will be given to the LLM for
    # each search; more could give more context, but it could cost more tokens
    # or sometimes even confuse the model — test it and be aware.
    # Fixed: the original called `search_index.similarity_search(...)`, but no
    # `search_index` is ever defined — the Chroma store is named `docsearch`.
    docs = docsearch.similarity_search(topic, k=4)
    inputs = [{"context": doc.page_content, "topic": topic} for doc in docs]
    print(chain.apply(inputs))


generate_blog_post("your question/subject")