# Spaces:
# Sleeping
# Sleeping
# Ingestion script: download a fixed list of web pages, split them into
# chunks, embed the chunks, index them in FAISS, and pickle the resulting
# vector store to "vectorstore.pkl" in the current working directory.
#
# Everything runs at module top level, so executing (or importing) this file
# performs the downloads, the embedding calls, and the file write.
import pickle

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.vectorstores.faiss import FAISS
from langchain.embeddings import OpenAIEmbeddings

# Load Data
# NOTE(review): the list goes from february-4 straight to february-6;
# confirm whether leaving out february-5 is intentional.
urls = [
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-4-2023",
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-6-2023",
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-7-2023",
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-8-2023",
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-9-2023",
]
loader = UnstructuredURLLoader(urls=urls)
raw_documents = loader.load()

# Split text
# The splitter is constructed with no arguments, i.e. with the library's
# default chunking settings.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(raw_documents)

# Load Data to vectorstore
# NOTE(review): OpenAIEmbeddings presumably needs OpenAI credentials to be
# configured in the environment before this line runs - confirm.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents, embeddings)

# Save vectorstore
# NOTE: the index is serialized with pickle. Whatever later reads
# "vectorstore.pkl" must only unpickle files from a trusted source, because
# unpickling can execute arbitrary code.
with open("vectorstore.pkl", "wb") as f:
    pickle.dump(vectorstore, f)