Spaces:
Sleeping
Sleeping
| import os | |
| import base64 | |
| from langchain.docstore.document import Document | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.llms.openai import OpenAI | |
| from langchain.chains.summarize import load_summarize_chain | |
| from langchain.document_loaders import UnstructuredURLLoader | |
| import nltk | |
| import openai | |
# Fetch the NLTK "punkt" resource when this module is imported.
nltk.download("punkt")

# API credentials are read from the environment; each is None when unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
SCRAP_API_KEY = os.environ.get("SCRAP_API_KEY")
def create_brand_html(brand_link):
    """Load a web page and split its text into LangChain documents.

    The page at ``brand_link`` is loaded with ``UnstructuredURLLoader``. The
    text of the first loaded document is split by a ``CharacterTextSplitter``
    configured with ``chunk_size=3000`` and ``chunk_overlap=200`` (lengths
    measured with ``len``), and every resulting chunk is wrapped in a
    ``Document``.

    Args:
        brand_link: URL of the page to load.

    Returns:
        A list of ``Document`` objects, one per text chunk. The list is empty
        when the loader produced no documents for the URL.
    """
    chunk_size = 3000
    chunk_overlap = 200

    data = UnstructuredURLLoader(urls=[brand_link]).load()
    if not data:
        # Nothing was loaded for this URL, so there is no data[0] to split;
        # return an empty result instead of raising IndexError.
        return []

    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    # Only the first loaded document is used: a single URL was requested.
    texts = text_splitter.split_text(data[0].page_content)
    return [Document(page_content=t) for t in texts]
def create_langchain_openai_query(docs):
    """Run a LangChain ``map_reduce`` summarize chain over ``docs``.

    Args:
        docs: Documents handed to the chain's ``run`` method.

    Returns:
        The value returned by the chain's ``run`` call.
    """
    # The key is set on the openai module first, then read back from there
    # when constructing the LangChain OpenAI wrapper.
    openai.api_key = OPENAI_API_KEY
    summarizer = load_summarize_chain(
        OpenAI(temperature=0, openai_api_key=openai.api_key),
        chain_type="map_reduce",
    )
    return summarizer.run(docs)
def create_screenshot_from_scrap_fly(link_to_fetch):
    """Request a full-page screenshot of a URL from the Scrapfly screenshot API.

    The response body is written to a file named ``brand_ss_<n>.png`` (``n`` is
    a random integer between 1 and 100000000), using a relative path.

    Args:
        link_to_fetch: URL of the page to capture.

    Returns:
        ``{"location": <file name>, "success": True}`` when the request
        succeeded and the file was written, otherwise
        ``{"success": False, "error": <exception>}``. Exceptions are not
        propagated to the caller.
    """
    import random

    import requests

    try:
        params = {
            'key': SCRAP_API_KEY,
            'url': link_to_fetch,
            'auto_scroll': True,
            'capture': 'fullpage',
            'options': 'block_banners'
        }
        response = requests.get('https://api.scrapfly.io/screenshot', params=params)
        # A 4xx/5xx response carries an error payload rather than an image;
        # raise here so it is reported as a failure instead of being saved
        # as a .png and returned as a success.
        response.raise_for_status()
        location = f"brand_ss_{random.randint(1, 100000000)}.png"
        with open(location, 'wb') as file:
            file.write(response.content)
        return {"location": location, "success": True}
    except Exception as e:
        # Best-effort contract: every failure is returned to the caller in
        # the result dict rather than raised.
        return {"success": False, "error": e}
def check_and_compress_image(image_path, output_path, target_size_mb=4, max_size_mb=5):
    """Re-save an image at decreasing quality when it exceeds ``max_size_mb``.

    The size of ``image_path`` is printed. If it is larger than
    ``max_size_mb``, the image is saved to ``output_path`` starting at
    quality 95 and stepping down by 5; the loop stops once the saved file is
    no larger than ``target_size_mb`` or after the save at quality 15.

    Args:
        image_path: Path of the image to inspect.
        output_path: Path the re-saved image is written to.
        target_size_mb: Size, in MB, at which re-saving stops.
        max_size_mb: Size, in MB, above which the image is re-saved at all.

    Returns:
        ``{"success": False}`` when the image is not larger than
        ``max_size_mb`` (nothing is written), otherwise ``{"success": True}``.
        ``True`` only means the compression branch ran; it does not guarantee
        that the target size was reached.
    """
    image_size = os.path.getsize(image_path) / (1024 * 1024)  # bytes -> MB
    print(f"Original image size: {image_size:.2f} MB")
    if image_size <= max_size_mb:
        return {"success": False}

    # Imported here so the no-compression path does not require Pillow.
    from PIL import Image

    # The context manager closes the file handle opened by Image.open, which
    # would otherwise stay open after the function returns.
    with Image.open(image_path) as img:
        quality = 95
        while image_size > target_size_mb and quality > 10:
            img.save(output_path, optimize=True, quality=quality)
            image_size = os.path.getsize(output_path) / (1024 * 1024)
            quality -= 5
    return {"success": True}