Mirror of https://github.com/tcsenpai/agenticSeek.git (synced 2025-06-06 19:15:28 +00:00)
Feat: prompt improve and start link check to avoid paywall
Commit: f53647ad3f
Parent: 0ee77e8ad9
@@ -1,21 +1,9 @@
-You are an internet ai that can browse the web for information.
-In fact you are embedded in a browser with selenium.
-
-If you need to conduct a web search, you can use the following tool:
-- web_search: to search the web for information
-
-This is how you can use the web_search tool:
-```web_search
-<query>
-```
-
-This will provide you with a list of links that you can navigate to.
-You can navigate to a specific link by typing the link. For example, If you say:
-"I want to navigate to https://www.google.com"
-
-You will navigate to https://www.google.com
-Any link that you type will be opened in a new tab.
-
-If you want to exit the browser, you can say:
-"REQUEST_EXIT"
-Only exit the browser if you are done browsing.
+You are a web browsing AI, your goal is to explore the internet to find information.
+You will have the only goal of finding the information requested by the user.
+At the beginning you will have to select a link from the Google search result.
+You will choose a link by simply typing it.
+This will automatically make you browse to the link.
+Once on a webpage you will see the page content and be given further navigation options.
+You can type a link to navigate further on the page, go back to the search result or exit.
+At each interaction step the browser will remind you of your options.
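The rewritten prompt turns browsing into a plain-text protocol: the model either types the URL it wants to follow or answers "REQUEST_EXIT" once it has found what it was looking for. Below is a minimal sketch of how a controlling loop could interpret such an answer; the parse_browsing_decision helper and its return values are illustrative assumptions, not part of this commit.

```python
import re

def parse_browsing_decision(answer: str):
    """Hypothetical helper: map the model's raw answer to an action.

    Returns ("exit", None), ("navigate", url) or ("unknown", None).
    """
    if "REQUEST_EXIT" in answer:
        return "exit", None
    # The prompt asks the model to "choose a link by simply typing it",
    # so the first URL in the answer is treated as the navigation target
    # (same pattern as extract_links in BrowserAgent).
    match = re.search(r'https?://[^\s]+', answer)
    if match:
        return "navigate", match.group(0)
    return "unknown", None

# Example: parse_browsing_decision("I want to navigate to https://www.google.com")
# returns ("navigate", "https://www.google.com").
```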
@@ -34,7 +34,7 @@ class Agent():
                  name: str,
                  prompt_path:str,
                  provider,
-                 recover_last_session=False) -> None:
+                 recover_last_session=True) -> None:
         self.agent_name = name
         self.role = None
         self.current_directory = os.getcwd()
@@ -19,6 +19,7 @@ class BrowserAgent(Agent):
         self.browser.go_to("https://github.com/")
         self.search_history = []
         self.navigable_links = []
+        self.ai_notes = []
 
     def extract_links(self, search_result: str):
         links = re.findall(r'https?://[^\s]+', search_result)
@@ -64,9 +65,9 @@ class BrowserAgent(Agent):
         If you found a clear answer, please say "REQUEST_EXIT".
         You must choose a link to navigate to, go back or exit.
         Do not explain your choice.
+        You can take note about your finding with TAKE_NOTE("<your note>")
         """
 
     def llm_decide(self, prompt):
         animate_thinking("Thinking...", color="status")
         self.memory.push('user', prompt)
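The new self.ai_notes list pairs with the TAKE_NOTE("<your note>") instruction added to the prompt, so the agent is apparently expected to jot down findings as it browses. The diff does not show how notes are pulled out of the model's answer; one possible sketch, using a hypothetical extract_notes helper, is:

```python
import re

def extract_notes(answer: str) -> list:
    """Hypothetical helper (not in this commit): collect TAKE_NOTE("...")
    calls from the model's answer so they can be appended to self.ai_notes."""
    return re.findall(r'TAKE_NOTE\("([^"]*)"\)', answer)

# Sketch of usage inside the browsing loop:
#   self.ai_notes.extend(extract_notes(answer))
```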
@@ -22,6 +22,37 @@ class webSearch(Tools):
         super().__init__()
         self.tag = "web_search"
         self.api_key = api_key or os.getenv("SERPAPI_KEY") # Requires a SerpApi key
+        self.paywall_keywords = [
+            "subscribe", "paywall", "login to continue", "access denied", "restricted content"
+        ]
+
+    async def link_valid(self, session, link):
+        """Asynchronously check whether a link is reachable or likely paywalled."""
+        if not link.startswith("http"):
+            return "Status: Invalid URL"
+        try:
+            async with session.get(link, timeout=aiohttp.ClientTimeout(total=5)) as response:
+                status = response.status
+                if status == 200:
+                    # Await the body first, then slice; only the first 1000 characters are scanned.
+                    content = (await response.text(encoding='utf-8', errors='ignore'))[:1000]
+                    if any(keyword in content.lower() for keyword in self.paywall_keywords):
+                        return "Status: Possible Paywall"
+                    return "Status: Accessible"
+                elif status == 404:
+                    return "Status: 404 Not Found"
+                elif status == 403:
+                    return "Status: 403 Forbidden"
+                else:
+                    return f"Status: {status} {response.reason}"
+        except Exception as e:
+            return f"Error: {str(e)}"
+
+    async def check_all_links(self, links):
+        """Check all links asynchronously using a single session."""
+        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
+        async with aiohttp.ClientSession(headers=headers) as session:
+            tasks = [self.link_valid(session, link) for link in links]
+            return await asyncio.gather(*tasks)
+
     def execute(self, blocks: str, safety: bool = True) -> str:
         if self.api_key is None:
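Because check_all_links is a coroutine, the synchronous execute path still needs an event loop to drive it. A minimal sketch of how search results might be screened before being handed to the agent, assuming links already holds the result URLs extracted from the SerpApi response (the filter_paywalled function is an assumption, not part of this diff):

```python
import asyncio

def filter_paywalled(tool, links):
    """Sketch: keep only links the async checker reports as accessible.

    `tool` is assumed to be a webSearch instance and `links` a list of URLs.
    """
    # Drive the async checker from synchronous code with a fresh event loop.
    statuses = asyncio.run(tool.check_all_links(links))
    return [link for link, status in zip(links, statuses)
            if status == "Status: Accessible"]
```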