added windows app focus option

Will now bring an app to the front in Windows. Still need to add logic to launch the app if it's not already open.

Minor issue: it grabs the first instance of an app, so if multiple Notepad windows are open, only one of them is grabbed.
This commit is contained in:
maglore9900 2024-08-28 13:21:32 -04:00
parent 010aa0346e
commit 10e76f00b8
2 changed files with 68 additions and 4 deletions

View File

@ -1,6 +1,6 @@
from typing import TypedDict, Annotated, List, Union
import operator
from modules import adapter, spotify, app_launcher
from modules import adapter, spotify, app_launcher, windows_focus
from langchain_core.agents import AgentAction, AgentFinish
from langchain.agents import create_openai_tools_agent
from langchain import hub
@ -16,6 +16,7 @@ class Agent:
self.ad = adapter.Adapter()
self.sp = spotify.Spotify()
self.ap = app_launcher.AppLauncher()
self.wf = windows_focus.WindowFocusManager()
self.llm = self.ad.llm_chat
# self.final_answer_llm = self.llm.bind_tools(
# [self.rag_final_answer_tool], tool_choice="rag_final_answer"
@ -28,7 +29,8 @@ class Agent:
tools=[
# self.rag_final_answer_tool,
self.spotify,
self.app_launcher
self.app_launcher,
self.windows_focus
],
prompt=self.prompt,
)
@ -62,6 +64,14 @@ class Agent:
pass the name of the app to this tool as app_name
"""
# Tool stub listed in the agent's `tools`; the method body itself does no work.
# NOTE(review): presumably the @tool decorator uses the docstring below as the
# tool description given to the LLM -- treat its wording as runtime text; confirm.
@tool("windows_focus")
async def windows_focus(self, app_name: str):
    """Use this tool to focus on a window on your computer.
    The user query will contain the app name, as well as focus, switch, show, or similar type words
    pass the name of the app to this tool as app_name
    """
    # Placeholder result: the graph node registered as "windows_focus"
    # (windows_focus_tool) is what actually acts on app_name.
    return ""
# @tool("rag_final_answer")
# async def rag_final_answer_tool(self, answer: str, source: str):
# """Returns a natural language response to the user in `answer`, and a
@ -75,6 +85,7 @@ class Agent:
self.graph.add_node("query_agent", self.run_query_agent)
self.graph.add_node("spotify", self.spotify_tool)
self.graph.add_node("app_launcher", self.app_launcher_tool)
self.graph.add_node("windows_focus", self.windows_focus_tool)
# self.graph.add_node("rag_final_answer", self.rag_final_answer)
# self.graph.add_node("error", self.rag_final_answer)
self.graph.add_node("respond", self.respond)
@ -89,10 +100,12 @@ class Agent:
# "error": "error",
"respond": "respond",
"app_launcher": "app_launcher",
"windows_focus": "windows_focus"
},
)
self.graph.add_edge("spotify", END)
self.graph.add_edge("app_launcher", END)
self.graph.add_edge("windows_focus", END)
# self.graph.add_edge("error", END)
# self.graph.add_edge("rag_final_answer", END)
# self.graph.add_edge("query_agent", END)
@ -136,13 +149,19 @@ class Agent:
tool_action = state['agent_out'][0]
app_name = tool_action.tool_input['app_name']
print(f"app_name: {app_name}")
# print(f"search: {search}")
self.ap.find_and_open_app(app_name)
async def windows_focus_tool(self, state: str):
    """Graph node that focuses the window named in the agent's tool call.

    Reads ``app_name`` from the tool input of the first entry in
    ``state['agent_out']`` and hands it to the window focus manager.

    NOTE(review): ``state`` is annotated as ``str`` but is indexed like a
    mapping here -- confirm the intended type.
    """
    print("> windows_focus_tool")
    print(f"state: {state}")
    requested_app = state['agent_out'][0].tool_input['app_name']
    print(f"app_name: {requested_app}")
    self.wf.bring_specific_instance_to_front(requested_app)
async def respond(self, answer: str):
    """Graph node that extracts the agent's final output.

    Looks up ``'agent_out'`` in ``answer`` and returns its
    ``return_values['output']`` (``None`` when that key is absent),
    wrapped as ``{"agent_out": ...}``.

    NOTE(review): ``answer`` is annotated as ``str`` but is used like a
    mapping here -- confirm the intended type.
    """
    print("> respond")
    print(f"answer: {answer}")
    finished = answer.get('agent_out')
    return {"agent_out": finished.return_values.get('output')}

45
modules/windows_focus.py Normal file
View File

@ -0,0 +1,45 @@
import win32gui
import win32con
class WindowFocusManager:
    """Find visible windows by partial title and bring one to the foreground.

    Uses the ``win32gui`` and ``win32con`` modules imported at the top of
    this file.
    """

    def __init__(self):
        # (hwnd, title) pairs gathered by the most recent find_windows() call.
        self.windows = []

    def enum_windows_callback(self, hwnd, window_list):
        """Append ``(hwnd, title)`` to ``window_list`` if the window is visible.

        This is the callback handed to ``win32gui.EnumWindows``.
        """
        if win32gui.IsWindowVisible(hwnd):
            window_list.append((hwnd, win32gui.GetWindowText(hwnd)))

    def find_windows(self, partial_window_title):
        """Return handles of visible windows whose title contains the text.

        Matching is case-insensitive and ignores leading/trailing whitespace
        in ``partial_window_title``.

        Args:
            partial_window_title: Text to look for inside window titles.

        Returns:
            A list of window handles, in enumeration order. Empty when the
            query is ``None``, empty, or only whitespace.
        """
        self.windows = []
        needle = (partial_window_title or "").strip().lower()
        if not needle:
            # An empty string is a substring of every title, so without this
            # guard a blank query would "match" every visible window and the
            # caller would focus an arbitrary one.
            return []
        win32gui.EnumWindows(self.enum_windows_callback, self.windows)
        return [hwnd for hwnd, title in self.windows if needle in title.lower()]

    def bring_window_to_front(self, hwnd):
        """Restore the window if it is minimized, then make it the foreground window."""
        # Restore first so the window is actually on screen when it is
        # activated, instead of activating a still-minimized window.
        if win32gui.IsIconic(hwnd):
            win32gui.ShowWindow(hwnd, win32con.SW_RESTORE)
        win32gui.SetForegroundWindow(hwnd)
        window_title = win32gui.GetWindowText(hwnd)
        print(f"Brought window '{window_title}' to the front.")

    def bring_specific_instance_to_front(self, partial_window_title):
        """Focus the first visible window whose title contains the text.

        When several windows match, only the first one found is used.

        Args:
            partial_window_title: Text to look for inside window titles.

        Returns:
            ``True`` if a matching window was found and brought forward,
            ``False`` if nothing matched.
        """
        matching_windows = self.find_windows(partial_window_title)
        if not matching_windows:
            print(f"No windows found with title containing '{partial_window_title}'.")
            return False
        self.bring_window_to_front(matching_windows[0])
        return True
# Example usage:
# window_manager = WindowFocusManager()
# window_manager.bring_specific_instance_to_front("outlook") # Bring the first matching instance of Outlook to the front
# window_manager.bring_specific_instance_to_front("Notepad") # Bring the first matching instance of Notepad to the front