From 5e7dd321f0638d3464da2e9bfdde60c98e53c174 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sat, 22 Mar 2025 12:12:11 +0100 Subject: [PATCH 1/7] Fix : pyaudio.paInt16 missing in AudioRecorder class init --- Dockerfile | 26 ++++++++++++++++++++++++++ requirements.txt | 12 ++++++------ server/Dockerfile | 14 ++++++++++++++ server/requirements.txt | 2 ++ server/{server_ollama.py => server.py} | 5 ++++- setup.py | 3 +-- 6 files changed, 53 insertions(+), 9 deletions(-) create mode 100644 Dockerfile create mode 100644 server/Dockerfile create mode 100644 server/requirements.txt rename server/{server_ollama.py => server.py} (95%) diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6f010a0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,26 @@ +# Use official Python 3.11 image as the base +FROM python:3.11 + +# Set working directory +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + g++ \ + gfortran \ + libportaudio2 \ + portaudio19-dev \ + ffmpeg \ + libavcodec-dev \ + libavformat-dev \ + libavutil-dev \ + chromium \ + chromium-driver \ + && rm -rf /var/lib/apt/lists/* + +RUN pip cache purge + +COPY . . 
+ +RUN BLIS_ARCH=generic pip install --no-cache-dir -r requirements.txt \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index db9b52c..e216504 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ +setuptools>=75.6.0 requests>=2.31.0 +numpy>=1.24.4 colorama>=0.4.6 python-dotenv>=1.0.0 playsound>=1.3.0 @@ -7,17 +9,15 @@ transformers>=4.46.3 torch>=2.4.1 python-dotenv>=1.0.0 ollama>=0.4.7 -scipy>=1.15.1 +scipy>=1.9.3 kokoro>=0.7.12 -flask>=3.1.0 soundfile>=0.13.1 protobuf>=3.20.3 -termcolor>=2.5.0 -ipython>=8.34.0 -gliclass>=0.1.8 +termcolor>=2.4.0 +ipython>=8.13.0 pyaudio>=0.2.14 librosa>=0.10.2.post1 -selenium>=4.29.0 +selenium>=4.27.1 markdownify>=1.1.0 text2emotion>=0.0.5 langid>=1.1.6 diff --git a/server/Dockerfile b/server/Dockerfile new file mode 100644 index 0000000..98287c0 --- /dev/null +++ b/server/Dockerfile @@ -0,0 +1,14 @@ +FROM ubuntu:20.04 + +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y python3 python3-pip && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . 
+ +RUN pip3 install --no-cache-dir -r requirements.txt + +CMD ["python3", "--version"] \ No newline at end of file diff --git a/server/requirements.txt b/server/requirements.txt new file mode 100644 index 0000000..7709179 --- /dev/null +++ b/server/requirements.txt @@ -0,0 +1,2 @@ +flask>=2.3.0 +ollama>=0.4.7 \ No newline at end of file diff --git a/server/server_ollama.py b/server/server.py similarity index 95% rename from server/server_ollama.py rename to server/server.py index c90d0a9..5bb4269 100644 --- a/server/server_ollama.py +++ b/server/server.py @@ -37,7 +37,10 @@ class GenerationState: state = GenerationState() -def generate_response(history): # Only takes history as an argument +def generate_response_vllm(history): + pass + +def generate_response_ollama(history): # Only takes history as an argument global state try: with state.lock: diff --git a/setup.py b/setup.py index 002f842..bde5055 100644 --- a/setup.py +++ b/setup.py @@ -24,12 +24,11 @@ setup( "transformers>=4.46.3", "torch>=2.4.1", "ollama>=0.4.7", - "scipy>=1.15.1", + "scipy>=1.9.3", "kokoro>=0.7.12", "flask>=3.1.0", "protobuf>=3.20.3", "termcolor>=2.5.0", - "gliclass>=0.1.8", "ipython>=8.34.0", "librosa>=0.10.2.post1", "selenium>=4.29.0", From 47b3bcf297c9a24f29fd35f07bd932d444db1b8f Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sat, 22 Mar 2025 12:36:43 +0100 Subject: [PATCH 2/7] fix : wheel error on some linux system --- scripts/linux_install.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/linux_install.sh b/scripts/linux_install.sh index c43a41e..c0738b5 100644 --- a/scripts/linux_install.sh +++ b/scripts/linux_install.sh @@ -7,6 +7,9 @@ sudo apt-get update pip install --upgrade pip +# install wheel +pip install --upgrade pip setuptools wheel + # Install Python dependencies from requirements.txt pip3 install -r requirements.txt From 771ac22d7f157e004ea2d48f960260b1fee19849 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sat, 22 Mar 2025 12:39:37 +0100 Subject: 
[PATCH 3/7] install: ensure port audio installation --- scripts/linux_install.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/linux_install.sh b/scripts/linux_install.sh index c0738b5..f8998c2 100644 --- a/scripts/linux_install.sh +++ b/scripts/linux_install.sh @@ -7,6 +7,8 @@ sudo apt-get update pip install --upgrade pip +# install port audio +sudo apt-get install portaudio19-dev python-pyaudio python3-pyaudio # install wheel pip install --upgrade pip setuptools wheel From 76f52846debd1c6d9843e4c11260b1b3ef84ab88 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sat, 22 Mar 2025 12:44:41 +0100 Subject: [PATCH 4/7] feat : don't import pyaudio if stt not enabled --- sources/speech_to_text.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sources/speech_to_text.py b/sources/speech_to_text.py index 2d76c11..016ac2f 100644 --- a/sources/speech_to_text.py +++ b/sources/speech_to_text.py @@ -3,10 +3,8 @@ import queue import threading import numpy as np import torch -from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline import time -import librosa -import pyaudio +from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline audio_queue = queue.Queue() done = False @@ -16,6 +14,7 @@ class AudioRecorder: AudioRecorder is a class that records audio from the microphone and adds it to the audio queue. 
""" def __init__(self, format: int = pyaudio.paInt16, channels: int = 1, rate: int = 4096, chunk: int = 8192, record_seconds: int = 5, verbose: bool = False): + import librosa import pyaudio self.format = format self.channels = channels From bc38385fe90fe47ea7c3a3bd1ec4713327b72462 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sat, 22 Mar 2025 13:22:49 +0100 Subject: [PATCH 5/7] fix : pyaudio install --- scripts/linux_install.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/linux_install.sh b/scripts/linux_install.sh index f8998c2..e9c71b2 100644 --- a/scripts/linux_install.sh +++ b/scripts/linux_install.sh @@ -7,6 +7,8 @@ sudo apt-get update pip install --upgrade pip +# install pyaudio +pip install pyaudio # install port audio sudo apt-get install portaudio19-dev python-pyaudio python3-pyaudio # install wheel From 751212db47f2dde430d420b6b4f9d141ec93acee Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sat, 22 Mar 2025 13:29:06 +0100 Subject: [PATCH 6/7] script update --- scripts/linux_install.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/linux_install.sh b/scripts/linux_install.sh index e9c71b2..28dd9d6 100644 --- a/scripts/linux_install.sh +++ b/scripts/linux_install.sh @@ -9,10 +9,14 @@ pip install --upgrade pip # install pyaudio pip install pyaudio +# make sure essential tool are installed +sudo apt install python3-dev python3-pip python3-wheel build-essential # install port audio sudo apt-get install portaudio19-dev python-pyaudio python3-pyaudio # install wheel pip install --upgrade pip setuptools wheel +# install docker compose +sudo apt install docker-compose # Install Python dependencies from requirements.txt pip3 install -r requirements.txt From 0397183f2a6ebd7ff294bb08db91aaf7ee026a53 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sat, 22 Mar 2025 13:34:05 +0100 Subject: [PATCH 7/7] fix : error pyaudio import --- sources/speech_to_text.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/sources/speech_to_text.py b/sources/speech_to_text.py index 016ac2f..92f50b3 100644 --- a/sources/speech_to_text.py +++ b/sources/speech_to_text.py @@ -5,6 +5,8 @@ import numpy as np import torch import time from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline +import librosa +import pyaudio audio_queue = queue.Queue() done = False @@ -14,8 +16,6 @@ class AudioRecorder: AudioRecorder is a class that records audio from the microphone and adds it to the audio queue. """ def __init__(self, format: int = pyaudio.paInt16, channels: int = 1, rate: int = 4096, chunk: int = 8192, record_seconds: int = 5, verbose: bool = False): - import librosa - import pyaudio self.format = format self.channels = channels self.rate = rate