diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6f010a0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,26 @@ +# Use official Python 3.11 image as the base +FROM python:3.11 + +# Set working directory +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + g++ \ + gfortran \ + libportaudio2 \ + portaudio19-dev \ + ffmpeg \ + libavcodec-dev \ + libavformat-dev \ + libavutil-dev \ + chromium \ + chromium-driver \ + && rm -rf /var/lib/apt/lists/* + +RUN pip cache purge + +COPY . . + +RUN BLIS_ARCH=generic pip install --no-cache-dir -r requirements.txt \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index db9b52c..e216504 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ +setuptools>=75.6.0 requests>=2.31.0 +numpy>=1.24.4 colorama>=0.4.6 python-dotenv>=1.0.0 playsound>=1.3.0 @@ -7,17 +9,15 @@ transformers>=4.46.3 torch>=2.4.1 python-dotenv>=1.0.0 ollama>=0.4.7 -scipy>=1.15.1 +scipy>=1.9.3 kokoro>=0.7.12 -flask>=3.1.0 soundfile>=0.13.1 protobuf>=3.20.3 -termcolor>=2.5.0 -ipython>=8.34.0 -gliclass>=0.1.8 +termcolor>=2.4.0 +ipython>=8.13.0 pyaudio>=0.2.14 librosa>=0.10.2.post1 -selenium>=4.29.0 +selenium>=4.27.1 markdownify>=1.1.0 text2emotion>=0.0.5 langid>=1.1.6 diff --git a/scripts/linux_install.sh b/scripts/linux_install.sh index c43a41e..28dd9d6 100644 --- a/scripts/linux_install.sh +++ b/scripts/linux_install.sh @@ -7,6 +7,17 @@ sudo apt-get update pip install --upgrade pip +# install pyaudio +pip install pyaudio +# make sure essential tool are installed +sudo apt install python3-dev python3-pip python3-wheel build-essential +# install port audio +sudo apt-get install portaudio19-dev python-pyaudio python3-pyaudio +# install wheel +pip install --upgrade pip setuptools wheel +# install docker compose +sudo apt install docker-compose + # Install Python dependencies from requirements.txt pip3 install -r requirements.txt diff --git a/server/Dockerfile b/server/Dockerfile new file mode 100644 index 0000000..98287c0 --- /dev/null +++ b/server/Dockerfile @@ -0,0 +1,14 @@ +FROM ubuntu:20.04 + +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y python3 python3-pip && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . + +RUN pip3 install --no-cache-dir -r requirements.txt + +CMD ["python3", "--version"] \ No newline at end of file diff --git a/server/requirements.txt b/server/requirements.txt new file mode 100644 index 0000000..7709179 --- /dev/null +++ b/server/requirements.txt @@ -0,0 +1,2 @@ +flask>=2.3.0 +ollama>=0.4.7 \ No newline at end of file diff --git a/server/server_ollama.py b/server/server.py similarity index 95% rename from server/server_ollama.py rename to server/server.py index c90d0a9..5bb4269 100644 --- a/server/server_ollama.py +++ b/server/server.py @@ -37,7 +37,10 @@ class GenerationState: state = GenerationState() -def generate_response(history): # Only takes history as an argument +def generate_response_vllm(history): + pass + +def generate_response_ollama(history): # Only takes history as an argument global state try: with state.lock: diff --git a/setup.py b/setup.py index 002f842..bde5055 100644 --- a/setup.py +++ b/setup.py @@ -24,12 +24,11 @@ setup( "transformers>=4.46.3", "torch>=2.4.1", "ollama>=0.4.7", - "scipy>=1.15.1", + "scipy>=1.9.3", "kokoro>=0.7.12", "flask>=3.1.0", "protobuf>=3.20.3", "termcolor>=2.5.0", - "gliclass>=0.1.8", "ipython>=8.34.0", "librosa>=0.10.2.post1", "selenium>=4.29.0", diff --git a/sources/speech_to_text.py b/sources/speech_to_text.py index 2d76c11..92f50b3 100644 --- a/sources/speech_to_text.py +++ b/sources/speech_to_text.py @@ -3,8 +3,8 @@ import queue import threading import numpy as np import torch -from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline import time +from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline import librosa import pyaudio @@ -16,7 +16,6 @@ class AudioRecorder: AudioRecorder is a class that records audio from the microphone and adds it to the audio queue. """ def __init__(self, format: int = pyaudio.paInt16, channels: int = 1, rate: int = 4096, chunk: int = 8192, record_seconds: int = 5, verbose: bool = False): - import pyaudio self.format = format self.channels = channels self.rate = rate