From 6685b2399336111e710cd45d64a487420bf7b210 Mon Sep 17 00:00:00 2001
From: leslie <ly624928214@gmail.com>
Date: Sat, 19 Apr 2025 19:59:32 +0800
Subject: [PATCH 1/9] fix: Fix translation paragraph count mismatch by
 explicitly instructing LLM about paragraph requirements

---
 .../translator/chatgptapi_translator.py       | 93 ++++++++++++++-----
 1 file changed, 71 insertions(+), 22 deletions(-)

diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py
index 47fbba6..bfccabe 100644
--- a/book_maker/translator/chatgptapi_translator.py
+++ b/book_maker/translator/chatgptapi_translator.py
@@ -75,7 +75,7 @@ class ChatGPTAPI(Base):
         api_base=None,
         prompt_template=None,
         prompt_sys_msg=None,
-        temperature=1.0,
+        temperature=0.2,
         context_flag=False,
         context_paragraph_limit=0,
         **kwargs,
@@ -155,6 +155,7 @@ class ChatGPTAPI(Base):
             model=self.model,
             messages=messages,
             temperature=self.temperature,
+            top_p=0.1
         )
         return completion
 
@@ -240,28 +241,39 @@ class ChatGPTAPI(Base):
     ):
         if len(result_list) == plist_len:
             return result_list, 0
-
         best_result_list = result_list
         retry_count = 0
-
+        # Save the original templates
+        original_prompt_template = self.prompt_template
+        original_system_content = self.system_content
         while retry_count < max_retries and len(result_list) != plist_len:
             print(
                 f"bug: {plist_len} -> {len(result_list)} : Number of paragraphs before and after translation",
             )
-            print(f"sleep for {sleep_dur}s and retry {retry_count+1} ...")
+            print(f"sleep for {sleep_dur}s and retry {retry_count + 1} ...")
             time.sleep(sleep_dur)
             retry_count += 1
-            result_list = self.translate_and_split_lines(new_str)
+
+            # Use increasingly forceful prompts on retries
+            self.prompt_template = f"Translate the following text to {{language}}. IMPORTANT: The text has EXACTLY {plist_len} numbered paragraphs. Your translation MUST have EXACTLY {plist_len} paragraphs with the same numbering (1), (2), etc. `{{text}}`"
+            self.system_content = f"You are a precise translator. The text contains {plist_len} paragraphs. Your output MUST contain exactly {plist_len} paragraphs, no more and no less."
+
+            # Try again with modified instruction
+            result_str = self.translate(new_str, False)
+            result_list = self.extract_paragraphs(result_str, plist_len)
+
             if (
                 len(result_list) == plist_len
                 or len(best_result_list) < len(result_list) <= plist_len
                 or (
-                    len(result_list) < len(best_result_list)
-                    and len(best_result_list) > plist_len
-                )
+                len(result_list) < len(best_result_list)
+                and len(best_result_list) > plist_len
+            )
             ):
                 best_result_list = result_list
-
+        # Restore the original templates
+        self.prompt_template = original_prompt_template
+        self.system_content = original_system_content
         return best_result_list, retry_count
 
     def log_retry(self, state, retry_count, elapsed_time, log_path="log/buglog.txt"):
@@ -334,8 +346,9 @@ class ChatGPTAPI(Base):
 
     def translate_list(self, plist):
         sep = "\n\n\n\n\n"
-        # new_str = sep.join([item.text for item in plist])
+        plist_len = len(plist)
 
+        # Construct the text to be translated
         new_str = ""
         i = 1
         for p in plist:
@@ -347,34 +360,70 @@ class ChatGPTAPI(Base):
 
         if new_str.endswith(sep):
             new_str = new_str[: -len(sep)]
-
         new_str = self.join_lines(new_str)
 
-        plist_len = len(plist)
+        print(f"plist len = {plist_len}")
 
-        print(f"plist len = {len(plist)}")
+        # Save the original prompt template and system message
+        original_prompt_template = self.prompt_template
+        original_system_content = self.system_content
 
-        result_list = self.translate_and_split_lines(new_str)
+        # Modify the prompt template and system message to include paragraph count requirement
+        self.prompt_template = f"Please translate the following {plist_len} numbered paragraphs to {{language}}. Ensure your translation maintains exactly {plist_len} paragraphs and preserves the paragraph numbers. `{{text}}`"
+        self.system_content = f"You are a translator. The text contains {plist_len} numbered paragraphs. Your translation must have exactly {plist_len} paragraphs with the same numbering structure."
+
+        # Translate with explicit paragraph count instruction
+        result_str = self.translate(new_str, False)
+
+        # Extract paragraphs with a robust strategy
+        result_list = self.extract_paragraphs(result_str, plist_len)
+
+        # Restore original templates
+        self.prompt_template = original_prompt_template
+        self.system_content = original_system_content
 
         start_time = time.time()
-
         result_list, retry_count = self.get_best_result_list(
             plist_len,
             new_str,
-            6,  # WTF this magic number here?
+            6,
             result_list,
         )
-
         end_time = time.time()
-
         state = "fail" if len(result_list) != plist_len else "success"
         log_path = "log/buglog.txt"
-
         self.log_retry(state, retry_count, end_time - start_time, log_path)
-        self.log_translation_mismatch(plist_len, result_list, new_str, sep, log_path)
+        self.log_translation_mismatch(plist_len, result_list, new_str, sep,
+                                      log_path)
+        # Remove paragraph numbers from the result
+        result_list = [re.sub(r"^(\(\d+\)|\d+\.|(\d+))\s*", "", s) for s in
+                       result_list]
+        return result_list
+
+    def extract_paragraphs(self, text, paragraph_count):
+        """Extract paragraphs from translated text, ensuring paragraph count is preserved."""
+        # First try to extract by paragraph numbers (1), (2), etc.
+        result_list = []
+        for i in range(1, paragraph_count + 1):
+            pattern = rf'\({i}\)\s*(.*?)(?=\s*\({i + 1}\)|\Z)'
+            match = re.search(pattern, text, re.DOTALL)
+            if match:
+                result_list.append(match.group(1).strip())
+
+        # If exact pattern matching failed, try another approach
+        if len(result_list) != paragraph_count:
+            pattern = r'\((\d+)\)\s*(.*?)(?=\s*\(\d+\)|\Z)'
+            matches = re.findall(pattern, text, re.DOTALL)
+            if matches:
+                # Sort by paragraph number
+                matches.sort(key=lambda x: int(x[0]))
+                result_list = [match[1].strip() for match in matches]
+
+        # Fallback to original line-splitting approach
+        if len(result_list) != paragraph_count:
+            lines = text.splitlines()
+            result_list = [line.strip() for line in lines if line.strip() != ""]
 
-        # del (num), num. sometime (num) will translated to num.
-        result_list = [re.sub(r"^(\(\d+\)|\d+\.|(\d+))\s*", "", s) for s in result_list]
         return result_list
 
     def set_deployment_id(self, deployment_id):

From b0dbed8826fe01c66c4d80ed2d7ef2a7c4d5a78a Mon Sep 17 00:00:00 2001
From: leslie <ly624928214@gmail.com>
Date: Sat, 19 Apr 2025 20:15:56 +0800
Subject: [PATCH 2/9] fix: Fix translation paragraph count mismatch by
 explicitly instructing LLM about paragraph requirements

---
 .../translator/chatgptapi_translator.py       | 62 +++++++++----------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py
index bfccabe..b1a6585 100644
--- a/book_maker/translator/chatgptapi_translator.py
+++ b/book_maker/translator/chatgptapi_translator.py
@@ -75,7 +75,7 @@ class ChatGPTAPI(Base):
         api_base=None,
         prompt_template=None,
         prompt_sys_msg=None,
-        temperature=0.2,
+        temperature=1.0,
         context_flag=False,
         context_paragraph_limit=0,
         **kwargs,
@@ -155,7 +155,6 @@ class ChatGPTAPI(Base):
             model=self.model,
             messages=messages,
             temperature=self.temperature,
-            top_p=0.1
         )
         return completion
 
@@ -241,39 +240,43 @@ class ChatGPTAPI(Base):
     ):
         if len(result_list) == plist_len:
             return result_list, 0
+
         best_result_list = result_list
         retry_count = 0
-        # Save the original templates
+
+        # Save original prompt template
         original_prompt_template = self.prompt_template
-        original_system_content = self.system_content
+
         while retry_count < max_retries and len(result_list) != plist_len:
             print(
                 f"bug: {plist_len} -> {len(result_list)} : Number of paragraphs before and after translation",
             )
-            print(f"sleep for {sleep_dur}s and retry {retry_count + 1} ...")
+            print(f"sleep for {sleep_dur}s and retry {retry_count+1} ...")
             time.sleep(sleep_dur)
             retry_count += 1
 
-            # Use increasingly forceful prompts on retries
-            self.prompt_template = f"Translate the following text to {{language}}. IMPORTANT: The text has EXACTLY {plist_len} numbered paragraphs. Your translation MUST have EXACTLY {plist_len} paragraphs with the same numbering (1), (2), etc. `{{text}}`"
-            self.system_content = f"You are a precise translator. The text contains {plist_len} paragraphs. Your output MUST contain exactly {plist_len} paragraphs, no more and no less."
+            # Make instructions increasingly explicit with each retry
+            emphasis = "!" * min(retry_count,
+                                 3)  # Add up to 3 exclamation marks
+            paragraph_instruction = f"IMPORTANT{emphasis} The text contains exactly {plist_len} numbered paragraphs. Your translation MUST maintain exactly {plist_len} paragraphs with the same numbering structure."
 
-            # Try again with modified instruction
-            result_str = self.translate(new_str, False)
-            result_list = self.extract_paragraphs(result_str, plist_len)
+            # Extend the original prompt
+            self.prompt_template = f"{original_prompt_template} {paragraph_instruction}"
 
+            result_list = self.translate_and_split_lines(new_str)
             if (
                 len(result_list) == plist_len
                 or len(best_result_list) < len(result_list) <= plist_len
                 or (
-                len(result_list) < len(best_result_list)
-                and len(best_result_list) > plist_len
-            )
+                    len(result_list) < len(best_result_list)
+                    and len(best_result_list) > plist_len
+                )
             ):
                 best_result_list = result_list
-        # Restore the original templates
+
+        # Restore original prompt
         self.prompt_template = original_prompt_template
-        self.system_content = original_system_content
+
         return best_result_list, retry_count
 
     def log_retry(self, state, retry_count, elapsed_time, log_path="log/buglog.txt"):
@@ -348,7 +351,6 @@ class ChatGPTAPI(Base):
         sep = "\n\n\n\n\n"
         plist_len = len(plist)
 
-        # Construct the text to be translated
         new_str = ""
         i = 1
         for p in plist:
@@ -360,41 +362,39 @@ class ChatGPTAPI(Base):
 
         if new_str.endswith(sep):
             new_str = new_str[: -len(sep)]
+
         new_str = self.join_lines(new_str)
 
         print(f"plist len = {plist_len}")
 
-        # Save the original prompt template and system message
+        # Preserve original prompt and append paragraph count requirements
         original_prompt_template = self.prompt_template
-        original_system_content = self.system_content
+        self.prompt_template = f"{original_prompt_template} The text contains exactly {plist_len} paragraphs numbered as (1), (2), etc. Your translation MUST maintain exactly {plist_len} paragraphs with the same numbering."
 
-        # Modify the prompt template and system message to include paragraph count requirement
-        self.prompt_template = f"Please translate the following {plist_len} numbered paragraphs to {{language}}. Ensure your translation maintains exactly {plist_len} paragraphs and preserves the paragraph numbers. `{{text}}`"
-        self.system_content = f"You are a translator. The text contains {plist_len} numbered paragraphs. Your translation must have exactly {plist_len} paragraphs with the same numbering structure."
+        # Translate with enhanced prompt
+        result_list = self.translate_and_split_lines(new_str)
 
-        # Translate with explicit paragraph count instruction
-        result_str = self.translate(new_str, False)
-
-        # Extract paragraphs with a robust strategy
-        result_list = self.extract_paragraphs(result_str, plist_len)
-
-        # Restore original templates
+        # Restore original prompt
         self.prompt_template = original_prompt_template
-        self.system_content = original_system_content
 
         start_time = time.time()
+
         result_list, retry_count = self.get_best_result_list(
             plist_len,
             new_str,
-            6,
+            6,  # WTF this magic number here?
             result_list,
         )
+
         end_time = time.time()
+
         state = "fail" if len(result_list) != plist_len else "success"
         log_path = "log/buglog.txt"
+
         self.log_retry(state, retry_count, end_time - start_time, log_path)
         self.log_translation_mismatch(plist_len, result_list, new_str, sep,
                                       log_path)
+
         # Remove paragraph numbers from the result
         result_list = [re.sub(r"^(\(\d+\)|\d+\.|(\d+))\s*", "", s) for s in
                        result_list]

From 83303d1dd844a5997f90bc117db129eeb2312a75 Mon Sep 17 00:00:00 2001
From: leslie <ly624928214@gmail.com>
Date: Sat, 19 Apr 2025 20:31:22 +0800
Subject: [PATCH 3/9] fix: Fix translation paragraph count mismatch by
 explicitly instructing LLM about paragraph requirements

---
 book_maker/translator/chatgptapi_translator.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py
index b1a6585..08fd226 100644
--- a/book_maker/translator/chatgptapi_translator.py
+++ b/book_maker/translator/chatgptapi_translator.py
@@ -256,8 +256,7 @@ class ChatGPTAPI(Base):
             retry_count += 1
 
             # Make instructions increasingly explicit with each retry
-            emphasis = "!" * min(retry_count,
-                                 3)  # Add up to 3 exclamation marks
+            emphasis = "!" * min(retry_count, 3)  # Add up to 3 exclamation marks
             paragraph_instruction = f"IMPORTANT{emphasis} The text contains exactly {plist_len} numbered paragraphs. Your translation MUST maintain exactly {plist_len} paragraphs with the same numbering structure."
 
             # Extend the original prompt
@@ -392,12 +391,10 @@ class ChatGPTAPI(Base):
         log_path = "log/buglog.txt"
 
         self.log_retry(state, retry_count, end_time - start_time, log_path)
-        self.log_translation_mismatch(plist_len, result_list, new_str, sep,
-                                      log_path)
+        self.log_translation_mismatch(plist_len, result_list, new_str, sep, log_path)
 
         # Remove paragraph numbers from the result
-        result_list = [re.sub(r"^(\(\d+\)|\d+\.|(\d+))\s*", "", s) for s in
-                       result_list]
+        result_list = [re.sub(r"^(\(\d+\)|\d+\.|(\d+))\s*", "", s) for s in result_list]
         return result_list
 
     def extract_paragraphs(self, text, paragraph_count):
@@ -405,14 +402,14 @@ class ChatGPTAPI(Base):
         # First try to extract by paragraph numbers (1), (2), etc.
         result_list = []
         for i in range(1, paragraph_count + 1):
-            pattern = rf'\({i}\)\s*(.*?)(?=\s*\({i + 1}\)|\Z)'
+            pattern = rf"\({i}\)\s*(.*?)(?=\s*\({i + 1}\)|\Z)"
             match = re.search(pattern, text, re.DOTALL)
             if match:
                 result_list.append(match.group(1).strip())
 
         # If exact pattern matching failed, try another approach
         if len(result_list) != paragraph_count:
-            pattern = r'\((\d+)\)\s*(.*?)(?=\s*\(\d+\)|\Z)'
+            pattern = r"\((\d+)\)\s*(.*?)(?=\s*\(\d+\)|\Z)"
             matches = re.findall(pattern, text, re.DOTALL)
             if matches:
                 # Sort by paragraph number

From 70a19628041020b0df1e016895d416a2b8a86a0d Mon Sep 17 00:00:00 2001
From: leslie <ly624928214@gmail.com>
Date: Sat, 19 Apr 2025 22:18:47 +0800
Subject: [PATCH 4/9] fix: Fix translation paragraph count mismatch by
 explicitly instructing LLM about paragraph requirements

---
 .../translator/chatgptapi_translator.py       | 167 ++++++++++++++----
 1 file changed, 129 insertions(+), 38 deletions(-)

diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py
index 08fd226..2c946bd 100644
--- a/book_maker/translator/chatgptapi_translator.py
+++ b/book_maker/translator/chatgptapi_translator.py
@@ -240,29 +240,33 @@ class ChatGPTAPI(Base):
     ):
         if len(result_list) == plist_len:
             return result_list, 0
-
         best_result_list = result_list
         retry_count = 0
 
         # Save original prompt template
         original_prompt_template = self.prompt_template
-
         while retry_count < max_retries and len(result_list) != plist_len:
             print(
                 f"bug: {plist_len} -> {len(result_list)} : Number of paragraphs before and after translation",
             )
-            print(f"sleep for {sleep_dur}s and retry {retry_count+1} ...")
+            print(f"sleep for {sleep_dur}s and retry {retry_count + 1} ...")
             time.sleep(sleep_dur)
             retry_count += 1
 
-            # Make instructions increasingly explicit with each retry
-            emphasis = "!" * min(retry_count, 3)  # Add up to 3 exclamation marks
-            paragraph_instruction = f"IMPORTANT{emphasis} The text contains exactly {plist_len} numbered paragraphs. Your translation MUST maintain exactly {plist_len} paragraphs with the same numbering structure."
+            # Create increasingly strict prompts
+            structured_prompt = (
+                f"CRITICAL!!! Translate the following {plist_len} paragraphs to {{language}}. "
+                f"Your output MUST have EXACTLY {plist_len} paragraphs - NO MORE, NO LESS. "
+                f"Each paragraph must be wrapped in numbered XML tags: <p1>text</p1>, <p2>text</p2>, etc. "
+                f"DO NOT skip any paragraph numbers. DO NOT add extra paragraphs. "
+                f"Required format: <p1>translated text</p1>\n<p2>translated text</p2>\n...\n<p{plist_len}>translated text</p{plist_len}>"
+            )
 
-            # Extend the original prompt
-            self.prompt_template = f"{original_prompt_template} {paragraph_instruction}"
+            self.prompt_template = structured_prompt + " `{text}`"
+
+            translated_text = self.translate(new_str, False)
+            result_list = self.extract_tagged_paragraphs(translated_text, plist_len)
 
-            result_list = self.translate_and_split_lines(new_str)
             if (
                 len(result_list) == plist_len
                 or len(best_result_list) < len(result_list) <= plist_len
@@ -272,10 +276,18 @@ class ChatGPTAPI(Base):
                 )
             ):
                 best_result_list = result_list
-
         # Restore original prompt
         self.prompt_template = original_prompt_template
 
+        # If we still don't have the right number, force it by padding or trimming
+        if len(best_result_list) != plist_len:
+            if len(best_result_list) < plist_len:
+                # Pad with empty strings if we have too few
+                best_result_list.extend([""] * (plist_len - len(best_result_list)))
+            else:
+                # Trim if we have too many
+                best_result_list = best_result_list[:plist_len]
+
         return best_result_list, retry_count
 
     def log_retry(self, state, retry_count, elapsed_time, log_path="log/buglog.txt"):
@@ -347,54 +359,133 @@ class ChatGPTAPI(Base):
         return new_text
 
     def translate_list(self, plist):
-        sep = "\n\n\n\n\n"
         plist_len = len(plist)
 
-        new_str = ""
-        i = 1
-        for p in plist:
+        # Format input with explicit paragraph numbering
+        formatted_paragraphs = []
+        for i, p in enumerate(plist, 1):
             temp_p = copy(p)
             for sup in temp_p.find_all("sup"):
                 sup.extract()
-            new_str += f"({i}) {temp_p.get_text().strip()}{sep}"
-            i = i + 1
+            formatted_paragraphs.append(f"<p{i}>{temp_p.get_text().strip()}</p{i}>")
 
-        if new_str.endswith(sep):
-            new_str = new_str[: -len(sep)]
-
-        new_str = self.join_lines(new_str)
+        # Join with single newlines for cleaner input
+        new_str = "\n".join(formatted_paragraphs)
 
         print(f"plist len = {plist_len}")
 
-        # Preserve original prompt and append paragraph count requirements
+        # Save original prompt template
         original_prompt_template = self.prompt_template
-        self.prompt_template = f"{original_prompt_template} The text contains exactly {plist_len} paragraphs numbered as (1), (2), etc. Your translation MUST maintain exactly {plist_len} paragraphs with the same numbering."
 
-        # Translate with enhanced prompt
-        result_list = self.translate_and_split_lines(new_str)
+        # Create a structured prompt that forces exact paragraph count
+        structured_prompt = (
+            f"Translate the following {plist_len} paragraphs to {{language}}. "
+            f"CRUCIAL: Your output MUST contain EXACTLY {plist_len} paragraphs. "
+            f"Each paragraph is wrapped in numbered tags like <p1>text</p1>. "
+            f"Preserve these exact tags in your output, only translating the text inside them. "
+            f"Example output format: <p1>translated text for paragraph 1</p1>\n<p2>translated text for paragraph 2</p2>\n...\n<p{plist_len}>translated text for paragraph {plist_len}</p{plist_len}>"
+        )
+
+        self.prompt_template = structured_prompt + " `{text}`"
+
+        # First translation attempt
+        translated_text = self.translate(new_str, False)
+
+        # Extract paragraphs using the tags
+        result_list = self.extract_tagged_paragraphs(translated_text, plist_len)
+
+        # If we still don't have the right number, try the retry approach
+        start_time = time.time()
+        if len(result_list) != plist_len:
+            result_list, retry_count = self.get_best_result_list(
+                plist_len,
+                new_str,
+                6,  # WTF this magic number here?
+                result_list,
+            )
+        else:
+            retry_count = 0
+
+        end_time = time.time()
 
         # Restore original prompt
         self.prompt_template = original_prompt_template
 
-        start_time = time.time()
-
-        result_list, retry_count = self.get_best_result_list(
-            plist_len,
-            new_str,
-            6,  # WTF this magic number here?
-            result_list,
-        )
-
-        end_time = time.time()
-
         state = "fail" if len(result_list) != plist_len else "success"
         log_path = "log/buglog.txt"
 
         self.log_retry(state, retry_count, end_time - start_time, log_path)
-        self.log_translation_mismatch(plist_len, result_list, new_str, sep, log_path)
+        if state == "fail":
+            self.log_translation_mismatch(
+                plist_len, result_list, new_str, "\n", log_path
+            )
+
+        return result_list
+
+    def extract_tagged_paragraphs(self, text, plist_len):
+        """Extract paragraphs from text with <p1>...</p1> tags."""
+        result_list = []
+
+        # Try extracting with tags first
+        for i in range(1, plist_len + 1):
+            pattern = rf"<p{i}>(.*?)</p{i}>"
+            matches = re.findall(pattern, text, re.DOTALL)
+            if matches:
+                result_list.append(matches[0].strip())
+
+        # If we got all paragraphs, return them
+        if len(result_list) == plist_len:
+            return result_list
+
+        # Fallback: try general tag pattern
+        pattern = r"<p(\d+)>(.*?)</p\1>"
+        matches = re.findall(pattern, text, re.DOTALL)
+
+        if matches and len(matches) == plist_len:
+            # Sort by paragraph number
+            matches.sort(key=lambda x: int(x[0]))
+            result_list = [match[1].strip() for match in matches]
+            return result_list
+
+        # Second fallback: try another approach with numbered paragraphs
+        result_list = []
+        for i in range(1, plist_len + 1):
+            pattern = rf"\({i}\)\s*(.*?)(?=\s*\({i + 1}\)|\Z)"
+            match = re.search(pattern, text, re.DOTALL)
+            if match:
+                result_list.append(match.group(1).strip())
+
+        # If all else fails, fall back to splitting by lines
+        if len(result_list) != plist_len:
+            lines = text.splitlines()
+            non_empty_lines = [line.strip() for line in lines if line.strip()]
+
+            # Attempt to find paragraph markers and divide accordingly
+            paragraph_markers = [
+                i
+                for i, line in enumerate(non_empty_lines)
+                if re.match(r"^\s*(\(\d+\)|\d+\.)", line)
+            ]
+
+            if len(paragraph_markers) == plist_len:
+                result_list = []
+                for i in range(len(paragraph_markers)):
+                    start = paragraph_markers[i]
+                    end = (
+                        paragraph_markers[i + 1]
+                        if i < len(paragraph_markers) - 1
+                        else len(non_empty_lines)
+                    )
+                    paragraph = " ".join(non_empty_lines[start:end])
+                    result_list.append(re.sub(r"^\s*(\(\d+\)|\d+\.)\s*", "", paragraph))
+            else:
+                # Last resort: try to split evenly
+                result_list = (
+                    non_empty_lines[:plist_len]
+                    if len(non_empty_lines) >= plist_len
+                    else non_empty_lines
+                )
 
-        # Remove paragraph numbers from the result
-        result_list = [re.sub(r"^(\(\d+\)|\d+\.|(\d+))\s*", "", s) for s in result_list]
         return result_list
 
     def extract_paragraphs(self, text, paragraph_count):

From 750ecd7d9358a6e50bcf0e182ac8e1263bf8f029 Mon Sep 17 00:00:00 2001
From: leslie <ly624928214@gmail.com>
Date: Sat, 19 Apr 2025 22:57:47 +0800
Subject: [PATCH 5/9] fix: Fix translation paragraph count mismatch by
 explicitly instructing LLM about paragraph requirements

---
 .../translator/chatgptapi_translator.py       | 47 +++++++++++++++++--
 1 file changed, 43 insertions(+), 4 deletions(-)

diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py
index 2c946bd..499aa63 100644
--- a/book_maker/translator/chatgptapi_translator.py
+++ b/book_maker/translator/chatgptapi_translator.py
@@ -377,12 +377,14 @@ class ChatGPTAPI(Base):
         # Save original prompt template
         original_prompt_template = self.prompt_template
 
-        # Create a structured prompt that forces exact paragraph count
+        # Create a structured prompt that forces exact paragraph count and prevents merging
         structured_prompt = (
             f"Translate the following {plist_len} paragraphs to {{language}}. "
             f"CRUCIAL: Your output MUST contain EXACTLY {plist_len} paragraphs. "
             f"Each paragraph is wrapped in numbered tags like <p1>text</p1>. "
-            f"Preserve these exact tags in your output, only translating the text inside them. "
+            f"DO NOT merge paragraphs. Keep each paragraph separate. "
+            f"DO NOT combine multiple paragraphs into one. "
+            f"Each original paragraph should become exactly one translated paragraph. "
             f"Example output format: <p1>translated text for paragraph 1</p1>\n<p2>translated text for paragraph 2</p2>\n...\n<p{plist_len}>translated text for paragraph {plist_len}</p{plist_len}>"
         )
 
@@ -400,7 +402,7 @@ class ChatGPTAPI(Base):
             result_list, retry_count = self.get_best_result_list(
                 plist_len,
                 new_str,
-                6,  # WTF this magic number here?
+                6,
                 result_list,
             )
         else:
@@ -411,6 +413,38 @@ class ChatGPTAPI(Base):
         # Restore original prompt
         self.prompt_template = original_prompt_template
 
+        # Clean up the results - strip any XML tags from the final output
+        cleaned_result_list = []
+        for paragraph in result_list:
+            # Remove any XML tags that might be in the output
+            cleaned_text = re.sub(r"<p\d+>(.*?)</p\d+>", r"\1", paragraph)
+            # Also clean any partial tags
+            cleaned_text = re.sub(r"</?p\d*>", "", cleaned_text).strip()
+            cleaned_result_list.append(cleaned_text)
+
+        # Check for merged paragraphs and attempt to split them
+        final_result_list = []
+        for paragraph in cleaned_result_list:
+            # If this is potentially a merged paragraph, try to split it
+            if len(paragraph) > 200 and ". " in paragraph:
+                # Look for sentence patterns that might indicate paragraph breaks
+                potential_paragraphs = re.split(r"(?<=[.!?])\s+(?=[A-Z0-9])", paragraph)
+                # Only split if it would help us get closer to the target paragraph count
+                if (
+                    len(potential_paragraphs) > 1
+                    and len(final_result_list) + len(potential_paragraphs) <= plist_len
+                ):
+                    final_result_list.extend(potential_paragraphs)
+                    continue
+            final_result_list.append(paragraph)
+
+        # Ensure we have plist_len paragraphs
+        if len(final_result_list) > plist_len:
+            final_result_list = final_result_list[:plist_len]
+        elif len(final_result_list) < plist_len:
+            final_result_list.extend([""] * (plist_len - len(final_result_list)))
+
+        # Log results
         state = "fail" if len(result_list) != plist_len else "success"
         log_path = "log/buglog.txt"
 
@@ -420,7 +454,12 @@ class ChatGPTAPI(Base):
                 plist_len, result_list, new_str, "\n", log_path
             )
 
-        return result_list
+        # Del paragraph numbers if any remain
+        final_result_list = [
+            re.sub(r"^(\(\d+\)|\d+\.|(\d+))\s*", "", s) for s in final_result_list
+        ]
+
+        return final_result_list
 
     def extract_tagged_paragraphs(self, text, plist_len):
         """Extract paragraphs from text with <p1>...</p1> tags."""

From 09589c626da0d185da1111d3cf0715257af1a7b3 Mon Sep 17 00:00:00 2001
From: leslie <ly624928214@gmail.com>
Date: Sat, 19 Apr 2025 23:28:02 +0800
Subject: [PATCH 6/9] fix: Fix translation paragraph count mismatch by
 explicitly instructing LLM about paragraph requirements

---
 book_maker/translator/chatgptapi_translator.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py
index 499aa63..02332ce 100644
--- a/book_maker/translator/chatgptapi_translator.py
+++ b/book_maker/translator/chatgptapi_translator.py
@@ -384,6 +384,7 @@ class ChatGPTAPI(Base):
             f"Each paragraph is wrapped in numbered tags like <p1>text</p1>. "
             f"DO NOT merge paragraphs. Keep each paragraph separate. "
             f"DO NOT combine multiple paragraphs into one. "
+            f"Preserve numbers at the beginning of paragraphs like '17' or '10x'. "
             f"Each original paragraph should become exactly one translated paragraph. "
             f"Example output format: <p1>translated text for paragraph 1</p1>\n<p2>translated text for paragraph 2</p2>\n...\n<p{plist_len}>translated text for paragraph {plist_len}</p{plist_len}>"
         )
@@ -438,7 +439,7 @@ class ChatGPTAPI(Base):
                     continue
             final_result_list.append(paragraph)
 
-        # Ensure we have plist_len paragraphs
+        # Ensure we have exactly plist_len paragraphs
         if len(final_result_list) > plist_len:
             final_result_list = final_result_list[:plist_len]
         elif len(final_result_list) < plist_len:
@@ -454,9 +455,9 @@ class ChatGPTAPI(Base):
                 plist_len, result_list, new_str, "\n", log_path
             )
 
-        # Del paragraph numbers if any remain
+        # Remove ONLY the paragraph numbering formats, not all numbers at the start
         final_result_list = [
-            re.sub(r"^(\(\d+\)|\d+\.|(\d+))\s*", "", s) for s in final_result_list
+            re.sub(r"^(\(\d+\)|\d+\.)\s*", "", s) for s in final_result_list
         ]
 
         return final_result_list

From 57ca4da847abbddb772c9889c1506012226a22c0 Mon Sep 17 00:00:00 2001
From: leslie <ly624928214@gmail.com>
Date: Sun, 20 Apr 2025 16:48:11 +0800
Subject: [PATCH 7/9] fix:Fix translation paragraph count mismatch by
 explicitly instructing LLM about paragraph requirements

---
 .../translator/chatgptapi_translator.py       | 158 +++++++++---------
 1 file changed, 81 insertions(+), 77 deletions(-)

diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py
index 02332ce..67c6239 100644
--- a/book_maker/translator/chatgptapi_translator.py
+++ b/book_maker/translator/chatgptapi_translator.py
@@ -361,106 +361,110 @@ class ChatGPTAPI(Base):
     def translate_list(self, plist):
         plist_len = len(plist)
 
-        # Format input with explicit paragraph numbering
-        formatted_paragraphs = []
+        # 创建原始文本列表，并为每个段落添加明确的编号标记
+        formatted_text = ""
         for i, p in enumerate(plist, 1):
             temp_p = copy(p)
             for sup in temp_p.find_all("sup"):
                 sup.extract()
-            formatted_paragraphs.append(f"<p{i}>{temp_p.get_text().strip()}</p{i}>")
-
-        # Join with single newlines for cleaner input
-        new_str = "\n".join(formatted_paragraphs)
+            para_text = temp_p.get_text().strip()
+            # 使用特殊的分隔符和明确的编号
+            formatted_text += f"PARAGRAPH {i}:\n{para_text}\n\n"
 
         print(f"plist len = {plist_len}")
 
-        # Save original prompt template
+        # 保存原始提示模板
         original_prompt_template = self.prompt_template
 
-        # Create a structured prompt that forces exact paragraph count and prevents merging
+        # 创建明确要求保持段落结构的提示
         structured_prompt = (
             f"Translate the following {plist_len} paragraphs to {{language}}. "
-            f"CRUCIAL: Your output MUST contain EXACTLY {plist_len} paragraphs. "
-            f"Each paragraph is wrapped in numbered tags like <p1>text</p1>. "
-            f"DO NOT merge paragraphs. Keep each paragraph separate. "
-            f"DO NOT combine multiple paragraphs into one. "
-            f"Preserve numbers at the beginning of paragraphs like '17' or '10x'. "
-            f"Each original paragraph should become exactly one translated paragraph. "
-            f"Example output format: <p1>translated text for paragraph 1</p1>\n<p2>translated text for paragraph 2</p2>\n...\n<p{plist_len}>translated text for paragraph {plist_len}</p{plist_len}>"
+            f"CRUCIAL INSTRUCTION: Format your response using EXACTLY this structure:\n\n"
+            f"TRANSLATION OF PARAGRAPH 1:\n[Your translation of paragraph 1 here]\n\n"
+            f"TRANSLATION OF PARAGRAPH 2:\n[Your translation of paragraph 2 here]\n\n"
+            f"... and so on for all {plist_len} paragraphs.\n\n"
+            f"You MUST provide EXACTLY {plist_len} translated paragraphs. "
+            f"Do not merge, split, or rearrange paragraphs. "
+            f"Translate each paragraph independently but consistently. "
+            f"Keep all numbers and special formatting in your translation. "
+            f"Each original paragraph must correspond to exactly one translated paragraph."
         )
 
-        self.prompt_template = structured_prompt + " `{text}`"
+        self.prompt_template = structured_prompt + " ```{text}```"
 
-        # First translation attempt
-        translated_text = self.translate(new_str, False)
+        # 翻译
+        translated_text = self.translate(formatted_text, False)
 
-        # Extract paragraphs using the tags
-        result_list = self.extract_tagged_paragraphs(translated_text, plist_len)
-
-        # If we still don't have the right number, try the retry approach
-        start_time = time.time()
-        if len(result_list) != plist_len:
-            result_list, retry_count = self.get_best_result_list(
-                plist_len,
-                new_str,
-                6,
-                result_list,
+        # 从结构化输出中提取翻译
+        translated_paragraphs = []
+        for i in range(1, plist_len + 1):
+            pattern = (
+                r"TRANSLATION OF PARAGRAPH "
+                + str(i)
+                + r":(.*?)(?=TRANSLATION OF PARAGRAPH \d+:|\Z)"
             )
-        else:
-            retry_count = 0
+            matches = re.findall(pattern, translated_text, re.DOTALL)
 
-        end_time = time.time()
+            if matches:
+                translated_paragraph = matches[0].strip()
+                translated_paragraphs.append(translated_paragraph)
+            else:
+                print(f"Warning: Could not find translation for paragraph {i}")
+                # 尝试更宽松的匹配
+                loose_pattern = (
+                    r"(?:TRANSLATION|PARAGRAPH|PARA).*?"
+                    + str(i)
+                    + r".*?:(.*?)(?=(?:TRANSLATION|PARAGRAPH|PARA).*?\d+.*?:|\Z)"
+                )
+                loose_matches = re.findall(loose_pattern, translated_text, re.DOTALL)
+                if loose_matches:
+                    translated_paragraphs.append(loose_matches[0].strip())
+                else:
+                    translated_paragraphs.append("")
 
-        # Restore original prompt
+        # 恢复原始提示
         self.prompt_template = original_prompt_template
 
-        # Clean up the results - strip any XML tags from the final output
-        cleaned_result_list = []
-        for paragraph in result_list:
-            # Remove any XML tags that might be in the output
-            cleaned_text = re.sub(r"<p\d+>(.*?)</p\d+>", r"\1", paragraph)
-            # Also clean any partial tags
-            cleaned_text = re.sub(r"</?p\d*>", "", cleaned_text).strip()
-            cleaned_result_list.append(cleaned_text)
-
-        # Check for merged paragraphs and attempt to split them
-        final_result_list = []
-        for paragraph in cleaned_result_list:
-            # If this is potentially a merged paragraph, try to split it
-            if len(paragraph) > 200 and ". " in paragraph:
-                # Look for sentence patterns that might indicate paragraph breaks
-                potential_paragraphs = re.split(r"(?<=[.!?])\s+(?=[A-Z0-9])", paragraph)
-                # Only split if it would help us get closer to the target paragraph count
-                if (
-                    len(potential_paragraphs) > 1
-                    and len(final_result_list) + len(potential_paragraphs) <= plist_len
-                ):
-                    final_result_list.extend(potential_paragraphs)
-                    continue
-            final_result_list.append(paragraph)
-
-        # Ensure we have exactly plist_len paragraphs
-        if len(final_result_list) > plist_len:
-            final_result_list = final_result_list[:plist_len]
-        elif len(final_result_list) < plist_len:
-            final_result_list.extend([""] * (plist_len - len(final_result_list)))
-
-        # Log results
-        state = "fail" if len(result_list) != plist_len else "success"
-        log_path = "log/buglog.txt"
-
-        self.log_retry(state, retry_count, end_time - start_time, log_path)
-        if state == "fail":
-            self.log_translation_mismatch(
-                plist_len, result_list, new_str, "\n", log_path
+        # 如果提取到的段落数不正确，尝试备用提取方法
+        if len(translated_paragraphs) != plist_len:
+            print(
+                f"Warning: Extracted {len(translated_paragraphs)}/{plist_len} paragraphs. Using fallback extraction."
             )
+            # 提取所有可能的段落标记
+            all_para_pattern = r"(?:TRANSLATION|PARAGRAPH|PARA).*?(\d+).*?:(.*?)(?=(?:TRANSLATION|PARAGRAPH|PARA).*?\d+.*?:|\Z)"
+            all_matches = re.findall(all_para_pattern, translated_text, re.DOTALL)
 
-        # Remove ONLY the paragraph numbering formats, not all numbers at the start
-        final_result_list = [
-            re.sub(r"^(\(\d+\)|\d+\.)\s*", "", s) for s in final_result_list
-        ]
+            if all_matches:
+                # 创建一个字典，根据段落编号映射翻译内容
+                para_dict = {}
+                for num_str, content in all_matches:
+                    try:
+                        num = int(num_str)
+                        if 1 <= num <= plist_len:
+                            para_dict[num] = content.strip()
+                    except ValueError:
+                        continue
 
-        return final_result_list
+                # 按原始顺序重建翻译列表
+                new_translated_paragraphs = []
+                for i in range(1, plist_len + 1):
+                    if i in para_dict:
+                        new_translated_paragraphs.append(para_dict[i])
+                    else:
+                        new_translated_paragraphs.append("")
+
+                if len(new_translated_paragraphs) == plist_len:
+                    translated_paragraphs = new_translated_paragraphs
+
+        # 确保最终有正确数量的段落
+        if len(translated_paragraphs) < plist_len:
+            translated_paragraphs.extend(
+                [""] * (plist_len - len(translated_paragraphs))
+            )
+        elif len(translated_paragraphs) > plist_len:
+            translated_paragraphs = translated_paragraphs[:plist_len]
+
+        return translated_paragraphs
 
     def extract_tagged_paragraphs(self, text, plist_len):
         """Extract paragraphs from text with <p1>...</p1> tags."""

From cc4f4c4daed9fcf5dac9d9b9f729d406194771c3 Mon Sep 17 00:00:00 2001
From: leslie <ly624928214@gmail.com>
Date: Mon, 21 Apr 2025 15:05:21 +0800
Subject: [PATCH 8/9] fix:Fix translation paragraph count mismatch by
 explicitly instructing LLM about paragraph requirements

---
 .../translator/chatgptapi_translator.py       | 128 +-----------------
 1 file changed, 1 insertion(+), 127 deletions(-)

diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py
index 67c6239..6d11437 100644
--- a/book_maker/translator/chatgptapi_translator.py
+++ b/book_maker/translator/chatgptapi_translator.py
@@ -75,7 +75,7 @@ class ChatGPTAPI(Base):
         api_base=None,
         prompt_template=None,
         prompt_sys_msg=None,
-        temperature=1.0,
+        temperature=1.3,
         context_flag=False,
         context_paragraph_limit=0,
         **kwargs,
@@ -230,66 +230,6 @@ class ChatGPTAPI(Base):
         lines = [line.strip() for line in lines if line.strip() != ""]
         return lines
 
-    def get_best_result_list(
-        self,
-        plist_len,
-        new_str,
-        sleep_dur,
-        result_list,
-        max_retries=15,
-    ):
-        if len(result_list) == plist_len:
-            return result_list, 0
-        best_result_list = result_list
-        retry_count = 0
-
-        # Save original prompt template
-        original_prompt_template = self.prompt_template
-        while retry_count < max_retries and len(result_list) != plist_len:
-            print(
-                f"bug: {plist_len} -> {len(result_list)} : Number of paragraphs before and after translation",
-            )
-            print(f"sleep for {sleep_dur}s and retry {retry_count + 1} ...")
-            time.sleep(sleep_dur)
-            retry_count += 1
-
-            # Create increasingly strict prompts
-            structured_prompt = (
-                f"CRITICAL!!! Translate the following {plist_len} paragraphs to {{language}}. "
-                f"Your output MUST have EXACTLY {plist_len} paragraphs - NO MORE, NO LESS. "
-                f"Each paragraph must be wrapped in numbered XML tags: <p1>text</p1>, <p2>text</p2>, etc. "
-                f"DO NOT skip any paragraph numbers. DO NOT add extra paragraphs. "
-                f"Required format: <p1>translated text</p1>\n<p2>translated text</p2>\n...\n<p{plist_len}>translated text</p{plist_len}>"
-            )
-
-            self.prompt_template = structured_prompt + " `{text}`"
-
-            translated_text = self.translate(new_str, False)
-            result_list = self.extract_tagged_paragraphs(translated_text, plist_len)
-
-            if (
-                len(result_list) == plist_len
-                or len(best_result_list) < len(result_list) <= plist_len
-                or (
-                    len(result_list) < len(best_result_list)
-                    and len(best_result_list) > plist_len
-                )
-            ):
-                best_result_list = result_list
-        # Restore original prompt
-        self.prompt_template = original_prompt_template
-
-        # If we still don't have the right number, force it by padding or trimming
-        if len(best_result_list) != plist_len:
-            if len(best_result_list) < plist_len:
-                # Pad with empty strings if we have too few
-                best_result_list.extend([""] * (plist_len - len(best_result_list)))
-            else:
-                # Trim if we have too many
-                best_result_list = best_result_list[:plist_len]
-
-        return best_result_list, retry_count
-
     def log_retry(self, state, retry_count, elapsed_time, log_path="log/buglog.txt"):
         if retry_count == 0:
             return
@@ -466,72 +406,6 @@ class ChatGPTAPI(Base):
 
         return translated_paragraphs
 
-    def extract_tagged_paragraphs(self, text, plist_len):
-        """Extract paragraphs from text with <p1>...</p1> tags."""
-        result_list = []
-
-        # Try extracting with tags first
-        for i in range(1, plist_len + 1):
-            pattern = rf"<p{i}>(.*?)</p{i}>"
-            matches = re.findall(pattern, text, re.DOTALL)
-            if matches:
-                result_list.append(matches[0].strip())
-
-        # If we got all paragraphs, return them
-        if len(result_list) == plist_len:
-            return result_list
-
-        # Fallback: try general tag pattern
-        pattern = r"<p(\d+)>(.*?)</p\1>"
-        matches = re.findall(pattern, text, re.DOTALL)
-
-        if matches and len(matches) == plist_len:
-            # Sort by paragraph number
-            matches.sort(key=lambda x: int(x[0]))
-            result_list = [match[1].strip() for match in matches]
-            return result_list
-
-        # Second fallback: try another approach with numbered paragraphs
-        result_list = []
-        for i in range(1, plist_len + 1):
-            pattern = rf"\({i}\)\s*(.*?)(?=\s*\({i + 1}\)|\Z)"
-            match = re.search(pattern, text, re.DOTALL)
-            if match:
-                result_list.append(match.group(1).strip())
-
-        # If all else fails, fall back to splitting by lines
-        if len(result_list) != plist_len:
-            lines = text.splitlines()
-            non_empty_lines = [line.strip() for line in lines if line.strip()]
-
-            # Attempt to find paragraph markers and divide accordingly
-            paragraph_markers = [
-                i
-                for i, line in enumerate(non_empty_lines)
-                if re.match(r"^\s*(\(\d+\)|\d+\.)", line)
-            ]
-
-            if len(paragraph_markers) == plist_len:
-                result_list = []
-                for i in range(len(paragraph_markers)):
-                    start = paragraph_markers[i]
-                    end = (
-                        paragraph_markers[i + 1]
-                        if i < len(paragraph_markers) - 1
-                        else len(non_empty_lines)
-                    )
-                    paragraph = " ".join(non_empty_lines[start:end])
-                    result_list.append(re.sub(r"^\s*(\(\d+\)|\d+\.)\s*", "", paragraph))
-            else:
-                # Last resort: try to split evenly
-                result_list = (
-                    non_empty_lines[:plist_len]
-                    if len(non_empty_lines) >= plist_len
-                    else non_empty_lines
-                )
-
-        return result_list
-
     def extract_paragraphs(self, text, paragraph_count):
         """Extract paragraphs from translated text, ensuring paragraph count is preserved."""
         # First try to extract by paragraph numbers (1), (2), etc.

From c780f7c516c0d79c8da467f103f40dfb1b0a41bd Mon Sep 17 00:00:00 2001
From: leslie <ly624928214@gmail.com>
Date: Mon, 21 Apr 2025 16:11:33 +0800
Subject: [PATCH 9/9] fix:Fix translation paragraph count mismatch by
 explicitly instructing LLM about paragraph requirements

---
 .../translator/chatgptapi_translator.py       | 22 +++++++------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py
index 6d11437..2dc9f14 100644
--- a/book_maker/translator/chatgptapi_translator.py
+++ b/book_maker/translator/chatgptapi_translator.py
@@ -75,7 +75,7 @@ class ChatGPTAPI(Base):
         api_base=None,
         prompt_template=None,
         prompt_sys_msg=None,
-        temperature=1.3,
+        temperature=1.0,
         context_flag=False,
         context_paragraph_limit=0,
         **kwargs,
@@ -301,22 +301,20 @@ class ChatGPTAPI(Base):
     def translate_list(self, plist):
         plist_len = len(plist)
 
-        # 创建原始文本列表，并为每个段落添加明确的编号标记
+        # Build a single input string in which each paragraph is preceded by an explicit numbered marker
         formatted_text = ""
         for i, p in enumerate(plist, 1):
             temp_p = copy(p)
             for sup in temp_p.find_all("sup"):
                 sup.extract()
             para_text = temp_p.get_text().strip()
-            # 使用特殊的分隔符和明确的编号
+            # Delimit each paragraph with a "PARAGRAPH <n>:" header line followed by a blank line
             formatted_text += f"PARAGRAPH {i}:\n{para_text}\n\n"
 
         print(f"plist len = {plist_len}")
 
-        # 保存原始提示模板
         original_prompt_template = self.prompt_template
 
-        # 创建明确要求保持段落结构的提示
         structured_prompt = (
             f"Translate the following {plist_len} paragraphs to {{language}}. "
             f"CRUCIAL INSTRUCTION: Format your response using EXACTLY this structure:\n\n"
@@ -332,10 +330,9 @@ class ChatGPTAPI(Base):
 
         self.prompt_template = structured_prompt + " ```{text}```"
 
-        # 翻译
         translated_text = self.translate(formatted_text, False)
 
-        # 从结构化输出中提取翻译
+        # Extract translations from structured output
         translated_paragraphs = []
         for i in range(1, plist_len + 1):
             pattern = (
@@ -350,7 +347,6 @@ class ChatGPTAPI(Base):
                 translated_paragraphs.append(translated_paragraph)
             else:
                 print(f"Warning: Could not find translation for paragraph {i}")
-                # 尝试更宽松的匹配
                 loose_pattern = (
                     r"(?:TRANSLATION|PARAGRAPH|PARA).*?"
                     + str(i)
@@ -362,20 +358,19 @@ class ChatGPTAPI(Base):
                 else:
                     translated_paragraphs.append("")
 
-        # 恢复原始提示
         self.prompt_template = original_prompt_template
 
-        # 如果提取到的段落数不正确，尝试备用提取方法
+        # Fallback if the count is off: rebuild the list keyed by paragraph number. NOTE(review): the loop above appends one entry per index, so this branch looks unreachable - confirm.
         if len(translated_paragraphs) != plist_len:
             print(
                 f"Warning: Extracted {len(translated_paragraphs)}/{plist_len} paragraphs. Using fallback extraction."
             )
-            # 提取所有可能的段落标记
+
             all_para_pattern = r"(?:TRANSLATION|PARAGRAPH|PARA).*?(\d+).*?:(.*?)(?=(?:TRANSLATION|PARAGRAPH|PARA).*?\d+.*?:|\Z)"
             all_matches = re.findall(all_para_pattern, translated_text, re.DOTALL)
 
             if all_matches:
-                # 创建一个字典，根据段落编号映射翻译内容
+                # Create a dictionary to map translation content based on paragraph numbers
                 para_dict = {}
                 for num_str, content in all_matches:
                     try:
@@ -385,7 +380,7 @@ class ChatGPTAPI(Base):
                     except ValueError:
                         continue
 
-                # 按原始顺序重建翻译列表
+                # Rebuild the translation list in the original order
                 new_translated_paragraphs = []
                 for i in range(1, plist_len + 1):
                     if i in para_dict:
@@ -396,7 +391,6 @@ class ChatGPTAPI(Base):
                 if len(new_translated_paragraphs) == plist_len:
                     translated_paragraphs = new_translated_paragraphs
 
-        # 确保最终有正确数量的段落
         if len(translated_paragraphs) < plist_len:
             translated_paragraphs.extend(
                 [""] * (plist_len - len(translated_paragraphs))