diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py
index 67c6239..6d11437 100644
--- a/book_maker/translator/chatgptapi_translator.py
+++ b/book_maker/translator/chatgptapi_translator.py
@@ -75,7 +75,7 @@ class ChatGPTAPI(Base):
api_base=None,
prompt_template=None,
prompt_sys_msg=None,
- temperature=1.0,
+ temperature=1.3,
context_flag=False,
context_paragraph_limit=0,
**kwargs,
@@ -230,66 +230,6 @@ class ChatGPTAPI(Base):
lines = [line.strip() for line in lines if line.strip() != ""]
return lines
- def get_best_result_list(
- self,
- plist_len,
- new_str,
- sleep_dur,
- result_list,
- max_retries=15,
- ):
- if len(result_list) == plist_len:
- return result_list, 0
- best_result_list = result_list
- retry_count = 0
-
- # Save original prompt template
- original_prompt_template = self.prompt_template
- while retry_count < max_retries and len(result_list) != plist_len:
- print(
- f"bug: {plist_len} -> {len(result_list)} : Number of paragraphs before and after translation",
- )
- print(f"sleep for {sleep_dur}s and retry {retry_count + 1} ...")
- time.sleep(sleep_dur)
- retry_count += 1
-
- # Create increasingly strict prompts
- structured_prompt = (
- f"CRITICAL!!! Translate the following {plist_len} paragraphs to {{language}}. "
- f"Your output MUST have EXACTLY {plist_len} paragraphs - NO MORE, NO LESS. "
- f"Each paragraph must be wrapped in numbered XML tags:
translated text
" - ) - - self.prompt_template = structured_prompt + " `{text}`" - - translated_text = self.translate(new_str, False) - result_list = self.extract_tagged_paragraphs(translated_text, plist_len) - - if ( - len(result_list) == plist_len - or len(best_result_list) < len(result_list) <= plist_len - or ( - len(result_list) < len(best_result_list) - and len(best_result_list) > plist_len - ) - ): - best_result_list = result_list - # Restore original prompt - self.prompt_template = original_prompt_template - - # If we still don't have the right number, force it by padding or trimming - if len(best_result_list) != plist_len: - if len(best_result_list) < plist_len: - # Pad with empty strings if we have too few - best_result_list.extend([""] * (plist_len - len(best_result_list))) - else: - # Trim if we have too many - best_result_list = best_result_list[:plist_len] - - return best_result_list, retry_count - def log_retry(self, state, retry_count, elapsed_time, log_path="log/buglog.txt"): if retry_count == 0: return @@ -466,72 +406,6 @@ class ChatGPTAPI(Base): return translated_paragraphs - def extract_tagged_paragraphs(self, text, plist_len): - """Extract paragraphs from text with(.*?)
" - matches = re.findall(pattern, text, re.DOTALL) - if matches: - result_list.append(matches[0].strip()) - - # If we got all paragraphs, return them - if len(result_list) == plist_len: - return result_list - - # Fallback: try general tag pattern - pattern = r"(.*?)
" - matches = re.findall(pattern, text, re.DOTALL) - - if matches and len(matches) == plist_len: - # Sort by paragraph number - matches.sort(key=lambda x: int(x[0])) - result_list = [match[1].strip() for match in matches] - return result_list - - # Second fallback: try another approach with numbered paragraphs - result_list = [] - for i in range(1, plist_len + 1): - pattern = rf"\({i}\)\s*(.*?)(?=\s*\({i + 1}\)|\Z)" - match = re.search(pattern, text, re.DOTALL) - if match: - result_list.append(match.group(1).strip()) - - # If all else fails, fall back to splitting by lines - if len(result_list) != plist_len: - lines = text.splitlines() - non_empty_lines = [line.strip() for line in lines if line.strip()] - - # Attempt to find paragraph markers and divide accordingly - paragraph_markers = [ - i - for i, line in enumerate(non_empty_lines) - if re.match(r"^\s*(\(\d+\)|\d+\.)", line) - ] - - if len(paragraph_markers) == plist_len: - result_list = [] - for i in range(len(paragraph_markers)): - start = paragraph_markers[i] - end = ( - paragraph_markers[i + 1] - if i < len(paragraph_markers) - 1 - else len(non_empty_lines) - ) - paragraph = " ".join(non_empty_lines[start:end]) - result_list.append(re.sub(r"^\s*(\(\d+\)|\d+\.)\s*", "", paragraph)) - else: - # Last resort: try to split evenly - result_list = ( - non_empty_lines[:plist_len] - if len(non_empty_lines) >= plist_len - else non_empty_lines - ) - - return result_list - def extract_paragraphs(self, text, paragraph_count): """Extract paragraphs from translated text, ensuring paragraph count is preserved.""" # First try to extract by paragraph numbers (1), (2), etc.