mirror of
https://github.com/yihong0618/bilingual_book_maker.git
synced 2025-06-05 19:15:34 +00:00
fix: Fix translation paragraph count mismatch by explicitly instructing the LLM about paragraph requirements
This commit is contained in:
parent
b0dbed8826
commit
83303d1dd8
@ -256,8 +256,7 @@ class ChatGPTAPI(Base):
|
|||||||
retry_count += 1
|
retry_count += 1
|
||||||
|
|
||||||
# Make instructions increasingly explicit with each retry
|
# Make instructions increasingly explicit with each retry
|
||||||
emphasis = "!" * min(retry_count,
|
emphasis = "!" * min(retry_count, 3) # Add up to 3 exclamation marks
|
||||||
3) # Add up to 3 exclamation marks
|
|
||||||
paragraph_instruction = f"IMPORTANT{emphasis} The text contains exactly {plist_len} numbered paragraphs. Your translation MUST maintain exactly {plist_len} paragraphs with the same numbering structure."
|
paragraph_instruction = f"IMPORTANT{emphasis} The text contains exactly {plist_len} numbered paragraphs. Your translation MUST maintain exactly {plist_len} paragraphs with the same numbering structure."
|
||||||
|
|
||||||
# Extend the original prompt
|
# Extend the original prompt
|
||||||
@ -392,12 +391,10 @@ class ChatGPTAPI(Base):
|
|||||||
log_path = "log/buglog.txt"
|
log_path = "log/buglog.txt"
|
||||||
|
|
||||||
self.log_retry(state, retry_count, end_time - start_time, log_path)
|
self.log_retry(state, retry_count, end_time - start_time, log_path)
|
||||||
self.log_translation_mismatch(plist_len, result_list, new_str, sep,
|
self.log_translation_mismatch(plist_len, result_list, new_str, sep, log_path)
|
||||||
log_path)
|
|
||||||
|
|
||||||
# Remove paragraph numbers from the result
|
# Remove paragraph numbers from the result
|
||||||
result_list = [re.sub(r"^(\(\d+\)|\d+\.|(\d+))\s*", "", s) for s in
|
result_list = [re.sub(r"^(\(\d+\)|\d+\.|(\d+))\s*", "", s) for s in result_list]
|
||||||
result_list]
|
|
||||||
return result_list
|
return result_list
|
||||||
|
|
||||||
def extract_paragraphs(self, text, paragraph_count):
|
def extract_paragraphs(self, text, paragraph_count):
|
||||||
@ -405,14 +402,14 @@ class ChatGPTAPI(Base):
|
|||||||
# First try to extract by paragraph numbers (1), (2), etc.
|
# First try to extract by paragraph numbers (1), (2), etc.
|
||||||
result_list = []
|
result_list = []
|
||||||
for i in range(1, paragraph_count + 1):
|
for i in range(1, paragraph_count + 1):
|
||||||
pattern = rf'\({i}\)\s*(.*?)(?=\s*\({i + 1}\)|\Z)'
|
pattern = rf"\({i}\)\s*(.*?)(?=\s*\({i + 1}\)|\Z)"
|
||||||
match = re.search(pattern, text, re.DOTALL)
|
match = re.search(pattern, text, re.DOTALL)
|
||||||
if match:
|
if match:
|
||||||
result_list.append(match.group(1).strip())
|
result_list.append(match.group(1).strip())
|
||||||
|
|
||||||
# If exact pattern matching failed, try another approach
|
# If exact pattern matching failed, try another approach
|
||||||
if len(result_list) != paragraph_count:
|
if len(result_list) != paragraph_count:
|
||||||
pattern = r'\((\d+)\)\s*(.*?)(?=\s*\(\d+\)|\Z)'
|
pattern = r"\((\d+)\)\s*(.*?)(?=\s*\(\d+\)|\Z)"
|
||||||
matches = re.findall(pattern, text, re.DOTALL)
|
matches = re.findall(pattern, text, re.DOTALL)
|
||||||
if matches:
|
if matches:
|
||||||
# Sort by paragraph number
|
# Sort by paragraph number
|
||||||
|
Loading…
x
Reference in New Issue
Block a user