v1.0.2 - style prompting, drop grammar correction

This commit is contained in:
2026-01-25 13:42:06 +02:00
parent d73656a75c
commit 4f077df03d
12 changed files with 246 additions and 37 deletions

38
test_m2m.py Normal file
View File

@@ -0,0 +1,38 @@
import sys
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
def test_m2m():
model_name = "facebook/m2m100_418M"
print(f"Loading {model_name}...")
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
model = M2M100ForConditionalGeneration.from_pretrained(model_name)
# Test cases: (Language Code, Input)
test_cases = [
("en", "he go to school yesterday"),
("pl", "on iść do szkoła wczoraj"), # Intentional broken grammar in Polish
]
print("\nStarting M2M Tests (Self-Translation):\n")
for lang, input_text in test_cases:
tokenizer.src_lang = lang
encoded = tokenizer(input_text, return_tensors="pt")
# Translate to SAME language
generated_tokens = model.generate(
**encoded,
forced_bos_token_id=tokenizer.get_lang_id(lang)
)
corrected = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
print(f"[{lang}]")
print(f"Input: {input_text}")
print(f"Output: {corrected}")
print("-" * 20)
if __name__ == "__main__":
test_m2m()