fix(ner): convertir les entrées ONNX en int64

Force input_ids et attention_mask en int64 avant inférence CamemBERT ONNX, pour éviter les erreurs de dtype selon les tokenizers/environnements Windows. Test cible: test_camembert_manager_cache.py.
This commit is contained in:
2026-06-17 18:01:57 +02:00
parent 60fb41c2e7
commit dc0616f744
2 changed files with 53 additions and 2 deletions

View File

@@ -183,8 +183,16 @@ class CamembertNerManager:
)
offsets = encoding.pop("offset_mapping")[0] # (seq_len, 2)
# Inférence
inputs = {k: v for k, v in encoding.items() if k in ("input_ids", "attention_mask")}
# Inférence. Certains tokenizers renvoient des tableaux int32 sous
# Windows, alors que le graphe CamemBERT ONNX attend des int64.
inputs = {}
for key, value in encoding.items():
if key not in ("input_ids", "attention_mask"):
continue
array = np.asarray(value)
if array.dtype != np.int64:
array = array.astype(np.int64)
inputs[key] = array
outputs = self._session.run(None, inputs)
logits = outputs[0][0] # (seq_len, num_labels)