Rename NER

I was dealing with Prodigy annotations sometimes where I combine the outputs of multiple models that also have different names for what is effectively the same entity. This like "PERSON" and "HUMAN_NAME". For these cases this script could be useful.

# /// script
# dependencies = [
#   "srsly", "typer"
# ]
# ///
import json
from pathlib import Path

import srsly
import typer


def _replace_ents(item, table):
    for span in item["spans"]:
        for k, v in table.items():
            if span["label"] == k:
                span["label"] = v
    return item


def rename_ner(
    # fmt: off
    file_in: Path = typer.Option(None, help="Path to write text into"),
    file_out: Path = typer.Option(None, help="Path to write text into"),
    translate: str = typer.Option(..., help="Path to write text into"),
    # fmt: on
):
    """Rename a named entity label in a Prodigy .jsonl file"""
    pairs = [kv for kv in translate.split(",")]
    ent_table = dict([kv.split(":") for kv in pairs])
    stream = (_replace_ents(ex, ent_table) for ex in srsly.read_jsonl(file_in))
    if file_out:
        srsly.write_jsonl(file_out, stream)
    else:
        for item in stream:
            print(json.dumps(item))


if __name__ == "__main__":
    typer.run(rename_ner)