Skip to content

Commit

Permalink
add salience and disrpt handling
Browse files Browse the repository at this point in the history
  • Loading branch information
amir-zeldes committed Feb 2, 2023
1 parent 8388c09 commit cd17cea
Show file tree
Hide file tree
Showing 6 changed files with 511 additions and 43 deletions.
10 changes: 8 additions & 2 deletions _build/build_gum.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from glob import glob
from argparse import ArgumentParser
from utils.pepper_runner import run_pepper
from utils.make_rst_rel_data import main as make_disrpt
import datetime
import ntpath, platform

Expand Down Expand Up @@ -203,7 +204,7 @@ def check_diff(xml, ptb, docname):
# * find instances of "'s" that are not included in any immediately preceding
# markables and merge them into those markables if genitive_s is True
# * return conllu-a style bracket information to add entity data to conllu files later
conllua_data, centering_data = fix_tsv(gum_source, gum_target, reddit=reddit)
conllua_data, centering_data, salience_data = fix_tsv(gum_source, gum_target, reddit=reddit)

# Adjust rst/ files:
# * refresh token strings in case of inconsistency
Expand Down Expand Up @@ -275,6 +276,7 @@ def check_diff(xml, ptb, docname):
sys.stdout.write("\n")
else:
conllua_data = None
salience_data = None
sys.stderr.write("i Pepper only conversion, entities in conllu-a data will be generated from Pepper output (no infsat or min IDs)\n")

## Step 3: merge and convert source formats to target formats
Expand Down Expand Up @@ -331,7 +333,7 @@ def check_diff(xml, ptb, docname):
## Step 4: propagate entity types, coref, discourse relations and XML annotations into conllu dep files
from utils.propagate import add_entities_to_conllu, add_rsd_to_conllu, add_bridging_to_conllu, add_xml_to_conllu

add_entities_to_conllu(gum_target, reddit=reddit, ontogum=False, conllua_data=conllua_data)
add_entities_to_conllu(gum_target, reddit=reddit, ontogum=False, conllua_data=conllua_data, salience_data=salience_data)
if not options.skip_ontogum:
if options.no_pepper:
sys.__stdout__.write("\ni Not adding entity information to UD parses in OntoGUM version since Pepper conversion was skipped\n")
Expand All @@ -347,3 +349,7 @@ def check_diff(xml, ptb, docname):
add_xml_to_conllu(gum_target,reddit=reddit,ontogum=True)

sys.__stdout__.write("\no Added discourse relations and XML tags to UD parses\n")

make_disrpt(reddit=reddit)

sys.__stdout__.write("\no Created DISRPT shared task discourse relation formats in target rst/disrpt/\n")
Loading

0 comments on commit cd17cea

Please sign in to comment.