Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dataset recognition #141

Merged
merged 4 commits into from
May 23, 2022
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add annotation method
  • Loading branch information
kermitt2 committed May 21, 2022
commit 1ba1def4c5bdb10e43d3dfa27c80a0f0c4806044
29 changes: 29 additions & 0 deletions delft/applications/datasetTagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def configure(architecture, output_path=None, max_sequence_length=-1, batch_size

return batch_size, max_sequence_length, model_name, embeddings_name, max_epoch, multiprocessing, early_stop


# train a model with all available data
def train(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None,
input_path=None, output_path=None, fold_count=1,
Expand Down Expand Up @@ -96,6 +97,7 @@ def train(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None,
else:
model.save()


# split data, train a model and evaluate it
def train_eval(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None,
input_path=None, output_path=None, fold_count=1,
Expand Down Expand Up @@ -154,10 +156,37 @@ def train_eval(embeddings_name=None, architecture='BidLSTM_CRF', transformer=Non
else:
model.save()


def eval_(input_path=None, architecture=None):
    """Placeholder for a standalone evaluation entry point.

    Both ``input_path`` and ``architecture`` are accepted but not used;
    the function performs no work and returns ``None``.
    """
    return None


# annotate a list of texts
def annotate_text(texts, model, output_format, architecture='BidLSTM_CRF', features=None, use_ELMo=False):
    """Tag a list of texts with the dataset recognition model.

    The model to load is named ``datasets-<architecture>``, with the suffix
    ``-with_ELMo`` appended when ``use_ELMo`` is true. It is instantiated
    through ``Sequence``, loaded, and its ``tag`` method is applied to
    ``texts``.

    Args:
        texts: the texts to annotate, passed as-is to ``tag``.
        model: not read by this function; a model is always loaded from the
            name built here.
            NOTE(review): confirm whether callers expect this argument to
            select or supply the model.
        output_format: forwarded to ``tag``; when equal to ``'json'`` the
            runtime is stored in the result instead of being printed.
        architecture: architecture name used to build the model name.
        features: forwarded to ``tag`` as the ``features`` keyword.
        use_ELMo: whether to load the ``-with_ELMo`` variant of the model.

    Returns:
        Whatever ``tag`` returns. For ``'json'`` output the result also gets
        a ``"runtime"`` entry (seconds, rounded to 3 decimals) -- this
        presumes the result supports item assignment; verify against
        ``Sequence.tag``.
    """
    # build the name of the model to load
    elmo_suffix = '-with_ELMo' if use_ELMo else ''
    tagger = Sequence('datasets' + '-' + architecture + elmo_suffix)
    tagger.load()

    # time only the tagging step, not the model loading
    started_at = time.time()
    result = tagger.tag(texts, output_format, features=features)
    elapsed = round(time.time() - started_at, 3)

    if output_format != 'json':
        print("runtime: %s seconds " % (elapsed))
        return result

    result["runtime"] = elapsed
    return result


if __name__ == "__main__":
parser = argparse.ArgumentParser(description = "Trainer for dataset recognition models using the DeLFT library")

Expand Down