Skip to content

Commit 4bb6bc3

Browse files
committed
Add support for sent_start to GoldParse
1 parent 44589fb commit 4bb6bc3

File tree

2 files changed

+6
-0
lines changed

2 files changed

+6
-0
lines changed

spacy/gold.pxd

+1
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ cdef struct GoldParseC:
99
int* tags
1010
int* heads
1111
int* has_dep
12+
int* sent_start
1213
attr_t* labels
1314
int** brackets
1415
Transition* ner

spacy/gold.pyx

+5
Original file line number | Diff line number | Diff line change
@@ -426,6 +426,7 @@ cdef class GoldParse:
426426
self.c.heads = <int*>self.mem.alloc(len(doc), sizeof(int))
427427
self.c.labels = <attr_t*>self.mem.alloc(len(doc), sizeof(attr_t))
428428
self.c.has_dep = <int*>self.mem.alloc(len(doc), sizeof(int))
429+
self.c.sent_start = <int*>self.mem.alloc(len(doc), sizeof(int))
429430
self.c.ner = <Transition*>self.mem.alloc(len(doc), sizeof(Transition))
430431

431432
self.cats = list(cats)
@@ -482,6 +483,10 @@ cdef class GoldParse:
482483
"""
483484
return not nonproj.is_nonproj_tree(self.heads)
484485

486+
@property
487+
def sent_starts(self):
488+
return [self.c.sent_start[i] for i in range(self.length)]
489+
485490

486491
def biluo_tags_from_offsets(doc, entities, missing='O'):
487492
"""Encode labelled spans into per-token tags, using the Begin/In/Last/Unit/Out

0 commit comments

Comments
 (0)