import itertools
import numbers
import sys
from collections import defaultdict

import networkx as nx
import six

import kindred
class Sentence:
    """
    Set of tokens for a sentence after parsing

    :ivar text: Text of the sentence
    :ivar tokens: List of tokens in sentence
    :ivar dependencies: List of dependencies from dependency path. Should be a list of tuples with form (tokenindex1,tokenindex2,dependency_type)
    :ivar sourceFilename: Filename of the source document
    :ivar entityAnnotations: List of (entity, tokenIndices) tuples associating entities with token indices
    """

    def __init__(self, text, tokens, dependencies, sourceFilename=None):
        """
        Constructor for Sentence class

        All arguments except sourceFilename are validated with assertions,
        so malformed input raises AssertionError. The entity annotation list
        starts out empty; use addEntityAnnotation to populate it.

        :param text: Text of the sentence
        :param tokens: List of tokens in sentence
        :param dependencies: List of dependencies from dependency path. Should be a list of tuples with form (tokenindex1,tokenindex2,dependency_type)
        :param sourceFilename: Filename of the source document
        :type text: str
        :type tokens: list of kindred.Token
        :type dependencies: list of tuples
        :type sourceFilename: str
        :raises AssertionError: If text, tokens or dependencies do not have the expected type or format
        """
        assert isinstance(text, six.string_types)
        assert isinstance(tokens, list)
        for token in tokens:
            assert isinstance(token, kindred.Token)

        # Check the format of the dependencies
        dependencyErrorMsg = "Each dependency is expected to be a tuple of (tokenindex1,tokenindex2,dependency_type). Token index can be -1 to indicate an incoming edge."
        assert isinstance(dependencies, list), dependencyErrorMsg
        tokenCount = len(tokens)
        for dependency in dependencies:
            assert isinstance(dependency, tuple), dependencyErrorMsg
            assert len(dependency) == 3, dependencyErrorMsg
            assert isinstance(dependency[0], int), dependencyErrorMsg
            assert isinstance(dependency[1], int), dependencyErrorMsg
            assert isinstance(dependency[2], six.string_types), dependencyErrorMsg
            # -1 is accepted as a token index (see dependencyErrorMsg);
            # anything else must be a valid index into tokens
            assert -1 <= dependency[0] < tokenCount, dependencyErrorMsg
            assert -1 <= dependency[1] < tokenCount, dependencyErrorMsg

        self.text = text
        self.tokens = tokens
        self.dependencies = dependencies
        self.sourceFilename = sourceFilename
        self.entityAnnotations = []

    def addEntityAnnotation(self, entity, tokenIndices):
        """
        Add an entity annotation to this sentence. Associates a specific entity with the indices of specific tokens

        The (entity, tokenIndices) pair is appended to entityAnnotations. The
        list object passed in is stored as-is (it is not copied).

        :param entity: Entity to add to sentence
        :param tokenIndices: List of token indices
        :type entity: kindred.Entity
        :type tokenIndices: List of ints
        :raises AssertionError: If entity is not a kindred.Entity, tokenIndices is not a list, or any index is not an integer within the range of this sentence's tokens
        """
        indexErrorMsg = "Entity location must be an index of one of the tokens"

        assert isinstance(entity, kindred.Entity)
        assert isinstance(tokenIndices, list)
        tokenCount = len(self.tokens)
        for tokenIndex in tokenIndices:
            # Reject non-integers (e.g. floats, strings) up front so that a bad
            # index fails here with a clear message. numbers.Integral is used
            # rather than int so that other registered integer types are
            # still accepted.
            assert isinstance(tokenIndex, numbers.Integral), indexErrorMsg
            assert 0 <= tokenIndex < tokenCount, indexErrorMsg

        self.entityAnnotations.append((entity, tokenIndices))

    def __str__(self):
        """Return the words of all tokens joined by single spaces."""
        return " ".join(token.word for token in self.tokens)

    def __repr__(self):
        """Return the same text as __str__."""
        return self.__str__()