# Source code for kindred.manualAnnotation

import kindred
from collections import OrderedDict,defaultdict
import six

# Colors to use for output sentences with annotation
class bcolors:
	"""Named ANSI escape sequences for styling text printed to a terminal."""

	# Reset sequence: appended after a styled span to end the styling
	ENDC = '\033[0m'

	# Text attribute sequences
	BOLD = '\033[1m'
	UNDERLINE = '\033[4m'

	# Foreground colour sequences (SGR codes 91-95)
	FAIL = '\033[91m'
	OKGREEN = '\033[92m'
	WARNING = '\033[93m'
	OKBLUE = '\033[94m'
	HEADER = '\033[95m'

class RESPONSE:
	"""Integer codes for the three possible responses, plus a lookup table."""

	ENTITYERROR = -1
	NEGATIVE = 0
	POSITIVE = 1

	# Maps the single-character keys 'y', 'n' and 'x' to the codes above
	# NOTE(review): presumably these are typed answers; TABLE is not referenced
	# anywhere else in the visible code - confirm against callers
	TABLE = dict(y=POSITIVE,n=NEGATIVE,x=ENTITYERROR)


def _highlightEntities(candidateRelation):
	"""
	Build the sentence text of a candidate relation with its two entities wrapped in terminal colour codes.

	The first entity is wrapped in bcolors.FAIL and the second in bcolors.OKGREEN, each closed with bcolors.ENDC.

	:param candidateRelation: Candidate relation whose sentence should be rendered
	:type candidateRelation: kindred.CandidateRelation
	:return: The sentence text with colour codes inserted around both entities
	:rtype: str
	"""
	sentence = candidateRelation.sentence
	# Entity positions are compared against the first token's startPos, so shift
	# them by that amount to index into the sentence text
	sentenceStart = sentence.tokens[0].startPos

	e1,e2 = candidateRelation.entities
	assert len(e1.position) == 1, 'Annotator cannot currently deal with non-continuous entities'
	assert len(e2.position) == 1, 'Annotator cannot currently deal with non-continuous entities'

	charByChar = list(sentence.text)
	for entity,color in ((e1,bcolors.FAIL),(e2,bcolors.OKGREEN)):
		start,end = entity.position[0]
		start,end = start-sentenceStart,end-sentenceStart
		# Attach the codes to the boundary characters so the list indices stay valid
		charByChar[start] = color + charByChar[start]
		charByChar[end-1] += bcolors.ENDC

	return "".join(charByChar)


def manuallyAnnotate(corpus,candidateRelations):
	"""
	Provides a method for basic manual annotation of a series of candidate relations. Deals with a corpus, sentence by sentence, and prompts the user to annotate each candidate relation in turn. Can be exited before completion of the full list and the resulting annotations are split into an annotated corpus and unannotated corpus. Each document in the new corpora are individual sentences.

	At each prompt the user may enter the key of a listed option, the name of a listed option, or any other text, which is registered as a new annotation type under the next numeric key. Entering 'x' (or 'Done') stops annotation: the document in progress has its relations wiped and it, along with all remaining documents, is placed in the unannotated corpus. Choosing '0' (or 'None') records no relation for that candidate. Empty input is re-prompted.

	:param corpus: Corpus of text for annotation
	:param candidateRelations: List of candidate relations (created using CandidateBuilder) to manually review and annotate
	:type corpus: kindred.Corpus
	:type candidateRelations: List of kindred.CandidateRelation
	:return: a tuple of an annotated corpus and unannotated corpus
	:rtype: two kindred.Corpus
	"""
	annotatedCorpus = kindred.Corpus()
	unannotatedCorpus = kindred.Corpus()

	# Key typed by the user -> annotation name. 'x' and '0' are reserved.
	options = OrderedDict()
	options['x'] = 'Done'
	options['0'] = 'None'

	print()
	print("For each sentence, choose an existing option or type the name of a new annotation")

	endAnnotation = False
	crCounter = 0
	for sourceDoc in corpus.documents:
		# Work on a copy that starts with no relations
		doc = kindred.Document(sourceDoc.text,sourceDoc.entities,[])

		if not endAnnotation:
			# Only filter the candidates while annotation is still in progress
			docSentences = set(sourceDoc.sentences)
			crsInDoc = [ cr for cr in candidateRelations if cr.sentence in docSentences ]

			for candidateRelation in crsInDoc:
				crCounter += 1

				print()
				print('#'*30 + " (%d/%d)" % (crCounter,len(candidateRelations)))
				print(_highlightEntities(candidateRelation))

				optionTxt = " ".join("%s:%s" % (key,value) for key,value in options.items())
				response = None
				while not response:
					response = six.moves.input('%s ? ' % optionTxt).strip()

				# Match against the option keys/names themselves rather than the
				# rendered prompt text: a substring test would mistake labels such
				# as "one" (inside "Done"/"None") for existing options.
				if response not in options:
					for key,value in options.items():
						if value == response:
							# The name of a known option was typed; use its key
							response = key
							break

				if response == 'x':
					endAnnotation = True
					break
				elif response in options:
					relationType = options[response]
				else:
					# Unknown text becomes a new annotation type under the next number
					newKey = str(len(options)-1)
					options[newKey] = response
					relationType = response

				if relationType != 'None':
					r = kindred.Relation(relationType,candidateRelation.entities)
					doc.addRelation(r)

		if endAnnotation:
			# Annotation is incomplete, so wipe any previous annotation on this sentence
			doc.relations = []
			unannotatedCorpus.addDocument(doc)
		else:
			annotatedCorpus.addDocument(doc)

	return annotatedCorpus,unannotatedCorpus