# Source code for kindred.LogisticRegressionWithThreshold


import numpy as np
from sklearn.linear_model import LogisticRegression

class LogisticRegressionWithThreshold:
	"""
	A modified Logistic Regression classifier that will filter calls by a custom threshold, instead of the default 0.5. This allows for control of the precision-recall tradeoff, e.g. false positives versus false negatives.
	
	:ivar clf: The underlying LogisticRegression classifier
	:ivar threshold: Threshold to use, should be between 0 and 1
	:ivar classes_: Class labels seen during training (copied from the underlying classifier by fit)
	"""

	def __init__(self,threshold=0.5):
		"""
		Set up a Logistic Regression classifier that can use a different threshold for predictions and thereby be more lenient (lower threshold, false positives increase, false negatives decrease) or more conservative (higher threshold, false positives decrease, false negatives increase).
		
		:param threshold: Threshold to use, should be between 0 and 1
		:type threshold: float
		:raises AssertionError: If threshold is outside the range [0, 1]
		"""

		# Raised explicitly (rather than via an assert statement) so the check
		# still runs under "python -O"; the exception type and message are the
		# same as the assert statement this replaces.
		if not 0 <= threshold <= 1:
			raise AssertionError("Threshold must be between 0 and 1")

		self.clf = LogisticRegression(class_weight='balanced',random_state=1,solver='liblinear',multi_class='ovr')
		self.threshold = threshold

	def fit(self,X,Y):
		"""
		Train the classifier using the associated matrix X and classes Y. Class zero should represent no associated class.
		
		:param X: Training vector
		:param Y: Associated class for each row of X
		:type X: sparse matrix
		:type Y: matrix
		:return: This classifier, so that calls can be chained
		:rtype: LogisticRegressionWithThreshold
		"""

		self.clf.fit(X,Y)
		self.classes_ = self.clf.classes_
		return self

	def predict(self,X):
		"""
		Make predictions for the class of each row in X. Class zero should represent no prediction.
		
		A non-zero class is predicted only if its probability reaches the threshold; when several do, the most probable one wins. Rows where no non-zero class reaches the threshold are predicted as class zero. The returned values are class labels (taken from the underlying classifier's classes_), not column indices.
		
		:param X: Testing vector
		:type X: sparse matrix
		:return: Predictions of classes for each row in X
		:rtype: matrix
		"""

		probs = self.clf.predict_proba(X)
		classes = np.asarray(self.clf.classes_)

		# Ignore probabilities that fall below our threshold
		scores = np.where(probs < self.threshold, -1.0, probs)

		# Make sure that the zero class is only selected if all other options are
		# below the threshold. The column is looked up by label because the zero
		# class is not necessarily the first column (or present at all).
		zeroColumns = np.flatnonzero(classes == 0)
		scores[:,zeroColumns] = -0.5

		# Get the highest remaining score for each row and translate the column
		# index back into the class label it stands for
		predictions = classes[np.argmax(scores,axis=1)]

		if zeroColumns.size == 0:
			# The zero class was never seen in training, so there is no column to
			# fall back on: rows where nothing reached the threshold are set to
			# zero (no prediction) explicitly.
			nothingPassed = scores.max(axis=1) < 0
			if nothingPassed.any():
				predictions = np.where(nothingPassed, 0, predictions)

		return predictions

	def predict_proba(self,X):
		"""
		Calculate probabilities for the class of each row in X. Class zero should represent no prediction.
		Returns the matrix of probabilities from the underlying classifier, unaffected by the threshold.
		
		:param X: Testing vector
		:type X: sparse matrix
		:return: Probabilities of classes for each row in X
		:rtype: matrix
		"""

		return self.clf.predict_proba(X)