-
Notifications
You must be signed in to change notification settings - Fork 2.7k
/
Copy pathnaivebayes.py
67 lines (50 loc) · 1.97 KB
/
naivebayes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""
Naive Bayes Classifier Implementation from scratch
To run the code, structure the data in the following way:
X has shape: (num_training_examples, num_features)
y has shape: (num_training_examples, )
Where the classes are 0, 1, 2, etc. Then an example run looks like:
NB = NaiveBayes(X, y)
NB.fit(X)
predictions = NB.predict(X)
Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-21 Initial coding
"""
import numpy as np
class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled as an independent Gaussian per class. Class
    labels are expected to be the integers 0, 1, ..., num_classes - 1
    (float-valued labels such as 0.0, 1.0 work as well, because they are
    compared with ``==``).

    Example:
        NB = NaiveBayes(X, y)
        NB.fit(X)
        predictions = NB.predict(X)
    """

    def __init__(self, X, y):
        """Record the data dimensions and the training labels.

        Args:
            X: Training data, shape (num_examples, num_features).
            y: Training labels, one per row of X.
        """
        self.num_examples, self.num_features = X.shape
        self.num_classes = len(np.unique(y))
        # Added to every variance so a constant feature never causes a
        # division by zero or log(0).
        self.eps = 1e-6
        # Keep the labels on the instance so fit() does not have to rely
        # on a variable named ``y`` existing in the caller's global scope.
        self.y = np.asarray(y)

    def fit(self, X, y=None):
        """Estimate per-class feature means, variances and class priors.

        Args:
            X: Training data, shape (num_examples, num_features).
            y: Optional labels for X. When omitted, the labels given to
                the constructor are used.

        Raises:
            ValueError: If the number of labels differs from the number
                of rows in X.
        """
        if y is not None:
            self.y = np.asarray(y)
            self.num_classes = len(np.unique(self.y))
        labels = self.y

        if labels.shape[0] != X.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} rows but y has {labels.shape[0]} labels"
            )

        # Keep the recorded dimensions in sync with the data actually fitted.
        self.num_examples, self.num_features = X.shape

        self.classes_mean = {}
        self.classes_variance = {}
        self.classes_prior = {}

        for c in range(self.num_classes):
            X_c = X[labels == c]
            key = str(c)
            self.classes_mean[key] = np.mean(X_c, axis=0)
            self.classes_variance[key] = np.var(X_c, axis=0)
            self.classes_prior[key] = X_c.shape[0] / X.shape[0]

    def predict(self, X):
        """Return the most probable class index for every row of X.

        Args:
            X: Data to classify, shape (n, num_features). ``n`` does not
                have to match the size of the training set.

        Returns:
            Array of shape (n,) with the predicted class index per row.
        """
        # Size the score matrix from the input, not from the training set,
        # so that data of any length can be classified.
        probs = np.zeros((X.shape[0], self.num_classes))

        for c in range(self.num_classes):
            key = str(c)
            prior = self.classes_prior[key]
            probs_c = self.density_function(
                X, self.classes_mean[key], self.classes_variance[key]
            )
            # Log-likelihood plus log-prior gives the unnormalised
            # log-posterior for class c.
            probs[:, c] = probs_c + np.log(prior)

        return np.argmax(probs, 1)

    def density_function(self, x, mean, sigma):
        """Return the Gaussian log-density of each row of x.

        Args:
            x: Data, shape (n, num_features).
            mean: Per-feature mean of the class.
            sigma: Per-feature variance of the class (despite the name,
                this is the variance, not the standard deviation).

        Returns:
            Array of shape (n,) with the log-density of every row under a
            Gaussian with diagonal covariance.
        """
        # Normalisation term of the log-density; identical for every row.
        const = -self.num_features / 2 * np.log(2 * np.pi) - 0.5 * np.sum(
            np.log(sigma + self.eps)
        )
        # Half the variance-scaled squared distance to the mean, per row.
        probs = 0.5 * np.sum(np.power(x - mean, 2) / (sigma + self.eps), 1)
        return const - probs
if __name__ == "__main__":
    # Demo: train on the bundled example data and report the accuracy
    # obtained on that same training set.
    X = np.loadtxt("example_data/data.txt", delimiter=",")
    # Shift the stored targets down by one (presumably the file uses
    # 1-based labels while the classifier expects 0-based ones).
    y = np.loadtxt("example_data/targets.txt") - 1

    classifier = NaiveBayes(X, y)
    classifier.fit(X)
    predictions = classifier.predict(X)

    # Fraction of rows whose predicted class equals the target.
    accuracy = np.sum(predictions == y) / X.shape[0]
    print(f"Accuracy: {accuracy}")