-
-
Notifications
You must be signed in to change notification settings - Fork 1.4k
/
Copy pathgenerate_data_categorical_example.py
49 lines (40 loc) · 1.44 KB
/
generate_data_categorical_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# -*- coding: utf-8 -*-
"""Example of using and visualizing ``generate_data_categorical`` function.
"""
# Author: Yahya Almardeny <[email protected]>
# License: BSD 2 clause
from __future__ import division
from __future__ import print_function
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
# Temporary solution for relative imports in case pyod is not installed;
# if pyod is installed, the following statement is not needed.
# The parent of this script's directory is appended to the import path.
# ``__file__`` must be the module attribute, not the string "__file__":
# with the string literal the directory part is always empty, so the path
# would be resolved against the current working directory instead.
sys.path.append(
    os.path.abspath(
        os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')))
from pyod.utils.data import generate_data_categorical
def _plot_categorical(categories, labels, title):
    """Show a bar chart and a line plot of ``labels`` against ``categories``.

    A new figure with two side-by-side axes is created, titled, and
    displayed with ``plt.show()``.

    Parameters
    ----------
    categories : list
        Values placed on the x-axis of both plots.

    labels : list
        Values used as bar heights (left plot) and y-values (right plot).

    title : str
        Title for the whole figure.
    """
    fig, axs = plt.subplots(1, 2)
    axs[0].bar(categories, labels)
    axs[1].plot(categories, labels)
    # ``plt.title`` would only title the current (last-created) axes;
    # ``suptitle`` labels the figure as a whole.
    fig.suptitle(title)
    plt.show()


if __name__ == "__main__":
    contamination = 0.1  # proportion of outliers

    # Generate sample data in clusters
    X_train, X_test, y_train, y_test = generate_data_categorical(
        n_train=200, n_test=50,
        n_category_in=8, n_category_out=5,
        n_informative=1, n_features=1,
        contamination=contamination,
        shuffle=True, random_state=42)

    # note that visualizing it can only be in 1 dimension!
    # The feature array is flattened so each sample contributes one x-value.
    _plot_categorical(list(np.ravel(X_train)), list(y_train),
                      'Synthetic Categorical Train Data')
    _plot_categorical(list(np.ravel(X_test)), list(y_test),
                      'Synthetic Categorical Test Data')