-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathUtility.py
68 lines (52 loc) · 2.37 KB
/
Utility.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Simone Boglio
"""
import scipy.sparse as sps
import numpy as np
def filter_urm(urm, user_min_number_ratings=1, item_min_number_ratings=1):
    """Drop users and items that have too few non-zero ratings.

    Rows (users) with fewer than ``user_min_number_ratings`` non-zero entries
    are removed first; afterwards columns (items) with fewer than
    ``item_min_number_ratings`` non-zero entries *in the remaining rows* are
    removed.

    NOTE: the filtering is a single pass. Removing items in the second step
    can leave a kept user with fewer ratings than the requested minimum.
    NOTE: this operation re-indexes both users and items (the result is a
    more compact URM), so row/column positions no longer match the input.

    Args:
        urm: anything accepted by ``scipy.sparse.csr_matrix`` (sparse matrix
            or dense 2-D array). It is never modified.
        user_min_number_ratings: minimum non-zero entries a row needs to be
            kept.
        item_min_number_ratings: minimum non-zero entries a column needs to
            be kept.

    Returns:
        The filtered matrix in CSR format.
    """
    # Work on a private copy: csr_matrix() shares its buffers with an input
    # that is already CSR, and eliminate_zeros() edits those buffers in
    # place, which would otherwise corrupt the caller's matrix.
    urm = sps.csr_matrix(urm, copy=True)
    urm.eliminate_zeros()

    # In CSR, consecutive indptr differences are the per-row entry counts.
    users_to_select_mask = np.ediff1d(urm.indptr) >= user_min_number_ratings
    urm = urm[users_to_select_mask, :]

    # In CSC, the same differences are the per-column entry counts.
    urm = sps.csc_matrix(urm)
    items_to_select_mask = np.ediff1d(urm.indptr) >= item_min_number_ratings
    urm = urm[:, items_to_select_mask]

    return urm.tocsr()
def print_stat_urm(urm, title=''):
    """Print a one-line summary of a user-rating matrix.

    The line reports the number of rows (users), columns (items), stored
    entries (ratings) and the density as a percentage of all cells.

    Args:
        urm: matrix exposing ``shape`` and a ``data`` array whose length is
            the number of stored entries (e.g. a CSR/CSC/COO matrix).
        title: optional label; when non-empty it is left-padded to 10
            characters and printed as a ``'<title>-> '`` prefix.
    """
    if title != '':
        title = '{:10}'.format(title) + '-> '

    n_users = urm.shape[0]
    n_items = urm.shape[1]
    n_ratings = urm.data.shape[0]

    # A matrix with a zero-length dimension has no cells; report 0% density
    # instead of raising ZeroDivisionError.
    n_cells = n_users * n_items
    density = n_ratings / n_cells * 100 if n_cells > 0 else 0.0

    print('{}users: {} \titems: {} \tratings: {:8d} \tdensity: {:.3f}%'.format(title,n_users, n_items, n_ratings, density))
def print_stat_icm(urm, title=''):
    """Print a one-line summary of an item-content matrix.

    The line reports the number of rows (items), columns (features), stored
    entries (values) and the density as a percentage of all cells.

    Args:
        urm: the matrix to summarise (the parameter keeps its historical
            name); it must expose ``shape`` and a ``data`` array whose
            length is the number of stored entries.
        title: optional label; when non-empty it is left-padded to 10
            characters and printed as a ``'<title>-> '`` prefix.
    """
    if title != '':
        title = '{:10}'.format(title) + '-> '

    n_users = urm.shape[0]
    n_items = urm.shape[1]
    n_ratings = urm.data.shape[0]

    # A matrix with a zero-length dimension has no cells; report 0% density
    # instead of raising ZeroDivisionError.
    n_cells = n_users * n_items
    density = n_ratings / n_cells * 100 if n_cells > 0 else 0.0

    print('{}items: {} \tfeatures: {} \tvalues: {:9d} \tdensity: {:.3f}%'.format(title,n_users, n_items, n_ratings, density))
def print_stat_ucm(urm, title=''):
    """Print a one-line summary of a user-content matrix.

    The line reports the number of rows (users), columns (features), stored
    entries (values) and the density as a percentage of all cells.

    Args:
        urm: the matrix to summarise (the parameter keeps its historical
            name); it must expose ``shape`` and a ``data`` array whose
            length is the number of stored entries.
        title: optional label; when non-empty it is left-padded to 10
            characters and printed as a ``'<title>-> '`` prefix.
    """
    if title != '':
        title = '{:10}'.format(title) + '-> '

    n_users = urm.shape[0]
    n_items = urm.shape[1]
    n_ratings = urm.data.shape[0]

    # A matrix with a zero-length dimension has no cells; report 0% density
    # instead of raising ZeroDivisionError.
    n_cells = n_users * n_items
    density = n_ratings / n_cells * 100 if n_cells > 0 else 0.0

    print('{}users: {} \tfeatures: {} \tvalues: {:9d} \tdensity: {:.3f}%'.format(title,n_users, n_items, n_ratings, density))
def print_stat_datareader(datareader):
    """Print summary statistics for every matrix held by ``datareader``.

    First prints the stats of the whole dataset (train + test + validation
    URMs summed), then one line per entry of ``URM_DICT``, one line per
    entry of ``ICM_DICT`` (if it is non-empty), and finally the ``UCM``
    attribute when the datareader has one.
    """
    urm_by_name = datareader.URM_DICT
    train_split = urm_by_name["URM_train"]
    validation_split = urm_by_name["URM_validation"]
    test_split = urm_by_name["URM_test"]

    # The three splits added together give the full interaction matrix.
    full_dataset = train_split + test_split + validation_split
    print_stat_urm(full_dataset, title='DATASET')

    for name, matrix in datareader.URM_DICT.items():
        print_stat_urm(matrix, title=name)

    if len(datareader.ICM_DICT) > 0:
        for name, matrix in datareader.ICM_DICT.items():
            print_stat_icm(matrix, title=name)

    if hasattr(datareader, 'UCM'):
        # Replace it with UCM_DICT?
        print_stat_ucm(datareader.UCM, title='UCM')