-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdataset_families.py
125 lines (119 loc) · 3.88 KB
/
dataset_families.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Maps each dataset family name to the set of dataset (reader) names that
# belong to that family.
family_map = {
    "Amazon": {
        "AmazonAllBeautyReader",
        "AmazonAllElectronicsReader",
        "AmazonAlternativeRockReader",
        "AmazonAmazonFashionReader",
        "AmazonAmazonInstantVideoReader",
        "AmazonAppliancesReader",
        "AmazonAppsforAndroidReader",
        "AmazonAppstoreforAndroidReader",
        "AmazonArtsCraftsSewingReader",
        "AmazonAutomotiveReader",
        "AmazonBabyReader",
        "AmazonBabyProductsReader",
        "AmazonBeautyReader",
        "AmazonBluesReader",
        "AmazonBooksReader",
        "AmazonBuyaKindleReader",
        "AmazonCDsVinylReader",
        "AmazonCellPhonesAccessoriesReader",
        "AmazonChristianReader",
        "AmazonClassicalReader",
        "AmazonClothingShoesJewelryReader",
        "AmazonCollectiblesFineArtReader",
        "AmazonComputersReader",
        "AmazonCountryReader",
        "AmazonDanceElectronicReader",
        "AmazonDavisReader",
        "AmazonDigitalMusicReader",
        "AmazonElectronicsReader",
        "AmazonFolkReader",
        "AmazonGiftCardsReader",
        "AmazonGospelReader",
        "AmazonGroceryGourmetFoodReader",
        "AmazonHardRockMetalReader",
        "AmazonHealthPersonalCareReader",
        "AmazonHomeImprovementReader",
        "AmazonHomeKitchenReader",
        "AmazonIndustrialScientificReader",
        "AmazonInternationalReader",
        "AmazonJazzReader",
        "AmazonKindleStoreReader",
        "AmazonKitchenDiningReader",
        "AmazonLatinMusicReader",
        "AmazonLuxuryBeautyReader",
        "AmazonMagazineSubscriptionsReader",
        "AmazonMiscellaneousReader",
        "AmazonMoviesTVReader",
        "AmazonMP3PlayersAccessoriesReader",
        "AmazonMusicalInstrumentsReader",
        "AmazonNewAgeReader",
        "AmazonOfficeProductsReader",
        "AmazonOfficeSchoolSuppliesReader",
        "AmazonPatioLawnGardenReader",
        "AmazonPetSuppliesReader",
        "AmazonPopReader",
        "AmazonPurchaseCirclesReader",
        "AmazonRapHipHopReader",
        "AmazonRBReader",
        "AmazonRockReader",
        "AmazonSoftwareReader",
        "AmazonSportsOutdoorsReader",
        "AmazonToolsHomeImprovementReader",
        "AmazonToysGamesReader",
        "AmazonVideoGamesReader",
        "AmazonWineReader",
    },
    "Movielens": {
        "Movielens100KReader",
        "Movielens10MReader",
        "Movielens1MReader",
        "Movielens20MReader",
        "MovielensHetrec2011Reader",
    },
    "Yahoo": {
        "YahooMoviesReader",
        "YahooMusicReader",
    },
}
# Datasets that are not grouped under any multi-member family.
unique_datasets = {
    "AnimeReader",
    "BookCrossingReader",
    "CiaoDVDReader",
    "DatingReader",
    "EpinionsReader",
    "FilmTrustReader",
    "FrappeReader",
    "GoogleLocalReviewsReader",
    "GowallaReader",
    "Jester2Reader",
    "LastFMReader",
    "MarketBiasAmazonReader",
    "MarketBiasModClothReader",
    "MovieTweetingsReader",
    "NetflixPrizeReader",
    "RecipesReader",
    "WikilensReader",
}
# Dataset name -> family name. Members of a family in family_map map to that
# family's key; each entry of unique_datasets maps to itself.
reverse_family_map = {
    **{
        member: family_name
        for family_name, members in family_map.items()
        for member in members
    },
    # Merged last, so a name present in both sources resolves to itself.
    **{name: name for name in unique_datasets},
}
def dataset_family_lookup(dataset_name, strict_match=True):
    """Return the dataset family that ``dataset_name`` belongs to.

    Args:
        dataset_name: Name to look up in ``reverse_family_map``.
        strict_match: When truthy, an unknown name is an error. When falsy,
            an unknown name is returned unchanged.

    Returns:
        The family recorded for ``dataset_name``, or ``dataset_name`` itself
        when it is unknown and ``strict_match`` is falsy.

    Raises:
        RuntimeError: If ``strict_match`` is truthy and ``dataset_name`` is
            not a key of ``reverse_family_map``.
    """
    # Lenient mode: fall back to the name itself for unknown datasets.
    if not strict_match:
        return reverse_family_map.get(dataset_name, dataset_name)

    if dataset_name in reverse_family_map:
        return reverse_family_map[dataset_name]

    raise RuntimeError(f"Strict match for dataset name not found: {dataset_name}")
def get_dataset_families():
    """Return the set of all dataset families.

    The result combines the keys of ``family_map`` with every entry of
    ``unique_datasets``. Useful for hold-one-out validation over dataset
    families.
    """
    # Iterating a dict yields its keys, so this unpacks the family names.
    return {*family_map, *unique_datasets}
def get_all_datasets():
    """Return the set of every dataset name known to this module.

    Returns:
        A new ``set`` containing each member of every family in
        ``family_map`` together with every entry of ``unique_datasets``.
    """
    # Build the set directly instead of materialising a throwaway list first.
    family_members = {
        dataset for members in family_map.values() for dataset in members
    }
    return family_members.union(unique_datasets)