1. 原来的算法模型没有考虑用户重复购买的物品。对于钢材行业,用户的需求比较稳定,经常买了又买,所以增加此功能。

a. 模型训练中增加如下代码:
  
        
//buy item for users
        
JavaPairRDD<String, String> buyItemForUser = data.getBuyEvents().mapToPair(
new 
PairFunction<UserItemEvent, Tuple2<String, String>, Integer>() {
            
@Override
            
public 
Tuple2<Tuple2<String, String>, Integer> call(UserItemEvent buyEvent) 
throws 
Exception {
                
return  
new 
Tuple2<>(
new 
Tuple2<>(buyEvent.getUser(), buyEvent.getItem()), 
1
);
            
}
        
}).mapToPair(
new 
PairFunction<Tuple2<Tuple2<String, String>, Integer>, String, Integer>() {
            
@Override
            
public 
Tuple2<String, Integer> call(Tuple2<Tuple2<String, String>, Integer> element) 
throws 
Exception {
                
return 
new 
Tuple2<>(element._1()._1()+
":::"
+element._1()._2(), element._2());
            
}
        
}).reduceByKey(
new 
Function2<Integer, Integer, Integer>() {
            
@Override
            
public 
Integer call(Integer integer, Integer integer2) 
throws 
Exception {
                
return 
integer + integer2;
            
}
        
}).mapToPair(
new 
PairFunction<Tuple2<String, Integer>, String, String>() {
            
@Override
            
public 
Tuple2<String, String> call(Tuple2<String, Integer> element) 
throws 
Exception {
                
String temp[]=element._1().split(
":::"
);
                
if
(temp.length==
2
){
                    
return 
new 
Tuple2<>(temp[
0
], temp[
1
]);
                
}            
                
return 
new 
Tuple2<>(
""
""
);              
            
}
        
});
         
        
b. 增加预测算法:
            
private 
List<ItemScore> buyItemForUser(Model model, Query query){
        
logger.info(
"start to add buy item for the user"
);
        
final 
JavaRDD<ItemScore> matchedUser = model.getUserBuyItem().filter(
new 
Function<Tuple2<String, String>, Boolean>() {
            
@Override
            
public 
Boolean call(Tuple2<String, String> userIndex) 
throws 
Exception {
                
return 
userIndex._1().equals(query.getUserEntityId());
            
}
        
}).map(
new 
Function<Tuple2<String,String>,ItemScore>() {
            
@Override
            
public 
ItemScore call(Tuple2<String, String> arg0) 
throws 
Exception {
                  
return 
new 
ItemScore(arg0._2(),
10
);
            
}            
        
});
         
        
return 
matchedUser.collect();
         
    
}
     
    
c. topItemsForUser:按照你的业务逻辑处理两者的排序规则。

2. 基于物品的相似性推荐也是我们期望的功能,加入以下代码:
//根据事件查找物品的属性
    
private 
List<Set<String>> getRecentProductCategory(Query query, Model model) {
        
try 
{
            
List<Set<String>> result = 
new 
ArrayList<>();
 
            
List<Event> events = LJavaEventStore.findByEntity(
                    
ap.getAppName(),
                    
"user"
,
                    
query.getUserEntityId(),
                    
OptionHelper.<String>none(),
                    
OptionHelper.some(ap.getSimilarItemEvents()),
                    
OptionHelper.some(OptionHelper.some(
"item"
)),
                    
OptionHelper.<Option<String>>none(),
                    
OptionHelper.<DateTime>none(),
                    
OptionHelper.<DateTime>none(),
                    
OptionHelper.some(
10
),
                    
true
,
                    
Duration.apply(
10
, TimeUnit.SECONDS));
 
            
for 
(
final 
Event event : events) {
                
if 
(event.targetEntityId().isDefined()) {
                    
JavaPairRDD<String, Integer> filtered = model.getItemIndex().filter(
new 
Function<Tuple2<String, Integer>, Boolean>() {
                        
@Override
                        
public 
Boolean call(Tuple2<String, Integer> element) 
throws 
Exception {
                            
return 
element._1().equals(event.targetEntityId().get());
                        
}
                    
});
 
                    
final 
String itemIndex = filtered.first()._1();
                     
                     
                    
Item item = model.getItems().get(itemIndex);
                    
if
(item.getCategories()!=
null 
&& item.getCategories().size()>
0
){
                       
result.add(item.getCategories());
                    
}
                
}
            
}
 
            
return 
result;
        
catch 
(Exception e) {
            
logger.error(
"Error reading recent events for user " 
+ query.getUserEntityId());
            
throw 
new 
RuntimeException(e.getMessage(), e);
        
}
    
}
     
    
//相似性比较
    
private 
List<ItemScore> similarItemsByCategory(
final 
List<Set<String>> category, Model model, Query query) {
         
        
Map<String, Item> items =model.getItems();
        
if
(items==
null 
|| items.size()==
0
){
            
return 
null
;
        
}
         
        
if
(category==
null 
|| category.size()==
0
){
            
return 
null
;
        
}
         
        
JavaRDD<ItemScore> itemScores = model.getItemIndex().map(
new 
Function<Tuple2<String, Integer>, ItemScore>() {
            
@Override
            
public 
ItemScore call(Tuple2<String, Integer> idItem) 
throws 
Exception {
                
String itemid= idItem._1();
                
Item item = items.get(itemid);            
                
double 
similarity = 
0.0
;
                
for
(
int 
i=
0 
; i<category.size(); i++){
                    
similarity+=getDistance(category.get(i),item.getCategories());
                
}
                
logger.info(itemid+
"->"
+similarity);
                
return 
(
new 
ItemScore(itemid, similarity));
                 
            
}
        
});
         
        
itemScores = validScores(itemScores, query.getWhitelist(), query.getBlacklist(), query.getCategories(), model.getItems(), query.getUserEntityId());
         
        
return 
sortAndTake(itemScores, query.getNumber());
         
      
/*  List<ItemScore> itemScores=new ArrayList<ItemScore>();
         
        
for (Map.Entry<String, Item> entry : items.entrySet()) {
             
Item it = entry.getValue();
             
double similarity = 0.0;
             
for(int i=0 ; i<category.size(); i++){
                 
similarity+=getDistance(category.get(i),it.getCategories());
             
}
             
itemScores.add(new ItemScore(it.getEntityId(), similarity));
        
}
         
         
         
        
itemScores = validScoresForList(itemScores, query.getWhitelist(), query.getBlacklist(), query.getCategories(), model.getItems(), query.getUserEntityId());
        
return sortAndTake(itemScores, query.getNumber());*/
    
}
     
    
//相似算法,比较简单
    
public 
static 
int 
getDistance(Set<String> t, Set<String> s) {  
        
if 
(t==
null 
|| t.size()==
0 
|| s==
null 
|| s.size()==
0 
|| t.size() != s.size()) {            
            
return 
0
;
        
}
        
HashSet<String> t_temp=
new 
HashSet<String>(t);
//必须转一下
        
HashSet<String> s_temp=
new 
HashSet<String>(s);
        
t_temp.retainAll(s_temp);
        
return 
t_temp.size();
         
    
}
     
    
最后按照你的业务逻辑,加入相似的物品。