-
原来的算法模型没有考虑用户重复购买的物品。对于钢材行业,用户的需求往往是一致的,经常会重复购买同样的物品,所以增加此功能。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 | a 模型训练中增加如下代码 //buy item for users JavaPairRDD<String, String> buyItemForUser = data.getBuyEvents().mapToPair( new PairFunction<UserItemEvent, Tuple2<String, String>, Integer>() { @Override public Tuple2<Tuple2<String, String>, Integer> call(UserItemEvent buyEvent) throws Exception { return new Tuple2<>( new Tuple2<>(buyEvent.getUser(), buyEvent.getItem()), 1 ); } }).mapToPair( new PairFunction<Tuple2<Tuple2<String, String>, Integer>, String, Integer>() { @Override public Tuple2<String, Integer> call(Tuple2<Tuple2<String, String>, Integer> element) throws Exception { return new Tuple2<>(element._1()._1()+ ":::" +element._1()._2(), element._2()); } }).reduceByKey( new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer integer, Integer integer2) throws Exception { return integer + integer2; } }).mapToPair( new PairFunction<Tuple2<String, Integer>, String, String>() { @Override public Tuple2<String, String> call(Tuple2<String, Integer> element) throws Exception { String temp[]=element._1().split( ":::" ); if (temp.length== 2 ){ return new Tuple2<>(temp[ 0 ], temp[ 1 ]); } return new Tuple2<>( "" , "" ); } }); b 增加预测算法 private List<ItemScore> buyItemForUser(Model model, Query query){ logger.info( "start to add buy item for the user" ); final JavaRDD<ItemScore> matchedUser = model.getUserBuyItem().filter( new Function<Tuple2<String, String>, Boolean>() { @Override public Boolean call(Tuple2<String, String> userIndex) throws Exception { return userIndex._1().equals(query.getUserEntityId()); } }).map( new Function<Tuple2<String,String>,ItemScore>() { @Override public ItemScore call(Tuple2<String, String> arg0) throws Exception { return new ItemScore(arg0._2(), 10 ); } }); return matchedUser.collect(); } c topItemsForUser 按照你的业务逻辑出来两者的排序规则 |
基于物品相似性的推荐也是我们期望的功能,为此加入以下代码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 | //根据事件查找物品的属性 private List<Set<String>> getRecentProductCategory(Query query, Model model) { try { List<Set<String>> result = new ArrayList<>(); List<Event> events = LJavaEventStore.findByEntity( ap.getAppName(), "user" , query.getUserEntityId(), OptionHelper.<String>none(), OptionHelper.some(ap.getSimilarItemEvents()), OptionHelper.some(OptionHelper.some( "item" )), OptionHelper.<Option<String>>none(), OptionHelper.<DateTime>none(), OptionHelper.<DateTime>none(), OptionHelper.some( 10 ), true , Duration.apply( 10 , TimeUnit.SECONDS)); for ( final Event event : events) { if (event.targetEntityId().isDefined()) { JavaPairRDD<String, Integer> filtered = model.getItemIndex().filter( new Function<Tuple2<String, Integer>, Boolean>() { @Override public Boolean call(Tuple2<String, Integer> element) throws Exception { return element._1().equals(event.targetEntityId().get()); } }); final String itemIndex = filtered.first()._1(); Item item = model.getItems().get(itemIndex); if (item.getCategories()!= null && item.getCategories().size()> 0 ){ result.add(item.getCategories()); } } } return result; } catch (Exception e) { logger.error( "Error reading recent events for user " + query.getUserEntityId()); throw new RuntimeException(e.getMessage(), e); } } //相似性比较 private List<ItemScore> similarItemsByCategory( final List<Set<String>> category, Model model, Query query) { Map<String, Item> items =model.getItems(); if (items== null || items.size()== 0 ){ return null ; } if (category== null || category.size()== 0 ){ return null ; } JavaRDD<ItemScore> itemScores = model.getItemIndex().map( new Function<Tuple2<String, Integer>, ItemScore>() { @Override public ItemScore 
call(Tuple2<String, Integer> idItem) throws Exception { String itemid= idItem._1(); Item item = items.get(itemid); double similarity = 0.0 ; for ( int i= 0 ; i<category.size(); i++){ similarity+=getDistance(category.get(i),item.getCategories()); } logger.info(itemid+ "->" +similarity); return ( new ItemScore(itemid, similarity)); } }); itemScores = validScores(itemScores, query.getWhitelist(), query.getBlacklist(), query.getCategories(), model.getItems(), query.getUserEntityId()); return sortAndTake(itemScores, query.getNumber()); /* List<ItemScore> itemScores=new ArrayList<ItemScore>(); for (Map.Entry<String, Item> entry : items.entrySet()) { Item it = entry.getValue(); double similarity = 0.0; for(int i=0 ; i<category.size(); i++){ similarity+=getDistance(category.get(i),it.getCategories()); } itemScores.add(new ItemScore(it.getEntityId(), similarity)); } itemScores = validScoresForList(itemScores, query.getWhitelist(), query.getBlacklist(), query.getCategories(), model.getItems(), query.getUserEntityId()); return sortAndTake(itemScores, query.getNumber());*/ } //相似算法,比较简单 public static int getDistance(Set<String> t, Set<String> s) { if (t== null || t.size()== 0 || s== null || s.size()== 0 || t.size() != s.size()) { return 0 ; } HashSet<String> t_temp= new HashSet<String>(t); //必须转一下 HashSet<String> s_temp= new HashSet<String>(s); t_temp.retainAll(s_temp); return t_temp.size(); } 最后按照你的业务逻辑,加入相似的物品。 |