reduce: cfmr_itembased_step2_iimatrix_r.py
# coding=utf-8
'''
Build the item-item (II) matrix from the normalized data produced by step 1.

Example pipeline (the input of this reducer must be sorted by user, hence the sort):
cat testdata/ratings_small.csv \
| python cfmr_itembased_step1_normalization_m.py |sort -k 1 \
| python cfmr_itembased_step1_normalization_r.py \
| python cfmr_itembased_step2_iimatrix_m.py |sort -k 1 \
| python cfmr_itembased_step2_iimatrix_r.py
'''
import sys
from itertools import combinations, groupby
from operator import itemgetter


def _pair_lines(item_scores):
    """Yield the output lines for every unordered item pair of one user.

    Each pair is emitted twice, once per ordering (a, b) and (b, a), with the
    product of the two scores as the third tab-separated field.
    """
    for (item_a, score_a), (item_b, score_b) in combinations(item_scores, 2):
        product = score_a * score_b
        yield "%s\t%s\t%s" % (item_a, item_b, product)
        yield "%s\t%s\t%s" % (item_b, item_a, product)


def iter_item_pair_lines(lines):
    """Turn ``user<TAB>item<TAB>score`` lines into item-pair product lines.

    Consecutive lines sharing the same user (first field) are treated as one
    group, so the input has to arrive grouped by user (e.g. via ``sort -k 1``).
    For every group, each pair of items yields two lines
    ``item_a<TAB>item_b<TAB>score_a*score_b`` and the mirrored ordering.
    Groups with fewer than two items yield nothing.

    Args:
        lines: iterable of text lines (for example ``sys.stdin``).

    Yields:
        Output lines without a trailing newline.

    Raises:
        ValueError: if a non-blank line does not have exactly three
            tab-separated fields, or its score is not a valid float.
    """
    stripped_lines = (raw.strip() for raw in lines)
    # Blank lines (e.g. a trailing newline in the stream) carry no record;
    # skip them instead of failing on the tuple unpacking below.
    records = (text.split('\t') for text in stripped_lines if text)
    for _user, group in groupby(records, key=itemgetter(0)):
        # Scores are converted per group so everything belonging to earlier
        # users has already been yielded before a bad score can raise.
        item_scores = [(item, float(score)) for _uid, item, score in group]
        for out_line in _pair_lines(item_scores):
            yield out_line


def main():
    """Read records from stdin and print the item-pair lines to stdout."""
    for out_line in iter_item_pair_lines(sys.stdin):
        print(out_line)


if __name__ == "__main__":
    main()