# reduce: cfmr_itembased_step1_normalization_r.py
# coding=utf-8
'''
step1: normalize, for each item, the scores given to it by all users (uid).
cat testdata/ratings_small.csv | python cfmr_itembased_step1_normalization_m.py |sort -k 1 |python cfmr_itembased_step1_normalization_r.py
Result: for each item, the vector made of all users' scores has a Euclidean norm (length) of 1.
'''
import sys
from math import pow,sqrt
def _normalize_group(item, user_scores):
    """Scale one item's scores so the score vector has Euclidean norm 1.

    Args:
        item: the item id shared by every entry of ``user_scores``.
        user_scores: list of ``(user, score)`` pairs; scores are floats.

    Returns:
        A list of ``user<TAB>item<TAB>normalized_score`` strings (no trailing
        newline), in the same order as ``user_scores``.
    """
    # The norm is computed from scratch for every group; carrying an
    # accumulator across groups would corrupt the following group's result.
    norm = sqrt(sum(pow(score, 2) for _, score in user_scores))
    records = []
    for user, score in user_scores:
        # An all-zero vector cannot be scaled to unit length; emit 0.0
        # instead of raising ZeroDivisionError.
        scaled = score / norm if norm else 0.0
        records.append("%s\t%s\t%s" % (user, item, scaled))
    return records


def normalize_lines(lines):
    """Yield normalized records for a stream of ``item<TAB>user<TAB>score`` lines.

    Lines belonging to the same item must be adjacent (the pipeline sorts the
    mapper output by item before this step). Each time the item changes, and
    once more at end of input, the buffered scores of the finished item are
    normalized and emitted.

    Args:
        lines: iterable of text lines, each ``item<TAB>user<TAB>score``.
            Blank lines are skipped.

    Yields:
        ``user<TAB>item<TAB>normalized_score`` strings without a newline.

    Raises:
        ValueError: if a non-blank line does not have exactly three
            tab-separated fields or its score is not a number.
    """
    cur_item = None
    user_scores = []
    for line in lines:
        line = line.strip()
        if not line:
            continue
        item, user, score = line.split('\t')
        if cur_item is not None and item != cur_item:
            # Item boundary: flush the group that just ended.
            for record in _normalize_group(cur_item, user_scores):
                yield record
            user_scores = []
        cur_item = item
        user_scores.append((user, float(score)))
    # Flush the last group; nothing is emitted for empty input.
    if user_scores:
        for record in _normalize_group(cur_item, user_scores):
            yield record


def main():
    """Read grouped ratings from stdin and print normalized records to stdout."""
    for record in normalize_lines(sys.stdin):
        print(record)


if __name__ == "__main__":
    main()