加載訓練好的word2vec模型，求用戶搜索結果的平均向量

import numpy as np
file_name = './data/train_querylist_writefile-1w.csv'
cur_model = gensim.models.Word2Vec.load('1w_word2vec_300.model')
with open(file_name, 'r') as f:
    cur_index = 0
    lines = f.readlines()
    doc_cev = np.zeros((len(lines),300))
    for line in lines:
        word_vec = np.zeros((1,300))
        words = line.strip().split(' ')
        wrod_num = 0
        #求模型的平均向量
        for word in words:
            if word in cur_model:
                wrod_num += 1
                word_vec += np.array([cur_model[word]])
        doc_cev[cur_index] = word_vec / float(wrod_num)
        cur_index += 1

doc_cev.shape

doc_cev[5]

genderlabel = np.loadtxt(open('./data/train_gender.csv', 'r')).astype(int)
genderlabel.shape

educationlabel = np.loadtxt(open('./data/train_education.csv', 'r')).astype(int)
educationlabel.shape

agelabel = np.loadtxt(open('./data/train_age.csv', 'r')).astype(int)
agelabel.shape

def removezero(x, y):
        nozero = np.nonzero(y)
        y = y[nozero]
        x = np.array(x)
        x = x[nozero]
        return x, y
gender_train, genderlabel = removezero(doc_cev, genderlabel)
age_train, agelabel = removezero(doc_cev, agelabel)
education_train, educationlabel = removezero(doc_cev, educationlabel)
print (gender_train.shape,genderlabel.shape)
print (age_train.shape,agelabel.shape)
print (education_train.shape,educationlabel.shape)

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

用戶畫像（六）：基於用戶搜索數據，構造輸入特徵

加載訓練好的word2vec模型，求用戶搜索結果的平均向量

對促銷活動效果評估的一些思考

SQL中怎麼表示不等於你知道嗎？SQL中怎麼篩選奇數偶數？

對電商數據分析中用戶分析的思考

對預測銷售情況的一些思考，需要從那幾個方面去分析呢？

總體分佈概況符合無界約翰遜分佈（johnsonsu）的情況

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結