用戶畫像(六):基於用戶搜索數據,構造輸入特徵

加載訓練好的word2vec模型,求用戶搜索結果的平均向量

import numpy as np
file_name = './data/train_querylist_writefile-1w.csv'
cur_model = gensim.models.Word2Vec.load('1w_word2vec_300.model')
with open(file_name, 'r') as f:
    cur_index = 0
    lines = f.readlines()
    doc_cev = np.zeros((len(lines),300))
    for line in lines:
        word_vec = np.zeros((1,300))
        words = line.strip().split(' ')
        wrod_num = 0
        #求模型的平均向量
        for word in words:
            if word in cur_model:
                wrod_num += 1
                word_vec += np.array([cur_model[word]])
        doc_cev[cur_index] = word_vec / float(wrod_num)
        cur_index += 1
doc_cev.shape

在這裏插入圖片描述

doc_cev[5]

在這裏插入圖片描述

genderlabel = np.loadtxt(open('./data/train_gender.csv', 'r')).astype(int)
genderlabel.shape

在這裏插入圖片描述

educationlabel = np.loadtxt(open('./data/train_education.csv', 'r')).astype(int)
educationlabel.shape

在這裏插入圖片描述

agelabel = np.loadtxt(open('./data/train_age.csv', 'r')).astype(int)
agelabel.shape

在這裏插入圖片描述

def removezero(x, y):
        nozero = np.nonzero(y)
        y = y[nozero]
        x = np.array(x)
        x = x[nozero]
        return x, y
gender_train, genderlabel = removezero(doc_cev, genderlabel)
age_train, agelabel = removezero(doc_cev, agelabel)
education_train, educationlabel = removezero(doc_cev, educationlabel)
print (gender_train.shape,genderlabel.shape)
print (age_train.shape,agelabel.shape)
print (education_train.shape,educationlabel.shape)

在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章