# Convolutional Neural Networks for Sentence Classification阅读梗概

2021/8/21 17:27:28 浏览：

## 模型

$x_i \in \mathbb{R}^k$ 表示第 $i$ 个单词对应的 $k$ 维词向量。长度为 $n$ 的句子表示为
$x_{1:n} = x_1 \oplus x_2 \oplus \cdots \oplus x_n$，其中 $\oplus$ 表示向量的连接（拼接）。
$x_{i:i+j}$ 表示对向量 $x_i, x_{i+1}, \ldots, x_{i+j}$ 的连接。
$c_i = f(w \cdot x_{i:i+h-1} + b)$ 表示了一个特征。其实就是把 $x_i, \ldots, x_{i+h-1}$ 的向量首尾连接，与 $w$ 做内积并加上偏置 $b$，经激活函数 $f$ 得到一个值，这样做和卷积是等价的。$h$ 是窗口大小，$w \in \mathbb{R}^{hk}$ 是一个卷积核。

## 正则化

dropout用来阻止过拟合。就是让神经元以一定概率失活。

$z = [\hat{c}_1, \ldots, \hat{c}_m]$

$r$ 是一个 0/1 向量，各分量按伯努利分布独立随机采样，其中取 1 的概率为论文中的 $p$（即代码中的 dropout_rate）。

## 最后附上一份自己的代码

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class MyModel(nn.Module):
    """TextCNN sentence classifier (Kim, 2014).

    Embeds token ids, runs three parallel 1-D convolutions with window
    sizes 3/4/5, max-pools each over time, concatenates the pooled
    features and classifies with a small MLP.

    Input:  integer tensor of token ids, shape [batch_size, seq_len].
    Output: class probabilities, shape [batch_size, n_classes].
    """

    def __init__(self, embeddings, n_features, n_classes, batch_size, dropout_prob=0.5):
        """
        Args:
            embeddings: pretrained embedding matrix, shape [vocab_size, embed_size].
            n_features: expected sequence length (stored for caller compatibility; unused here).
            n_classes: number of output classes.
            batch_size: stored for caller compatibility; unused here.
            dropout_prob: dropout probability for the fully connected layers.
        """
        super().__init__()
        self.embeddings = embeddings
        self.n_features = n_features
        self.n_classes = n_classes
        self.dropout_prob = dropout_prob
        self.batch_size = batch_size
        self.embed_size = embeddings.shape[1]
        # Input [batch_size, seq_len] -> output [batch_size, seq_len, embed_size].
        self.my_embeddings = nn.Embedding(embeddings.shape[0], self.embed_size)
        self.my_embeddings.weight = nn.Parameter(torch.tensor(embeddings))
        # Conv1d expects [batch_size, embed_size, seq_len] and slides over the
        # last (time) dimension, producing [batch_size, 100, seq_len-h+1].
        # BUG FIX: the first branch used kernel_size=1; the original comment
        # ("seq_len-3+1") and the paper's 3/4/5 windows show 3 was intended.
        self.fc1_1 = nn.Conv1d(in_channels=self.embed_size, out_channels=100, kernel_size=3)
        self.fc1_2 = nn.Conv1d(in_channels=self.embed_size, out_channels=100, kernel_size=4)
        self.fc1_3 = nn.Conv1d(in_channels=self.embed_size, out_channels=100, kernel_size=5)
        self.fc1_1_2 = nn.ReLU()
        self.fc1_2_2 = nn.ReLU()
        self.fc1_3_2 = nn.ReLU()
        # After max-pooling over time and concatenating: [batch_size, 300, 1].
        # Classifier head. FIX: use the configured dropout_prob / n_classes
        # instead of the hard-coded 0.5 / 2 (defaults keep old behavior).
        self.fc3_1 = nn.Linear(in_features=300, out_features=100)
        self.fc3_2 = nn.ReLU()
        self.fc3_3 = nn.Dropout(self.dropout_prob)
        self.fc4_1 = nn.Linear(in_features=100, out_features=50)
        self.fc4_2 = nn.ReLU()
        self.fc4_3 = nn.Dropout(self.dropout_prob)
        self.fc5 = nn.Linear(in_features=50, out_features=self.n_classes)
        # NOTE(review): forward() returns softmax probabilities, while the
        # training code feeds them to nn.CrossEntropyLoss, which expects raw
        # logits (it applies log-softmax itself). Consider returning logits.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = x.long()
        x = self.my_embeddings(x)                   # [B, L, E]
        x = x.permute([0, 2, 1]).to(torch.float32)  # [B, E, L] for Conv1d
        x1 = self.fc1_1(x)
        x2 = self.fc1_2(x)
        x3 = self.fc1_3(x)
        # Max-pool each feature map over the full time axis, then ReLU.
        # BUG FIX: the original applied ReLU twice (module and F.relu),
        # which is redundant since relu(relu(x)) == relu(x); apply it once.
        x1 = self.fc1_1_2(F.max_pool1d(x1, kernel_size=x1.shape[2]))
        x2 = self.fc1_2_2(F.max_pool1d(x2, kernel_size=x2.shape[2]))
        x3 = self.fc1_3_2(F.max_pool1d(x3, kernel_size=x3.shape[2]))
        x = torch.cat((x3, x2, x1), 1).squeeze(2)   # [B, 300]
        x = self.fc3_1(x)
        x = self.fc3_2(x)
        x = self.fc3_3(x)
        x = self.fc4_1(x)
        x = self.fc4_2(x)
        x = self.fc4_3(x)
        x = self.fc5(x)
        return self.softmax(x)
```
```python
## 训练过程
import model
import dataget
import numpy as np
import torch
import torch.optim as optim
from sklearn import metrics
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

# MyDataSet.__getitem__ yields (token ids, mask, label); MytestSet is the
# held-out split of the same corpus.
mydataset = dataget.MyDataSet()
mytestset = dataget.MytestSet(mydataset)
train_loader = DataLoader(mydataset, batch_size=64, shuffle=True)
valid_loader = DataLoader(mytestset, batch_size=64)

mymodel = model.MyModel(batch_size=64, dropout_prob=0.5,
                        embeddings=mydataset.word2vec,
                        n_classes=2, n_features=mydataset.maxlen)
# NOTE(review): MyModel returns softmax probabilities, but CrossEntropyLoss
# expects raw logits — confirm and make the two consistent.
loss_function = torch.nn.CrossEntropyLoss()
# BUG FIX: the original called `optimzer.step()` but never created an optimizer.
optimizer = optim.Adam(mymodel.parameters())
history_loss = list()

for epoch in range(100):
    # ---------------- training ----------------
    mymodel.train()
    tr_loss = 0
    nb_tr_steps = 0
    train_logits_list = []
    train_labels_list = []
    # BUG FIX: the original referenced undefined `data`/`labels` and had no
    # batch loop at all; iterate the DataLoader once per epoch.
    for data, mask, labels in train_loader:
        labels = labels.long()
        optimizer.zero_grad()  # BUG FIX: gradients were never reset
        predict = mymodel(data)
        loss = loss_function(predict, labels)
        loss.backward()
        optimizer.step()
        tr_loss = tr_loss + loss.item()
        nb_tr_steps = nb_tr_steps + 1
        predict = predict.view(-1, 2).detach().cpu().numpy()
        labels = labels.view(-1).to('cpu').numpy()
        train_logits_list += [int(x) for x in np.argmax(predict, axis=1)]
        train_labels_list += [int(x) for x in labels]
    train_loss = tr_loss / max(nb_tr_steps, 1)
    # BUG FIX: `metrics` was never imported (only individual sklearn names were).
    train_accuracy = metrics.accuracy_score(train_labels_list, train_logits_list)
    train_w_f1 = metrics.f1_score(train_labels_list, train_logits_list, average='weighted')

    # ---------------- validation ----------------
    mymodel.eval()
    tv_loss = 0
    nb_tv_steps = 0
    valid_logits_list = []
    valid_labels_list = []
    with torch.no_grad():  # no gradients needed during evaluation
        for data, mask, labels in valid_loader:
            labels = labels.long()
            predict = mymodel(data)
            loss = loss_function(predict, labels)
            tv_loss = tv_loss + loss.item()
            nb_tv_steps = nb_tv_steps + 1
            predict = predict.view(-1, 2).detach().cpu().numpy()
            labels = labels.view(-1).to('cpu').numpy()
            valid_logits_list += [int(x) for x in np.argmax(predict, axis=1)]
            valid_labels_list += [int(x) for x in labels]
    valid_loss = tv_loss / max(nb_tv_steps, 1)
    valid_accuracy = metrics.accuracy_score(valid_labels_list, valid_logits_list)
    valid_w_f1 = metrics.f1_score(valid_labels_list, valid_logits_list, average='weighted')
    history_loss.append(valid_loss)
    print('\nEpoch %d, train_loss=%.5f, train_acc=%.2f, train_w_f1=%.2f,valid_loss=%.5f, valid_acc=%.2f, valid_w_f1=%.2f'
          % (epoch, train_loss, train_accuracy * 100, train_w_f1 * 100, valid_loss, valid_accuracy * 100, valid_w_f1 * 100))

# Plot the validation-loss history.
fig = plt.figure(num=1, figsize=(4, 4))
plt.subplot(111)
plt.plot(np.arange(0, len(history_loss)), history_loss)
plt.show()
```
```python
## 数据处理
import random

import chardet
import torch
import pickle
import numpy as np
from torch.utils.data.dataset import Dataset
from PIL import Image
import numpy as np
import torch

from tqdm import tqdm

def get_word2id(data_paths):
    """Build a word -> id vocabulary from whitespace-tokenized text files.

    Ids start at 1 so that 0 stays reserved for padding — get_word2vec
    allocates len(word2id) + 1 embedding rows for exactly this reason.

    Args:
        data_paths: iterable of text-file paths.

    Returns:
        dict mapping each distinct word to a unique positive integer id.
    """
    print('加载数据集')
    # BUG FIX: the original used an undefined name `wordid`, raising
    # NameError on the first lookup; initialize the dict here.
    word2id = {}
    for path in data_paths:
        with open(path, encoding="Windows-1252") as f:
            for line in f:
                for word in line.strip().split():
                    if word not in word2id:
                        # +1 keeps id 0 free for padding (see get_word2vec).
                        word2id[word] = len(word2id) + 1
    return word2id

def get_word2vec(word2id):
    """Randomly initialize a (len(word2id) + 1) x 50 embedding matrix.

    Entries are drawn uniformly from [-1, 1); the extra row leaves room
    for a padding id alongside the vocabulary ids.
    """
    vocab_rows = len(word2id) + 1
    return np.random.uniform(-1.0, 1.0, size=(vocab_rows, 50))

def get_corpus(word2id,
               neg_path='E:\\python_project\\dataset\\rt-polaritydata\\rt-polarity.neg',
               pos_path='E:\\python_project\\dataset\\rt-polaritydata\\rt-polarity.pos'):
    """Load the polarity corpus as padded id sequences plus masks and labels.

    Args:
        word2id: word -> id mapping (id 0 is assumed to be the padding id).
        neg_path: negative-examples file (default keeps the original path).
        pos_path: positive-examples file (default keeps the original path).

    Returns:
        (contents, masks, labels, maxlen) where
          contents: list of id lists, each 0-padded to length maxlen;
          masks:    parallel list of 1/0 lists (1 = real token, 0 = padding);
          labels:   0 for negative lines, 1 for positive lines;
          maxlen:   longest sentence length.
    """
    contents = list()
    labels = list()
    maxlen = 0

    def _read(path, label):
        # Append one id list + label per non-empty line of `path`.
        nonlocal maxlen
        with open(path, encoding="Windows-1252") as f:
            for line in f:
                words = line.strip().split()
                if not words:
                    continue
                ids = [word2id[word] for word in words]
                maxlen = max(maxlen, len(ids))
                contents.append(ids)
                labels.append(label)

    _read(neg_path, 0)
    _read(pos_path, 1)

    # BUG FIX: the original computed the padding and the mask but never
    # applied either (`contents[index] = line`), and returned nothing even
    # though MyDataSet indexes corpus[0..3].
    masks = list()
    for index, ids in enumerate(contents):
        pad = [0] * (maxlen - len(ids))
        masks.append([1] * len(ids) + pad)
        contents[index] = ids + pad

    # BUG FIX: shuffle all three lists together so masks stay aligned with
    # their sentences (the original zipped only contents and labels).
    cc = list(zip(contents, masks, labels))
    random.shuffle(cc)
    contents[:], masks[:], labels[:] = zip(*cc)
    return contents, masks, labels, maxlen

class MyDataSet(Dataset):
    """Polarity dataset: builds the vocabulary, corpus and random embeddings.

    `corpus` is indexed as corpus[0]=ids, corpus[1]=masks, corpus[2]=labels,
    corpus[3]=maxlen.
    """

    def __init__(self):
        paths = ['E:\\python_project\\dataset\\rt-polaritydata\\rt-polarity.pos',
                 'E:\\python_project\\dataset\\rt-polaritydata\\rt-polarity.neg']
        self.word2id = get_word2id(data_paths=paths)
        self.corpus = get_corpus(self.word2id)
        self.word2vec = get_word2vec(self.word2id)
        self.maxlen = self.corpus[3]

    def __len__(self):
        # The first 8000 examples form the training split.
        return 8000

    def __getitem__(self, index):
        """Return (token ids, mask, label) for one example as numpy arrays."""
        ids, masks, labels = self.corpus[0], self.corpus[1], self.corpus[2]
        return np.array(ids[index]), np.array(masks[index]), np.array(labels[index])

class MytestSet(Dataset):
    """Held-out split: examples 8000..10049 of an existing MyDataSet."""

    def __init__(self, MyDataSet):
        # NOTE: the parameter shadows the MyDataSet class name (kept for
        # caller compatibility); it is an *instance* of that class.
        self.mydataset = MyDataSet

    def __len__(self):
        return 2050

    def __getitem__(self, index):
        """Return (token ids, mask, label) for test example `index`."""
        corpus = self.mydataset.corpus
        offset = index + 8000
        return (np.array(corpus[0][offset]),
                np.array(corpus[1][offset]),
                np.array(corpus[2][offset]))
```