|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
救救孩子吧
这是我的测试模型代码,但是告诉我矩阵形状维度不同,我该怎么办,我使用的是pytorch训练模型
import torch
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import torch.nn as nn
data = pd.read_csv("最终数据集.csv")
X = data.drop("Label", axis=1) # 特征矩阵,假设label列是标签列,需要从特征矩阵中去除
X_encoded = pd.get_dummies(X, columns=['Tokenized_Text']) # 对包含字符串的列进行独热编码
input_size = X_encoded.shape[1]
hidden_size = 64
output_size = 3 # 修改为3,对应标签的数量
class MyModel(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(MyModel, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.relu = nn.ReLU()
self.fc2 = nn.Linear(hidden_size, output_size)
def forward(self, x):
out = self.fc1(x)
out = self.relu(out)
out = self.fc2(out)
return out
# 加载模型
try:
model = MyModel(input_size, hidden_size, output_size)
model.load_state_dict(torch.load("my_model.pth", map_location=torch.device('cpu')))
except (FileNotFoundError, KeyError):
print("无法加载模型,请检查模型文件。")
exit()
# 获取用户输入的文本
text = input("请输入要进行情感分类的文本:")
# 创建包含用户输入文本的DataFrame
input_data = pd.DataFrame({"text": [text]})
# 使用TF-IDF向量化器将输入文本转换为数字表示形式
vectorizer = TfidfVectorizer()
X_input = vectorizer.fit_transform(input_data["text"]).toarray()
# 将数字表示形式的输入转换为tensor
X_input = torch.tensor(X_input, dtype=torch.float32)
# 使用模型进行预测
with torch.no_grad():
y_pred = model(X_input)
# 将预测结果转换为标签
predicted_label = torch.argmax(y_pred, dim=1).item() # 修改为在dim=1维度上找到最大值的索引
# 定义标签映射
label_map = {2: "负面", 0: "中性", 1: "正面"}
# 输出情感分类结果
print("预测结果:", label_map.get(predicted_label, "未知"))
训练模型的代码:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import pandas as pd
# 检查是否有可用的GPU设备
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# 定义数据集类
class MyDataset(Dataset):
def __init__(self, X, y):
self.X = torch.tensor(X.values.astype(float), dtype=torch.float).to(device)
self.y = torch.tensor(y.values.astype(float), dtype=torch.float).to(device)
def __len__(self):
return len(self.X)
def __getitem__(self, idx):
return self.X[idx], self.y[idx]
# 定义神经网络模型
class MyModel(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(MyModel, self).__init__()
self.fc = nn.Sequential(
nn.Linear(input_size, hidden_size),
nn.ReLU(),
nn.Linear(hidden_size, output_size)
)
def forward(self, x):
out = self.fc(x)
return out
# 读取数据集
data = pd.read_csv("最终数据集.csv")
print(data.columns)
# 假设您的数据集包含特征矩阵X和标签向量y
X = data.drop("Label", axis=1) # 特征矩阵,假设label列是标签列,需要从特征矩阵中去除
X_encoded = pd.get_dummies(X, columns=['Tokenized_Text']) # 对包含字符串的列进行独热编码
y = data["Label"] # 标签向量
# 划分数据集
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.3, random_state=42)
# 创建训练集和测试集的数据集对象
train_dataset = MyDataset(X_train, y_train)
test_dataset = MyDataset(X_test, y_test)
# 创建训练集和测试集的数据加载器
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)
# 定义模型参数
input_size = X_encoded.shape[1]
hidden_size = 64
output_size = 1
# 创建模型对象和优化器对象,并将模型移动到GPU设备上
model = MyModel(input_size, hidden_size, output_size).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
# 定义损失函数
criterion = nn.MSELoss()
# 训练模型
num_epochs = 10
for epoch in range(num_epochs):
train_loss = 0.0
model.train() # 设置模型为训练模式
for inputs, labels in train_loader:
optimizer.zero_grad()
inputs = inputs.to(device)
labels = labels.to(device)
outputs = model(inputs)
loss = criterion(outputs.squeeze(), labels)
loss.backward()
optimizer.step()
train_loss += loss.item() * inputs.size(0)
train_loss /= len(train_loader.dataset)
test_loss = 0.0
model.eval() # 设置模型为评估模式
with torch.no_grad():
for inputs, labels in test_loader:
inputs = inputs.to(device)
labels = labels.to(device)
outputs = model(inputs)
loss = criterion(outputs.squeeze(), labels)
test_loss += loss.item() * inputs.size(0)
test_loss /= len(test_loader.dataset)
# 打印训练过程中的损失和测试集上的损失
print("Epoch [{}/{}], Train Loss: {:.4f}, Test Loss: {:.4f}".format(epoch + 1, num_epochs, train_loss, test_loss))
# 保存训练好的模型参数到文件中
torch.save(model.state_dict(), 'my_model.pth')
报错:
D:\Anaconda3\python.exe D:\pycharm目录\毕设\测试模型.py
请输入要进行情感分类的文本:今天天气真好
Traceback (most recent call last):
File "D:\pycharm目录\毕设\测试模型.py", line 51, in <module>
y_pred = model(X_input)
File "C:\Users\31328\AppData\Roaming\Python\Python39\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "D:\pycharm目录\毕设\测试模型.py", line 21, in forward
out = self.fc1(x)
File "C:\Users\31328\AppData\Roaming\Python\Python39\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\31328\AppData\Roaming\Python\Python39\site-packages\torch\nn\modules\linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1 and 42765x64)
进程已结束,退出代码为 1
|
|