Please help me out.
This is my model-testing code, but it keeps telling me the matrix shape dimensions don't match. What should I do? I'm using PyTorch to train the model.
import torch 
import pandas as pd 
from sklearn.feature_extraction.text import TfidfVectorizer 
import torch.nn as nn 
 
data = pd.read_csv("最终数据集.csv") 
X = data.drop("Label", axis=1)  # Feature matrix; assuming "Label" is the label column, it has to be dropped from the features
X_encoded = pd.get_dummies(X, columns=['Tokenized_Text'])  # One-hot encode the column that contains strings
input_size = X_encoded.shape[1] 
hidden_size = 64 
output_size = 3  # Changed to 3 to match the number of label classes
 
class MyModel(nn.Module): 
    def __init__(self, input_size, hidden_size, output_size): 
        super(MyModel, self).__init__() 
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.relu = nn.ReLU() 
        self.fc2 = nn.Linear(hidden_size, output_size) 
 
    def forward(self, x): 
        out = self.fc1(x) 
        out = self.relu(out) 
        out = self.fc2(out) 
        return out 
 
 
# Load the model
try: 
    model = MyModel(input_size, hidden_size, output_size) 
    model.load_state_dict(torch.load("my_model.pth", map_location=torch.device('cpu'))) 
 
except (FileNotFoundError, KeyError): 
    print("无法加载模型,请检查模型文件。") 
    exit() 
 
# Get the text entered by the user
text = input("请输入要进行情感分类的文本:") 
 
# Build a DataFrame containing the user's input text
input_data = pd.DataFrame({"text": [text]}) 
 
# Use a TF-IDF vectorizer to convert the input text into a numeric representation
vectorizer = TfidfVectorizer() 
X_input = vectorizer.fit_transform(input_data["text"]).toarray() 
 
# Convert the numeric representation into a tensor
X_input = torch.tensor(X_input, dtype=torch.float32) 
 
# Run the model to make a prediction
with torch.no_grad(): 
    y_pred = model(X_input) 
 
# Convert the prediction into a label
predicted_label = torch.argmax(y_pred, dim=1).item()  # Take the index of the maximum value along dim=1
 
# Define the label mapping
label_map = {2: "负面", 0: "中性", 1: "正面"}  # negative / neutral / positive
 
# Print the sentiment classification result
print("预测结果:", label_map.get(predicted_label, "未知")) 
 
The training code:
import torch 
import torch.nn as nn 
import torch.optim as optim 
from torch.utils.data import DataLoader, Dataset 
from sklearn.model_selection import train_test_split 
import pandas as pd 
 
# Check whether a GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 
 
# Define the dataset class
class MyDataset(Dataset): 
    def __init__(self, X, y): 
        self.X = torch.tensor(X.values.astype(float), dtype=torch.float).to(device) 
        self.y = torch.tensor(y.values.astype(float), dtype=torch.float).to(device) 
 
    def __len__(self): 
        return len(self.X) 
 
    def __getitem__(self, idx): 
        return self.X[idx], self.y[idx] 
 
 
# Define the neural network model
class MyModel(nn.Module): 
    def __init__(self, input_size, hidden_size, output_size): 
        super(MyModel, self).__init__() 
        self.fc = nn.Sequential( 
            nn.Linear(input_size, hidden_size), 
            nn.ReLU(), 
            nn.Linear(hidden_size, output_size) 
        ) 
 
    def forward(self, x): 
        out = self.fc(x) 
        return out 
 
 
# Read the dataset
data = pd.read_csv("最终数据集.csv") 
print(data.columns) 
 
# Assuming the dataset contains a feature matrix X and a label vector y
X = data.drop("Label", axis=1)  # Feature matrix; assuming "Label" is the label column, it has to be dropped from the features
X_encoded = pd.get_dummies(X, columns=['Tokenized_Text'])  # One-hot encode the column that contains strings
y = data["Label"]  # Label vector
 
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.3, random_state=42) 
 
# Create dataset objects for the training and test sets
train_dataset = MyDataset(X_train, y_train) 
test_dataset = MyDataset(X_test, y_test) 
 
# Create data loaders for the training and test sets
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True) 
test_loader = DataLoader(test_dataset, batch_size=32) 
 
# Define the model parameters
input_size = X_encoded.shape[1] 
hidden_size = 64 
output_size = 1 
 
# Create the model and optimizer, and move the model to the GPU
model = MyModel(input_size, hidden_size, output_size).to(device) 
optimizer = optim.Adam(model.parameters(), lr=0.001) 
 
# Define the loss function
criterion = nn.MSELoss() 
 
# Train the model
num_epochs = 10 
for epoch in range(num_epochs): 
    train_loss = 0.0 
    model.train()  # Set the model to training mode
    for inputs, labels in train_loader: 
        optimizer.zero_grad() 
        inputs = inputs.to(device) 
        labels = labels.to(device) 
        outputs = model(inputs) 
        loss = criterion(outputs.squeeze(), labels) 
        loss.backward() 
        optimizer.step() 
        train_loss += loss.item() * inputs.size(0) 
    train_loss /= len(train_loader.dataset) 
 
    test_loss = 0.0 
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad(): 
        for inputs, labels in test_loader: 
            inputs = inputs.to(device) 
            labels = labels.to(device) 
            outputs = model(inputs) 
            loss = criterion(outputs.squeeze(), labels) 
            test_loss += loss.item() * inputs.size(0) 
        test_loss /= len(test_loader.dataset) 
 
    # Print the training loss and the test-set loss for each epoch
    print("Epoch [{}/{}], Train Loss: {:.4f}, Test Loss: {:.4f}".format(epoch + 1, num_epochs, train_loss, test_loss)) 
 
# Save the trained model parameters to a file
torch.save(model.state_dict(), 'my_model.pth') 
 
The error:
D:\Anaconda3\python.exe D:\pycharm目录\毕设\测试模型.py  
请输入要进行情感分类的文本:今天天气真好 
Traceback (most recent call last): 
  File "D:\pycharm目录\毕设\测试模型.py", line 51, in <module> 
    y_pred = model(X_input) 
  File "C:\Users\31328\AppData\Roaming\Python\Python39\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl 
    return forward_call(*args, **kwargs) 
  File "D:\pycharm目录\毕设\测试模型.py", line 21, in forward 
    out = self.fc1(x) 
  File "C:\Users\31328\AppData\Roaming\Python\Python39\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl 
    return forward_call(*args, **kwargs) 
  File "C:\Users\31328\AppData\Roaming\Python\Python39\site-packages\torch\nn\modules\linear.py", line 114, in forward 
    return F.linear(input, self.weight, self.bias) 
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1 and 42765x64) 
 
Process finished with exit code 1
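
The traceback pins down the mismatch: mat1 is 1x1, while the first Linear layer expects 42765 input features (it was built with input_size = X_encoded.shape[1]). The training script builds its feature space with pd.get_dummies on Tokenized_Text, but the inference script fits a brand-new TfidfVectorizer on the single input string, which produces only one feature, so the shapes cannot line up. Below is a minimal sketch of one possible way to keep training and inference in the same feature space; it is not the original pipeline (it replaces the pd.get_dummies step with TF-IDF on both sides), and the file name tfidf_vectorizer.pkl plus the use of the Tokenized_Text column as raw text are assumptions for illustration only.

# Minimal sketch: fit one TfidfVectorizer on the training texts, persist it, and
# reuse it at inference so the input width always equals fc1's in_features.
import pickle

import pandas as pd
import torch
from sklearn.feature_extraction.text import TfidfVectorizer

# --- training side: fit once on the full text column and save the fitted vectorizer ---
data = pd.read_csv("最终数据集.csv")
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(data["Tokenized_Text"]).toarray()
input_size = X_train.shape[1]  # build nn.Linear(input_size, hidden_size) with this value
with open("tfidf_vectorizer.pkl", "wb") as f:  # hypothetical file name
    pickle.dump(vectorizer, f)

# --- inference side: load the same vectorizer and only transform(), never fit_transform() ---
with open("tfidf_vectorizer.pkl", "rb") as f:
    vectorizer = pickle.load(f)
text = input("请输入要进行情感分类的文本:")
X_input = torch.tensor(vectorizer.transform([text]).toarray(), dtype=torch.float32)
# X_input now has shape (1, input_size), matching the model's first Linear layer

The same principle holds if pd.get_dummies is kept instead: whatever transformation produced the 42765 training columns has to be saved (or re-derived identically) and applied to the inference input so that X_input.shape[1] equals input_size. It is also worth checking that the MyModel used at inference matches the trained one: the test script defines fc1/fc2 with output_size = 3, while the training script saves an fc Sequential with output_size = 1, so load_state_dict cannot restore those weights as-is.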