{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Word2Vecを用いるセンチメント分析" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import torch\n", "#device = torch.device('mps') # macbook\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## データ準備\n", "\n", "### CSVファイルを読み込む" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexbrandsentimenttext
02401BorderlandsPositiveim getting on borderlands and i will murder yo...
12401BorderlandsPositiveI am coming to the borders and I will kill you...
22401BorderlandsPositiveim getting on borderlands and i will kill you ...
32401BorderlandsPositiveim coming on borderlands and i will murder you...
42401BorderlandsPositiveim getting on borderlands 2 and i will murder ...
\n", "
" ], "text/plain": [ " index brand sentiment \\\n", "0 2401 Borderlands Positive \n", "1 2401 Borderlands Positive \n", "2 2401 Borderlands Positive \n", "3 2401 Borderlands Positive \n", "4 2401 Borderlands Positive \n", "\n", " text \n", "0 im getting on borderlands and i will murder yo... \n", "1 I am coming to the borders and I will kill you... \n", "2 im getting on borderlands and i will kill you ... \n", "3 im coming on borderlands and i will murder you... \n", "4 im getting on borderlands 2 and i will murder ... " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df= pd.read_csv('./Data/twitter_training.csv',names=['index','brand','sentiment','text'])\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Negative 22542\n", "Positive 20832\n", "Neutral 18318\n", "Irrelevant 12990\n", "Name: sentiment, dtype: int64" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[\"sentiment\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "df[\"label\"]=df[\"sentiment\"].replace({\"Positive\":2,\"Negative\":0,\"Neutral\":1,\"Irrelevant\":np.nan})" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "df=df[['text','label']]\n", "df=df.dropna()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### トレーニング、バリデーション、テストデータに分割" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "# Split data (70% train, 15% validation, 15% test)\n", "train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42)\n", "val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### テキストデータの前処理\n", "\n", "- テキストを小文字に変換\n", "- 句読点を削除\n", "- トークン化" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[nltk_data] Downloading package punkt to /Users/ryozawau/nltk_data...\n", "[nltk_data] Package punkt is already up-to-date!\n" ] } ], "source": [ "import re\n", "import nltk\n", "from nltk.tokenize import word_tokenize\n", "\n", "# Download NLTK data (if not already done)\n", "nltk.download('punkt')\n", "\n", "# Function for preprocessing text\n", "def preprocess_text(text):\n", " text = text.lower() # Lowercasing\n", " text = re.sub(r'\\W+', ' ', text) # Remove punctuation\n", " tokens = word_tokenize(text) # Tokenization\n", " return tokens" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "# Apply preprocessing\n", "train_df['processed_text'] = train_df['text'].apply(preprocess_text)\n", "val_df['processed_text'] = val_df['text'].apply(preprocess_text)\n", "test_df['processed_text'] = test_df['text'].apply(preprocess_text)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 単語分散表現によって特徴量の作成" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "import gensim.downloader\n", "word2vec = gensim.downloader.load('word2vec-google-news-300')" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "def tokens_to_embedding(tokens, model, embedding_size=300):\n", " embeddings = [model[word] for word in tokens if word in model]\n", " if len(embeddings) == 0:\n", " return np.zeros(embedding_size)\n", " else:\n", " return np.mean(embeddings, axis=0)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "train_df['embeddings'] = train_df['processed_text'].apply(lambda x: tokens_to_embedding(x, word2vec))\n", "val_df['embeddings'] = val_df['processed_text'].apply(lambda x: tokens_to_embedding(x, word2vec))\n", "test_df['embeddings'] = test_df['processed_text'].apply(lambda x: tokens_to_embedding(x, word2vec))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 学習用データセットの作成" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "from torch.utils.data import DataLoader, TensorDataset\n", "\n", "def create_dataset(df):\n", " features = torch.tensor(df['embeddings'].tolist(),dtype=torch.float32).to(device)\n", " labels = torch.tensor(df['label'].values, dtype=torch.long).to(device)\n", " return TensorDataset(features, labels)\n", "\n", "train_dataset = create_dataset(train_df)\n", "val_dataset = create_dataset(val_df)\n", "test_dataset = create_dataset(test_df)\n", "\n", "batch_size = 32\n", "train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)\n", "val_loader = DataLoader(val_dataset, batch_size=batch_size)\n", "test_loader = DataLoader(test_dataset, batch_size=batch_size)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### モデルの作成" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "import torch.nn as nn\n", "import torch.optim as optim\n", "\n", "# Define a simple Neural Network\n", "class SimpleNN(nn.Module):\n", " def __init__(self, input_size, hidden_size, num_classes):\n", " super(SimpleNN, self).__init__()\n", " self.fc1 = nn.Linear(input_size, hidden_size)\n", " self.relu = nn.ReLU()\n", " self.fc2 = nn.Linear(hidden_size, num_classes)\n", " \n", " def forward(self, x):\n", " out = self.fc1(x)\n", " out = self.relu(out)\n", " out = self.fc2(out)\n", " return out\n", "\n", "# Model, Loss, and Optimizer\n", "embedding_size = 300\n", "model = SimpleNN(input_size=embedding_size, hidden_size=100, num_classes=3).to(device)\n", "criterion = nn.CrossEntropyLoss()\n", "optimizer = optim.Adam(model.parameters(), lr=0.001)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 学習の実行\n" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "tags": [ "hide-output" ] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1, Accuracy: 0.6624, F1 Score: 0.6575\n", "New best model saved at Epoch 1 with F1 Score: 0.6575\n", "Epoch 2, Accuracy: 0.6721, F1 Score: 0.6658\n", "New best model saved at Epoch 2 with F1 Score: 0.6658\n", "Epoch 3, Accuracy: 0.6843, F1 Score: 0.6820\n", "New best model saved at Epoch 3 with F1 Score: 0.6820\n", "Epoch 4, Accuracy: 0.6883, F1 Score: 0.6857\n", "New best model saved at Epoch 4 with F1 Score: 0.6857\n", "Epoch 5, Accuracy: 0.6932, F1 Score: 0.6916\n", "New best model saved at Epoch 5 with F1 Score: 0.6916\n", "Epoch 6, Accuracy: 0.6975, F1 Score: 0.6950\n", "New best model saved at Epoch 6 with F1 Score: 0.6950\n", "Epoch 7, Accuracy: 0.7042, F1 Score: 0.6986\n", "New best model saved at Epoch 7 with F1 Score: 0.6986\n", "Epoch 8, Accuracy: 0.7129, F1 Score: 0.7099\n", "New best model saved at Epoch 8 with F1 Score: 0.7099\n", "Epoch 9, Accuracy: 0.7182, F1 Score: 0.7147\n", "New best model saved at Epoch 9 with F1 Score: 0.7147\n", "Epoch 10, Accuracy: 0.7301, F1 Score: 0.7296\n", "New best model saved at Epoch 10 with F1 Score: 0.7296\n", "Epoch 11, Accuracy: 0.7268, F1 Score: 0.7271\n", "Epoch 12, Accuracy: 0.7435, F1 Score: 0.7423\n", "New best model saved at Epoch 12 with F1 Score: 0.7423\n", "Epoch 13, Accuracy: 0.7479, F1 Score: 0.7465\n", "New best model saved at Epoch 13 with F1 Score: 0.7465\n", "Epoch 14, Accuracy: 0.7450, F1 Score: 0.7415\n", "Epoch 15, Accuracy: 0.7540, F1 Score: 0.7529\n", "New best model saved at Epoch 15 with F1 Score: 0.7529\n", "Epoch 16, Accuracy: 0.7576, F1 Score: 0.7561\n", "New best model saved at Epoch 16 with F1 Score: 0.7561\n", "Epoch 17, Accuracy: 0.7480, F1 Score: 0.7492\n", "Epoch 18, Accuracy: 0.7611, F1 Score: 0.7595\n", "New best model saved at Epoch 18 with F1 Score: 0.7595\n", "Epoch 19, Accuracy: 0.7709, F1 Score: 0.7697\n", "New best model saved at Epoch 19 with F1 Score: 0.7697\n", "Epoch 20, Accuracy: 0.7691, F1 Score: 0.7690\n", "Epoch 21, Accuracy: 0.7702, F1 Score: 0.7679\n", "Epoch 22, Accuracy: 0.7717, F1 Score: 0.7722\n", "New best model saved at Epoch 22 with F1 Score: 0.7722\n", "Epoch 23, Accuracy: 0.7798, F1 Score: 0.7800\n", "New best model saved at Epoch 23 with F1 Score: 0.7800\n", "Epoch 24, Accuracy: 0.7800, F1 Score: 0.7797\n", "Epoch 25, Accuracy: 0.7812, F1 Score: 0.7804\n", "New best model saved at Epoch 25 with F1 Score: 0.7804\n", "Epoch 26, Accuracy: 0.7847, F1 Score: 0.7845\n", "New best model saved at Epoch 26 with F1 Score: 0.7845\n", "Epoch 27, Accuracy: 0.7789, F1 Score: 0.7775\n", "Epoch 28, Accuracy: 0.7881, F1 Score: 0.7879\n", "New best model saved at Epoch 28 with F1 Score: 0.7879\n", "Epoch 29, Accuracy: 0.7928, F1 Score: 0.7924\n", "New best model saved at Epoch 29 with F1 Score: 0.7924\n", "Epoch 30, Accuracy: 0.7897, F1 Score: 0.7901\n", "Epoch 31, Accuracy: 0.8004, F1 Score: 0.8004\n", "New best model saved at Epoch 31 with F1 Score: 0.8004\n", "Epoch 32, Accuracy: 0.7960, F1 Score: 0.7948\n", "Epoch 33, Accuracy: 0.8031, F1 Score: 0.8028\n", "New best model saved at Epoch 33 with F1 Score: 0.8028\n", "Epoch 34, Accuracy: 0.8013, F1 Score: 0.8005\n", "Epoch 35, Accuracy: 0.7990, F1 Score: 0.7993\n", "Epoch 36, Accuracy: 0.8054, F1 Score: 0.8047\n", "New best model saved at Epoch 36 with F1 Score: 0.8047\n", "Epoch 37, Accuracy: 0.8013, F1 Score: 0.8013\n", "Epoch 38, Accuracy: 0.8026, F1 Score: 0.8029\n", "Epoch 39, Accuracy: 0.8087, F1 Score: 0.8083\n", "New best model saved at Epoch 39 with F1 Score: 0.8083\n", "Epoch 40, Accuracy: 0.8043, F1 Score: 0.8035\n", "Epoch 41, Accuracy: 0.8075, F1 Score: 0.8070\n", "Epoch 42, Accuracy: 0.8106, F1 Score: 0.8111\n", "New best model saved at Epoch 42 with F1 Score: 0.8111\n", "Epoch 43, Accuracy: 0.8147, F1 Score: 0.8146\n", "New best model saved at Epoch 43 with F1 Score: 0.8146\n", "Epoch 44, Accuracy: 0.8124, F1 Score: 0.8118\n", "Epoch 45, Accuracy: 0.8153, F1 Score: 0.8144\n", "Epoch 46, Accuracy: 0.8091, F1 Score: 0.8088\n", "Epoch 47, Accuracy: 0.8209, F1 Score: 0.8204\n", "New best model saved at Epoch 47 with F1 Score: 0.8204\n", "Epoch 48, Accuracy: 0.8132, F1 Score: 0.8138\n", "Epoch 49, Accuracy: 0.8182, F1 Score: 0.8180\n", "Epoch 50, Accuracy: 0.8195, F1 Score: 0.8190\n", "Epoch 51, Accuracy: 0.8196, F1 Score: 0.8193\n", "Epoch 52, Accuracy: 0.8032, F1 Score: 0.8021\n", "Epoch 53, Accuracy: 0.8260, F1 Score: 0.8258\n", "New best model saved at Epoch 53 with F1 Score: 0.8258\n", "Epoch 54, Accuracy: 0.8221, F1 Score: 0.8220\n", "Epoch 55, Accuracy: 0.8214, F1 Score: 0.8212\n", "Epoch 56, Accuracy: 0.8241, F1 Score: 0.8237\n", "Epoch 57, Accuracy: 0.8253, F1 Score: 0.8253\n", "Epoch 58, Accuracy: 0.8226, F1 Score: 0.8229\n", "Epoch 59, Accuracy: 0.8245, F1 Score: 0.8239\n", "Epoch 60, Accuracy: 0.8266, F1 Score: 0.8261\n", "New best model saved at Epoch 60 with F1 Score: 0.8261\n", "Epoch 61, Accuracy: 0.8222, F1 Score: 0.8213\n", "Epoch 62, Accuracy: 0.8214, F1 Score: 0.8214\n", "Epoch 63, Accuracy: 0.8254, F1 Score: 0.8250\n", "Epoch 64, Accuracy: 0.8315, F1 Score: 0.8315\n", "New best model saved at Epoch 64 with F1 Score: 0.8315\n", "Epoch 65, Accuracy: 0.8309, F1 Score: 0.8306\n", "Epoch 66, Accuracy: 0.8269, F1 Score: 0.8265\n", "Epoch 67, Accuracy: 0.8257, F1 Score: 0.8252\n", "Epoch 68, Accuracy: 0.8296, F1 Score: 0.8293\n", "Epoch 69, Accuracy: 0.8222, F1 Score: 0.8213\n", "Epoch 70, Accuracy: 0.8305, F1 Score: 0.8304\n", "Epoch 71, Accuracy: 0.8243, F1 Score: 0.8239\n", "Epoch 72, Accuracy: 0.8324, F1 Score: 0.8324\n", "New best model saved at Epoch 72 with F1 Score: 0.8324\n", "Epoch 73, Accuracy: 0.8277, F1 Score: 0.8271\n", "Epoch 74, Accuracy: 0.8356, F1 Score: 0.8358\n", "New best model saved at Epoch 74 with F1 Score: 0.8358\n", "Epoch 75, Accuracy: 0.8304, F1 Score: 0.8301\n", "Epoch 76, Accuracy: 0.8308, F1 Score: 0.8306\n", "Epoch 77, Accuracy: 0.8250, F1 Score: 0.8257\n", "Epoch 78, Accuracy: 0.8340, F1 Score: 0.8337\n", "Epoch 79, Accuracy: 0.8297, F1 Score: 0.8293\n", "Epoch 80, Accuracy: 0.8340, F1 Score: 0.8340\n", "Epoch 81, Accuracy: 0.8307, F1 Score: 0.8302\n", "Epoch 82, Accuracy: 0.8357, F1 Score: 0.8356\n", "Epoch 83, Accuracy: 0.8342, F1 Score: 0.8341\n", "Epoch 84, Accuracy: 0.8343, F1 Score: 0.8343\n", "Epoch 85, Accuracy: 0.8285, F1 Score: 0.8282\n", "Epoch 86, Accuracy: 0.8344, F1 Score: 0.8339\n", "Epoch 87, Accuracy: 0.8271, F1 Score: 0.8269\n", "Epoch 88, Accuracy: 0.8346, F1 Score: 0.8345\n", "Epoch 89, Accuracy: 0.8339, F1 Score: 0.8334\n", "Epoch 90, Accuracy: 0.8398, F1 Score: 0.8398\n", "New best model saved at Epoch 90 with F1 Score: 0.8398\n", "Epoch 91, Accuracy: 0.8401, F1 Score: 0.8400\n", "New best model saved at Epoch 91 with F1 Score: 0.8400\n", "Epoch 92, Accuracy: 0.8412, F1 Score: 0.8411\n", "New best model saved at Epoch 92 with F1 Score: 0.8411\n", "Epoch 93, Accuracy: 0.8337, F1 Score: 0.8333\n", "Epoch 94, Accuracy: 0.8365, F1 Score: 0.8362\n", "Epoch 95, Accuracy: 0.8297, F1 Score: 0.8291\n", "Epoch 96, Accuracy: 0.8389, F1 Score: 0.8391\n", "Epoch 97, Accuracy: 0.8387, F1 Score: 0.8386\n", "Epoch 98, Accuracy: 0.8408, F1 Score: 0.8405\n", "Epoch 99, Accuracy: 0.8417, F1 Score: 0.8414\n", "New best model saved at Epoch 99 with F1 Score: 0.8414\n", "Epoch 100, Accuracy: 0.8386, F1 Score: 0.8382\n", "Epoch 101, Accuracy: 0.8360, F1 Score: 0.8354\n", "Epoch 102, Accuracy: 0.8420, F1 Score: 0.8419\n", "New best model saved at Epoch 102 with F1 Score: 0.8419\n", "Epoch 103, Accuracy: 0.8423, F1 Score: 0.8421\n", "New best model saved at Epoch 103 with F1 Score: 0.8421\n", "Epoch 104, Accuracy: 0.8345, F1 Score: 0.8347\n", "Epoch 105, Accuracy: 0.8411, F1 Score: 0.8413\n", "Epoch 106, Accuracy: 0.8294, F1 Score: 0.8286\n", "Epoch 107, Accuracy: 0.8372, F1 Score: 0.8367\n", "Epoch 108, Accuracy: 0.8425, F1 Score: 0.8423\n", "New best model saved at Epoch 108 with F1 Score: 0.8423\n", "Epoch 109, Accuracy: 0.8399, F1 Score: 0.8399\n", "Epoch 110, Accuracy: 0.8348, F1 Score: 0.8347\n", "Epoch 111, Accuracy: 0.8324, F1 Score: 0.8318\n", "Epoch 112, Accuracy: 0.8418, F1 Score: 0.8416\n", "Epoch 113, Accuracy: 0.8391, F1 Score: 0.8392\n", "Epoch 114, Accuracy: 0.8328, F1 Score: 0.8322\n", "Epoch 115, Accuracy: 0.8417, F1 Score: 0.8416\n", "Epoch 116, Accuracy: 0.8339, F1 Score: 0.8333\n", "Epoch 117, Accuracy: 0.8356, F1 Score: 0.8354\n", "Epoch 118, Accuracy: 0.8388, F1 Score: 0.8385\n", "Epoch 119, Accuracy: 0.8373, F1 Score: 0.8374\n", "Epoch 120, Accuracy: 0.8426, F1 Score: 0.8424\n", "New best model saved at Epoch 120 with F1 Score: 0.8424\n", "Epoch 121, Accuracy: 0.8338, F1 Score: 0.8339\n", "Epoch 122, Accuracy: 0.8349, F1 Score: 0.8343\n", "Epoch 123, Accuracy: 0.8387, F1 Score: 0.8386\n", "Epoch 124, Accuracy: 0.8291, F1 Score: 0.8283\n", "Epoch 125, Accuracy: 0.8353, F1 Score: 0.8353\n", "Epoch 126, Accuracy: 0.8452, F1 Score: 0.8450\n", "New best model saved at Epoch 126 with F1 Score: 0.8450\n", "Epoch 127, Accuracy: 0.8370, F1 Score: 0.8370\n", "Epoch 128, Accuracy: 0.8378, F1 Score: 0.8373\n", "Epoch 129, Accuracy: 0.8436, F1 Score: 0.8434\n", "Epoch 130, Accuracy: 0.8351, F1 Score: 0.8344\n", "Epoch 131, Accuracy: 0.8397, F1 Score: 0.8391\n", "Epoch 132, Accuracy: 0.8423, F1 Score: 0.8421\n", "Epoch 133, Accuracy: 0.8388, F1 Score: 0.8384\n", "Epoch 134, Accuracy: 0.8414, F1 Score: 0.8412\n", "Epoch 135, Accuracy: 0.8386, F1 Score: 0.8382\n", "Epoch 136, Accuracy: 0.8459, F1 Score: 0.8461\n", "New best model saved at Epoch 136 with F1 Score: 0.8461\n", "Epoch 137, Accuracy: 0.8432, F1 Score: 0.8430\n", "Epoch 138, Accuracy: 0.8365, F1 Score: 0.8361\n", "Epoch 139, Accuracy: 0.8416, F1 Score: 0.8414\n", "Epoch 140, Accuracy: 0.8459, F1 Score: 0.8458\n", "Epoch 141, Accuracy: 0.8435, F1 Score: 0.8434\n", "Epoch 142, Accuracy: 0.8376, F1 Score: 0.8371\n", "Epoch 143, Accuracy: 0.8410, F1 Score: 0.8407\n", "Epoch 144, Accuracy: 0.8427, F1 Score: 0.8427\n", "Epoch 145, Accuracy: 0.8340, F1 Score: 0.8332\n", "Epoch 146, Accuracy: 0.8360, F1 Score: 0.8364\n", "Epoch 147, Accuracy: 0.8438, F1 Score: 0.8437\n", "Epoch 148, Accuracy: 0.8392, F1 Score: 0.8389\n", "Epoch 149, Accuracy: 0.8357, F1 Score: 0.8361\n", "Epoch 150, Accuracy: 0.8435, F1 Score: 0.8437\n", "Epoch 151, Accuracy: 0.8447, F1 Score: 0.8444\n", "Epoch 152, Accuracy: 0.8452, F1 Score: 0.8450\n", "Epoch 153, Accuracy: 0.8433, F1 Score: 0.8432\n", "Epoch 154, Accuracy: 0.8348, F1 Score: 0.8341\n", "Epoch 155, Accuracy: 0.8422, F1 Score: 0.8421\n", "Epoch 156, Accuracy: 0.8437, F1 Score: 0.8436\n", "Epoch 157, Accuracy: 0.8453, F1 Score: 0.8451\n", "Epoch 158, Accuracy: 0.8404, F1 Score: 0.8407\n", "Epoch 159, Accuracy: 0.8454, F1 Score: 0.8453\n", "Epoch 160, Accuracy: 0.8406, F1 Score: 0.8405\n", "Epoch 161, Accuracy: 0.8463, F1 Score: 0.8461\n", "New best model saved at Epoch 161 with F1 Score: 0.8461\n", "Epoch 162, Accuracy: 0.8440, F1 Score: 0.8441\n", "Epoch 163, Accuracy: 0.8433, F1 Score: 0.8430\n", "Epoch 164, Accuracy: 0.8472, F1 Score: 0.8469\n", "New best model saved at Epoch 164 with F1 Score: 0.8469\n", "Epoch 165, Accuracy: 0.8463, F1 Score: 0.8461\n", "Epoch 166, Accuracy: 0.8397, F1 Score: 0.8391\n", "Epoch 167, Accuracy: 0.8172, F1 Score: 0.8147\n", "Epoch 168, Accuracy: 0.8307, F1 Score: 0.8301\n", "Epoch 169, Accuracy: 0.8411, F1 Score: 0.8414\n", "Epoch 170, Accuracy: 0.8403, F1 Score: 0.8404\n", "Epoch 171, Accuracy: 0.8409, F1 Score: 0.8406\n", "Epoch 172, Accuracy: 0.8404, F1 Score: 0.8401\n", "Epoch 173, Accuracy: 0.8399, F1 Score: 0.8396\n", "Epoch 174, Accuracy: 0.8422, F1 Score: 0.8422\n", "Epoch 175, Accuracy: 0.8459, F1 Score: 0.8460\n", "Epoch 176, Accuracy: 0.8439, F1 Score: 0.8440\n", "Epoch 177, Accuracy: 0.8365, F1 Score: 0.8362\n", "Epoch 178, Accuracy: 0.8447, F1 Score: 0.8447\n", "Epoch 179, Accuracy: 0.8403, F1 Score: 0.8400\n", "Epoch 180, Accuracy: 0.8424, F1 Score: 0.8424\n", "Epoch 181, Accuracy: 0.8387, F1 Score: 0.8389\n", "Epoch 182, Accuracy: 0.8397, F1 Score: 0.8393\n", "Epoch 183, Accuracy: 0.8435, F1 Score: 0.8434\n", "Epoch 184, Accuracy: 0.8306, F1 Score: 0.8296\n", "Epoch 185, Accuracy: 0.8414, F1 Score: 0.8410\n", "Epoch 186, Accuracy: 0.8428, F1 Score: 0.8428\n", "Epoch 187, Accuracy: 0.8447, F1 Score: 0.8446\n", "Epoch 188, Accuracy: 0.8424, F1 Score: 0.8427\n", "Epoch 189, Accuracy: 0.8360, F1 Score: 0.8354\n", "Epoch 190, Accuracy: 0.8435, F1 Score: 0.8434\n", "Epoch 191, Accuracy: 0.8399, F1 Score: 0.8400\n", "Epoch 192, Accuracy: 0.8448, F1 Score: 0.8446\n", "Epoch 193, Accuracy: 0.8398, F1 Score: 0.8395\n", "Epoch 194, Accuracy: 0.8451, F1 Score: 0.8449\n", "Epoch 195, Accuracy: 0.8441, F1 Score: 0.8437\n", "Epoch 196, Accuracy: 0.8438, F1 Score: 0.8437\n", "Epoch 197, Accuracy: 0.8448, F1 Score: 0.8448\n", "Epoch 198, Accuracy: 0.8418, F1 Score: 0.8419\n", "Epoch 199, Accuracy: 0.8400, F1 Score: 0.8397\n", "Epoch 200, Accuracy: 0.8421, F1 Score: 0.8419\n" ] } ], "source": [ "from sklearn.metrics import accuracy_score, f1_score\n", "#from torch.utils.tensorboard import SummaryWriter\n", "# Initialize the SummaryWriter\n", "#writer = SummaryWriter('runs/sentiment')\n", "\n", "num_epochs = 200\n", "best_f1_score = 0.0\n", "# Training loop\n", "# Training and validation loop\n", "for epoch in range(num_epochs):\n", " # Training phase\n", " model.train()\n", " train_loss = 0.0\n", " for inputs, labels in train_loader:\n", " optimizer.zero_grad()\n", " outputs = model(inputs)\n", " loss = criterion(outputs, labels)\n", " loss.backward()\n", " optimizer.step()\n", " train_loss += loss.item()\n", " # Log training loss\n", " #writer.add_scalar('Loss/train', train_loss/len(train_loader), epoch)\n", "\n", " # Validation phase\n", " model.eval()\n", " val_loss = 0.0\n", " val_preds = []\n", " val_labels = []\n", " with torch.no_grad():\n", " for inputs, labels in val_loader:\n", " outputs = model(inputs)\n", " _, predicted = torch.max(outputs, 1)\n", " val_preds.extend(predicted.numpy())\n", " val_labels.extend(labels.numpy())\n", " # Log validation loss\n", " #writer.add_scalar('Loss/val', val_loss/len(val_loader), epoch)\n", " # Calculate accuracy and F1-score\n", " accuracy = accuracy_score(val_labels, val_preds)\n", " f1 = f1_score(val_labels, val_preds, average='weighted')\n", " # Log accuracy and F1-score\n", " #writer.add_scalar('Accuracy/val', accuracy, epoch)\n", " #writer.add_scalar('F1_Score/val', f1, epoch)\n", "\n", " print(f'Epoch {epoch+1}, Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}')\n", " if f1 > best_f1_score:\n", " best_f1_score = f1\n", " # Save the model\n", " torch.save(model.state_dict(), './Model/best_model.pth')\n", " print(f\"New best model saved at Epoch {epoch+1} with F1 Score: {f1:.4f}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## テストデータでモデルを検証する" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "#model.load_state_dict(torch.load('./Model/best_model.pth'))\n", "# Testing loop\n", "model.eval()\n", "test_preds = []\n", "test_labels = []\n", "with torch.no_grad():\n", " for inputs, labels in test_loader:\n", " outputs = model(inputs)\n", " _, predicted = torch.max(outputs, 1)\n", " test_preds.extend(predicted.numpy())\n", " test_labels.extend(labels.numpy())" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from sklearn.metrics import confusion_matrix\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "# Confusion matrix\n", "cm = confusion_matrix(test_labels, test_preds)\n", "sns.heatmap(cm, annot=True, fmt='d', cmap=\"crest\")\n", "plt.xlabel('Predicted')\n", "plt.ylabel('True')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "jupyterbook", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.9" } }, "nbformat": 4, "nbformat_minor": 2 }