{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# This notebook classifies the localization site of proteins in the Yeast dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "#IMPORT LIBRARIES" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd; import numpy as np" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "#LOAD DATA" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "master = pd.read_csv('yeast.csv') #dataset with 8 variables, localization is in feature 'Target'\n", "\n", "#keep an independent copy (not an alias) so the raw data survives any in-place change to master\n", "backup = master.copy()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Sequence NamemcggvhalmmiterlpoxvacnucTarget
0ADT1_YEAST0.580.610.470.130.50.00.480.22MIT
1ADT2_YEAST0.430.670.480.270.50.00.530.22MIT
2ADT3_YEAST0.640.620.490.150.50.00.530.22MIT
3AAR2_YEAST0.580.440.570.130.50.00.540.22NUC
4AATM_YEAST0.420.440.480.540.50.00.480.22MIT
\n", "
" ], "text/plain": [ " Sequence Name mcg gvh alm mit erl pox vac nuc Target\n", "0 ADT1_YEAST 0.58 0.61 0.47 0.13 0.5 0.0 0.48 0.22 MIT\n", "1 ADT2_YEAST 0.43 0.67 0.48 0.27 0.5 0.0 0.53 0.22 MIT\n", "2 ADT3_YEAST 0.64 0.62 0.49 0.15 0.5 0.0 0.53 0.22 MIT\n", "3 AAR2_YEAST 0.58 0.44 0.57 0.13 0.5 0.0 0.54 0.22 NUC\n", "4 AATM_YEAST 0.42 0.44 0.48 0.54 0.5 0.0 0.48 0.22 MIT" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master.head() #sanity check" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1484, 10)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#START RE-RUNNING CODE FROM HERE\n", "\n", "#restore a fresh copy of the raw data; copying keeps backup intact if master is later modified in place\n", "master = backup.copy()\n", "master.shape" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "CYT 463\n", "NUC 429\n", "MIT 244\n", "ME3 163\n", "ME2 51\n", "ME1 44\n", "EXC 35\n", "VAC 30\n", "POX 20\n", "ERL 5\n", "Name: Target, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Look at the different classes in the dataset: ERL has only 5 samples\n", "\n", "master['Target'].value_counts()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1484, 10)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#ERL has very few samples; the drop below is currently disabled, so ERL rows are kept (shape is unchanged)\n", "\n", "# master = master[master['Target'] != 'ERL']\n", "master.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1484, 9)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#drop sequence name column, do not need this column\n", "\n", "master = master.drop(columns='Sequence Name')\n", "master.shape" ] }, { "cell_type": 
"code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "#identify feature columns\n", "\n", "data = master.columns.drop('Target')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "#label encode the target localization site\n", "\n", "from sklearn.preprocessing import LabelEncoder\n", "\n", "le = LabelEncoder()\n", "localization = ['CYT', 'ERL', 'EXC', 'ME1', 'ME2', 'ME3', 'MIT', 'NUC', 'POX', 'VAC']\n", "le.fit(localization)\n", "#transform only: fit_transform would re-fit on the data and discard the fixed class list fitted above\n", "target = le.transform(master['Target'])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/abhay/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", " from ._conv import register_converters as _register_converters\n", "Using TensorFlow backend.\n" ] } ], "source": [ "#onehotencode the target localization site\n", "\n", "from keras.utils import np_utils\n", "\n", "#fix the width to the full class list so the columns always line up with localization\n", "target = np_utils.to_categorical(target, num_classes=len(localization))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mcggvhalmmiterlpoxvacnucTargetCYTERLEXCME1ME2ME3MITNUCPOXVAC
00.580.610.470.130.50.00.480.22MIT0.00.00.00.00.00.01.00.00.00.0
10.430.670.480.270.50.00.530.22MIT0.00.00.00.00.00.01.00.00.00.0
20.640.620.490.150.50.00.530.22MIT0.00.00.00.00.00.01.00.00.00.0
30.580.440.570.130.50.00.540.22NUC0.00.00.00.00.00.00.01.00.00.0
40.420.440.480.540.50.00.480.22MIT0.00.00.00.00.00.01.00.00.00.0
\n", "
" ], "text/plain": [ " mcg gvh alm mit erl pox vac nuc Target CYT ERL EXC ME1 \\\n", "0 0.58 0.61 0.47 0.13 0.5 0.0 0.48 0.22 MIT 0.0 0.0 0.0 0.0 \n", "1 0.43 0.67 0.48 0.27 0.5 0.0 0.53 0.22 MIT 0.0 0.0 0.0 0.0 \n", "2 0.64 0.62 0.49 0.15 0.5 0.0 0.53 0.22 MIT 0.0 0.0 0.0 0.0 \n", "3 0.58 0.44 0.57 0.13 0.5 0.0 0.54 0.22 NUC 0.0 0.0 0.0 0.0 \n", "4 0.42 0.44 0.48 0.54 0.5 0.0 0.48 0.22 MIT 0.0 0.0 0.0 0.0 \n", "\n", " ME2 ME3 MIT NUC POX VAC \n", "0 0.0 0.0 1.0 0.0 0.0 0.0 \n", "1 0.0 0.0 1.0 0.0 0.0 0.0 \n", "2 0.0 0.0 1.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 1.0 0.0 0.0 \n", "4 0.0 0.0 1.0 0.0 0.0 0.0 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#join the indicator variables to create master dataframe with features as well as OneHotEncoded target\n", "#reuse master's index so rows stay aligned even if rows were filtered out of master earlier\n", "\n", "master = master.join(pd.DataFrame(data=target, columns=localization, index=master.index))\n", "master.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "#LOF outlier detection (this is what we chose to use)\n", "\n", "from sklearn.neighbors import LocalOutlierFactor\n", "\n", "lof = LocalOutlierFactor(contamination='auto')\n", "\n", "lofOutliers = lof.fit_predict(master[data])" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "76" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#identify number of samples with highest outlier score\n", "\n", "(lofOutliers == -1).sum()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "#Isolation Forest outlier detection \n", "\n", "from sklearn.ensemble import IsolationForest\n", "\n", "#fixed seed so the flagged outliers are reproducible between runs\n", "iso = IsolationForest(behaviour='new', max_features=8, contamination='auto', random_state=0)\n", "\n", "iso.fit(master[data])\n", "isoOutliers = iso.predict(master[data])" ] }, { "cell_type": 
"code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "66" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#identify samples with highest outlier score\n", "\n", "(isoOutliers == -1).sum()\n" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "32" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# compare the common outliers from both functions\n", "\n", "((isoOutliers - lofOutliers) == 2).sum()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1408, 19)" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#DROP OUTLIER SAMPLES\n", "\n", "master = master[lofOutliers == 1]\n", "master.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# split into train and test\n", "\n", "from sklearn.model_selection import train_test_split\n", "\n", "trainData, testData, trainTarget, testTarget = train_test_split(master[data], master[localization], \n", " test_size = 0.3, random_state = 0)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CYTERLEXCME1ME2ME3MITNUCPOXVAC
6910.00.00.00.00.00.00.01.00.00.0
850.00.00.00.00.01.00.00.00.00.0
9701.00.00.00.00.00.00.00.00.00.0
4561.00.00.00.00.00.00.00.00.00.0
10531.00.00.00.00.00.00.00.00.00.0
\n", "
" ], "text/plain": [ " CYT ERL EXC ME1 ME2 ME3 MIT NUC POX VAC\n", "691 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0\n", "85 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0\n", "970 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n", "456 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n", "1053 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainTarget.head() #sanity check" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mcggvhalmmiterlpoxvacnuc
9700.610.480.560.190.50.00.530.31
4560.500.460.520.160.50.00.520.26
10530.380.400.530.120.50.00.550.22
3660.640.530.450.130.50.00.530.22
9990.730.600.470.200.50.00.720.22
\n", "
" ], "text/plain": [ " mcg gvh alm mit erl pox vac nuc\n", "970 0.61 0.48 0.56 0.19 0.5 0.0 0.53 0.31\n", "456 0.50 0.46 0.52 0.16 0.5 0.0 0.52 0.26\n", "1053 0.38 0.40 0.53 0.12 0.5 0.0 0.55 0.22\n", "366 0.64 0.53 0.45 0.13 0.5 0.0 0.53 0.22\n", "999 0.73 0.60 0.47 0.20 0.5 0.0 0.72 0.22" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#select only CYT samples from train and test sets\n", "\n", "trainCYT = trainData[trainTarget['CYT'] == 1]\n", "testCYT = testData[testTarget['CYT'] == 1]\n", "trainCYT.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# 4-STEP MODELLING PROCESS: IMPORT MODEL, ADD LAYERS TO MODEL, TRAIN USING FIT, EVALUATE" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "from keras.models import Sequential\n", "from keras.layers import Dense, Activation, Input\n", "from keras.optimizers import SGD\n", "from keras.losses import categorical_crossentropy" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "#using stochastic gradient descent, set learning rate\n", "\n", "sgd = SGD(lr = 0.01)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "model = Sequential()\n", "model.add(Dense(3, input_shape=(8,), activation='sigmoid'))\n", "model.add(Dense(3, activation='sigmoid'))\n", "model.add(Dense(10, activation='softmax'))\n", "\n", "model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "#plot details for CYT class\n", "\n", "import matplotlib.pyplot as plt\n", "from keras.callbacks import ModelCheckpoint as mc\n", 
"from keras.callbacks import LambdaCallback as lc\n", "\n", "weightList = [[],[],[],[]]\n", "cyt_err_train = []\n", "cyt_err_test = []\n", "\n", "node1 = lc(on_epoch_end=lambda epoch, logs: weightList[0].append(model.layers[2].get_weights()[0][0][0]))\n", "node2 = lc(on_epoch_end=lambda epoch, logs: weightList[1].append(model.layers[2].get_weights()[0][1][0]))\n", "node3 = lc(on_epoch_end=lambda epoch, logs: weightList[2].append(model.layers[2].get_weights()[0][2][0]))\n", "node4 = lc(on_epoch_end=lambda epoch, logs: weightList[3].append(model.layers[2].get_weights()[1][0]))\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/100\n", "985/985 [==============================] - 4s 4ms/step - loss: 1.8798 - acc: 0.3259\n", "Epoch 2/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.7272 - acc: 0.3289\n", "Epoch 3/100\n", "985/985 [==============================] - 2s 3ms/step - loss: 1.7102 - acc: 0.3320\n", "Epoch 4/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.7047 - acc: 0.3310\n", "Epoch 5/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.7009 - acc: 0.3320\n", "Epoch 6/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6985 - acc: 0.3299\n", "Epoch 7/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6991 - acc: 0.3279\n", "Epoch 8/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6977 - acc: 0.3310\n", "Epoch 9/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6963 - acc: 0.3279\n", "Epoch 10/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6952 - acc: 0.3310\n", "Epoch 11/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6954 - acc: 0.3289\n", "Epoch 
12/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6965 - acc: 0.3340\n", "Epoch 13/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6958 - acc: 0.3310\n", "Epoch 14/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6957 - acc: 0.3239\n", "Epoch 15/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6946 - acc: 0.3310\n", "Epoch 16/100\n", "985/985 [==============================] - 2s 3ms/step - loss: 1.6908 - acc: 0.3259\n", "Epoch 17/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6939 - acc: 0.3208\n", "Epoch 18/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6944 - acc: 0.3310\n", "Epoch 19/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6936 - acc: 0.3249\n", "Epoch 20/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6935 - acc: 0.3228\n", "Epoch 21/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6946 - acc: 0.3330\n", "Epoch 22/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6936 - acc: 0.3269\n", "Epoch 23/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6921 - acc: 0.3310\n", "Epoch 24/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6942 - acc: 0.3289\n", "Epoch 25/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6938 - acc: 0.3340\n", "Epoch 26/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6933 - acc: 0.3320\n", "Epoch 27/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6937 - acc: 0.3340\n", "Epoch 28/100\n", "985/985 [==============================] - 2s 3ms/step - loss: 1.6926 - acc: 0.3279\n", "Epoch 29/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6925 - acc: 0.3269\n", "Epoch 30/100\n", "985/985 [==============================] - 2s 2ms/step - 
loss: 1.6909 - acc: 0.3350\n", "Epoch 31/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6900 - acc: 0.3289\n", "Epoch 32/100\n", "985/985 [==============================] - 2s 3ms/step - loss: 1.6917 - acc: 0.3259\n", "Epoch 33/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6903 - acc: 0.3269\n", "Epoch 34/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6917 - acc: 0.3289\n", "Epoch 35/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6923 - acc: 0.3320\n", "Epoch 36/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6887 - acc: 0.3320\n", "Epoch 37/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6895 - acc: 0.3289\n", "Epoch 38/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6907 - acc: 0.3330\n", "Epoch 39/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6901 - acc: 0.3299\n", "Epoch 40/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6884 - acc: 0.3279\n", "Epoch 41/100\n", "985/985 [==============================] - 2s 3ms/step - loss: 1.6873 - acc: 0.3249\n", "Epoch 42/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6888 - acc: 0.3259\n", "Epoch 43/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6879 - acc: 0.3147\n", "Epoch 44/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6887 - acc: 0.3340\n", "Epoch 45/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6881 - acc: 0.3228\n", "Epoch 46/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6881 - acc: 0.3310\n", "Epoch 47/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6849 - acc: 0.3330\n", "Epoch 48/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6869 - acc: 0.3279\n", "Epoch 49/100\n", "985/985 
[==============================] - 2s 2ms/step - loss: 1.6870 - acc: 0.3340\n", "Epoch 50/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6853 - acc: 0.3228\n", "Epoch 51/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6852 - acc: 0.3289\n", "Epoch 52/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6843 - acc: 0.3198\n", "Epoch 53/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6829 - acc: 0.3279\n", "Epoch 54/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6816 - acc: 0.3299\n", "Epoch 55/100\n", "985/985 [==============================] - 2s 3ms/step - loss: 1.6825 - acc: 0.3269\n", "Epoch 56/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6813 - acc: 0.3330\n", "Epoch 57/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6787 - acc: 0.3320\n", "Epoch 58/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6792 - acc: 0.3289\n", "Epoch 59/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6783 - acc: 0.3269\n", "Epoch 60/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6776 - acc: 0.3320\n", "Epoch 61/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6750 - acc: 0.3259\n", "Epoch 62/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6732 - acc: 0.3299\n", "Epoch 63/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6707 - acc: 0.3279\n", "Epoch 64/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6707 - acc: 0.3249\n", "Epoch 65/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6675 - acc: 0.3310\n", "Epoch 66/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6659 - acc: 0.3228\n", "Epoch 67/100\n", "985/985 [==============================] - 3s 3ms/step - loss: 1.6642 - acc: 
0.3320\n", "Epoch 68/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6605 - acc: 0.3320\n", "Epoch 69/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6565 - acc: 0.3279\n", "Epoch 70/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6533 - acc: 0.3299\n", "Epoch 71/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6474 - acc: 0.3249\n", "Epoch 72/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6438 - acc: 0.3269\n", "Epoch 73/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6381 - acc: 0.3299\n", "Epoch 74/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6321 - acc: 0.3188\n", "Epoch 75/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6271 - acc: 0.3340\n", "Epoch 76/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6197 - acc: 0.3340\n", "Epoch 77/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6106 - acc: 0.3350\n", "Epoch 78/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.6028 - acc: 0.3188\n", "Epoch 79/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.5929 - acc: 0.3320\n", "Epoch 80/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.5839 - acc: 0.3320\n", "Epoch 81/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.5733 - acc: 0.3391\n", "Epoch 82/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.5640 - acc: 0.3462\n", "Epoch 83/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.5534 - acc: 0.3523\n", "Epoch 84/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.5437 - acc: 0.3563\n", "Epoch 85/100\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "985/985 [==============================] - 2s 2ms/step - loss: 1.5344 - acc: 0.3594\n", 
"Epoch 86/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.5250 - acc: 0.3736\n", "Epoch 87/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.5147 - acc: 0.3675\n", "Epoch 88/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.5056 - acc: 0.3766\n", "Epoch 89/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.4981 - acc: 0.3807\n", "Epoch 90/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.4886 - acc: 0.3838\n", "Epoch 91/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.4809 - acc: 0.3919\n", "Epoch 92/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.4736 - acc: 0.3939\n", "Epoch 93/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.4689 - acc: 0.3868\n", "Epoch 94/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.4623 - acc: 0.3909\n", "Epoch 95/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.4560 - acc: 0.4051\n", "Epoch 96/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.4526 - acc: 0.3797\n", "Epoch 97/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.4466 - acc: 0.3949\n", "Epoch 98/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.4398 - acc: 0.3909\n", "Epoch 99/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.4367 - acc: 0.4010\n", "Epoch 100/100\n", "985/985 [==============================] - 2s 2ms/step - loss: 1.4300 - acc: 0.3990\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "weightList = [[],[],[],[]]\n", "numepochs = 100\n", "batch = 1\n", "\n", "model.fit(trainData.values, trainTarget.values, epochs = numepochs, \n", " batch_size=batch, callbacks=[node1, node2, node3, node4])" ] }, { "cell_type": "code", "execution_count": null, 
"metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "423/423 [==============================] - 0s 984us/step\n" ] }, { "data": { "text/plain": [ "[1.4144956797854558, 0.425531914893617]" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#EVALUATE MODEL USING METRICS\n", "\n", "model.evaluate(testData.values, testTarget.values, batch_size=batch)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "weightList = np.array(weightList).T" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "\n", "plt.plot(weightList)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAD8CAYAAABzTgP2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADsBJREFUeJzt23GonXd9x/H3x1xMUaFN2kRr0+xWWhjpBoqHFtkGnbVtOtAU7R/p/jBslfwx+8cUwUg3aqt/tN2kIrqNoEIQZusqYkBGia2FMUbtSduhmcZco9JrS42kFLpiS+Z3f9yn2/ldzu29uc+59+TW9wsO53l+v+95zveXA/nc53nOSVUhSdKr3jDtBiRJ5xaDQZLUMBgkSQ2DQZLUMBgkSQ2DQZLUMBgkSQ2DQZLUMBgkSY2ZaTewGhdddFHNzs5Ouw1J2lCOHj3666ratlzdhgyG2dlZhsPhtNuQpA0lyS9WUuelJElSw2CQJDUMBklSw2CQJDUMBklSw2CQJDUMBklSw2CQJDUMBklSw2CQJDUMBklSw2CQJDUMBklSw2CQJDUMBklSw2CQJDUMBklSw2CQJDUMBklSw2CQJDUMBklSw2CQJDUMBklSw2CQJDUmEgxJdic5nmQuyYEx85uTPNDNP5ZkdtH8ziQvJvnEJPqRJK1e72BIsgn4EnAjsAu4JcmuRWW3As9X1eXAfcA9i+bvA/61by+SpP4mccZwFTBXVSer6hXgfmDPopo9wKFu+0Hg2iQBSHITcBI4NoFeJEk9TSIYLgGeHtmf78bG1lTVGeAF4MIkbwY+Cdw5gT4kSRMwiWDImLFaYc2dwH1V9eKyb5LsTzJMMjx16tQq2pQkrcTMBI4xD1w6sr8DeGaJmvkkM8D5wGngauDmJPcCFwC/TfKbqvri4jepqoPAQYDBYLA4eCRJEzKJYHgcuCLJZcAvgb3Any+qOQzsA/4DuBl4pKoK+JNXC5J8GnhxXChIktZP72CoqjNJbgMeAjYBX62qY0nuAoZVdRj4CvC1JHMsnCns7fu+kqS1kYU/3DeWwWBQw+Fw2m1I0oaS5GhVDZar85fPkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqTGRIIhye4kx5PMJTkwZn5zkge6+ceSzHbj1yU5muQH3fN7J9GPJGn1egdDkk3Al4AbgV3ALUl2LSq7FXi+qi4H7gPu6cZ/Dby/qv4Q2Ad8rW8/kqR+JnHGcBUwV1Unq+oV4H5gz6KaPcChbvtB4Nokqaonq+qZbvwYcF6SzRPoSZK0SpMIhkuAp0f257uxsTVVdQZ4AbhwUc2HgCer6uUJ9CRJWqWZCRwjY8bqbGqSXMnC5aXrl3yTZD+wH2Dnzp1n36UkaUUmccYwD1w6sr8DeGapmiQzwPnA6W5/B/At4MNV9dOl3qSqDlbVoKoG27Ztm0DbkqRxJhEMjwNXJLksyRuBvcDhRTWHWbi5DHAz8Eh
VVZILgO8An6qqf59AL5KknnoHQ3fP4DbgIeBHwDeq6liSu5J8oCv7CnBhkjng48CrX2m9Dbgc+NskT3WP7X17kiStXqoW3w449w0GgxoOh9NuQ5I2lCRHq2qwXJ2/fJYkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVJjIsGQZHeS40nmkhwYM785yQPd/GNJZkfmPtWNH09ywyT6kSStXu9gSLIJ+BJwI7ALuCXJrkVltwLPV9XlwH3APd1rdwF7gSuB3cA/dMeTJE3JJM4YrgLmqupkVb0C3A/sWVSzBzjUbT8IXJsk3fj9VfVyVf0MmOuOJ0makkkEwyXA0yP7893Y2JqqOgO8AFy4wtdKktbRJIIhY8ZqhTUree3CAZL9SYZJhqdOnTrLFiVJKzWJYJgHLh3Z3wE8s1RNkhngfOD0Cl8LQFUdrKpBVQ22bds2gbYlSeNMIhgeB65IclmSN7JwM/nwoprDwL5u+2bgkaqqbnxv962ly4ArgO9PoCdJ0irN9D1AVZ1JchvwELAJ+GpVHUtyFzCsqsPAV4CvJZlj4Uxhb/faY0m+AfwXcAb4aFX9T9+eJEmrl4U/3DeWwWBQw+Fw2m1I0oaS5GhVDZar85fPkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqSGwSBJahgMkqRGr2BIsjXJkSQnuuctS9Tt62pOJNnXjb0pyXeS/DjJsSR39+lFkjQZfc8YDgAPV9UVwMPdfiPJVuAO4GrgKuCOkQD5+6r6feBdwB8lubFnP5KknvoGwx7gULd9CLhpTM0NwJGqOl1VzwNHgN1V9VJVfQ+gql4BngB29OxHktRT32B4a1U9C9A9bx9Tcwnw9Mj+fDf2f5JcALyfhbMOSdIUzSxXkOS7wNvGTN2+wvfImLEaOf4M8HXgC1V18jX62A/sB9i5c+cK31qSdLaWDYaqet9Sc0meS3JxVT2b5GLgV2PK5oFrRvZ3AI+O7B8ETlTV55fp42BXy2AwqNeqlSStXt9LSYeBfd32PuDbY2oeAq5PsqW76Xx9N0aSzwLnA3/dsw9J0oT0DYa7geuSnACu6/ZJMkjyZYCqOg18Bni8e9xVVaeT7GDhctQu4IkkTyX5SM9+JEk9pWrjXZUZDAY1HA6n3YYkbShJjlbVYLk6f/ksSWoYDJKkhsEgSWoYDJKkhsEgSWoYDJKkhsEgSWoYDJKkhsEgSWoYDJKkhsEgSWoYDJKkhsEgSWoYDJKkhsEgSWoYDJKkhsEgSWoYDJKkhsEgSWoYDJKkhsEgSWoYDJKkhsEgSWoYDJKkhsEgSWoYDJKkhsEgSWoYDJKkhsEgSWoYDJKkRq9gSLI1yZEkJ7rnLUvU7etqTiTZN2b+cJIf9ulFkjQZfc8YDgAPV9UVwMPdfiPJVuAO4GrgKuCO0QBJ8kHgxZ59SJImpG8w7AEOdduHgJvG1NwAHKmq01X1PHAE2A2Q5C3Ax4HP9uxDkjQhfYPhrVX1LED3vH1MzSXA0yP7890YwGeAzwEv9exDkjQhM8sVJPku8LYxU7ev8D0yZqySvBO4vKo+lmR2BX3sB/YD7Ny5c4VvLUk6W8sGQ1W9b6m5JM8lubiqnk1yMfCrMWXzwDUj+zuAR4H3AO9O8vOuj+1JHq2qaxijqg4CBwEGg0Et17ckaXX6Xko6DLz6LaN9wLfH1DwEXJ9kS3fT+Xrgoar6x6p6e1XNAn8M/GSpUJA
krZ++wXA3cF2SE8B13T5JBkm+DFBVp1m4l/B497irG5MknYNStfGuygwGgxoOh9NuQ5I2lCRHq2qwXJ2/fJYkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNQwGSVLDYJAkNVJV0+7hrCU5Bfxi2n2cpYuAX0+7iXXmmn83uOaN4/eqattyRRsyGDaiJMOqGky7j/Xkmn83uObXHy8lSZIaBoMkqWEwrJ+D025gClzz7wbX/DrjPQZJUsMzBklSw2CYoCRbkxxJcqJ73rJE3b6u5kSSfWPmDyf54dp33F+fNSd5U5LvJPlxkmNJ7l7f7s9Okt1JjieZS3JgzPzmJA90848lmR2Z+1Q3fjzJDevZdx+rXXOS65IcTfKD7vm96937avT5jLv5nUleTPKJ9ep5TVSVjwk9gHuBA932AeCeMTVbgZPd85Zue8vI/AeBfwZ+OO31rPWagTcBf9rVvBH4N+DGaa9piXVuAn4KvKPr9T+BXYtq/gr4p257L/BAt72rq98MXNYdZ9O017TGa34X8PZu+w+AX057PWu53pH5bwL/Anxi2uvp8/CMYbL2AIe67UPATWNqbgCOVNXpqnoeOALsBkjyFuDjwGfXoddJWfWaq+qlqvoeQFW9AjwB7FiHnlfjKmCuqk52vd7PwtpHjf5bPAhcmyTd+P1V9XJV/QyY6453rlv1mqvqyap6phs/BpyXZPO6dL16fT5jktzEwh89x9ap3zVjMEzWW6vqWYDuefuYmkuAp0f257sxgM8AnwNeWssmJ6zvmgFIcgHwfuDhNeqzr2XXMFpTVWeAF4ALV/jac1GfNY/6EPBkVb28Rn1OyqrXm+TNwCeBO9ehzzU3M+0GNpok3wXeNmbq9pUeYsxYJXkncHlVfWzxdctpW6s1jxx/Bvg68IWqOnn2Ha6L11zDMjUree25qM+aFyaTK4F7gOsn2Nda6bPeO4H7qurF7gRiQzMYzlJVvW+puSTPJbm4qp5NcjHwqzFl88A1I/s7gEeB9wDvTvJzFj6X7UkeraprmLI1XPOrDgInqurzE2h3rcwDl47s7wCeWaJmvgu784HTK3ztuajPmkmyA/gW8OGq+unat9tbn/VeDdyc5F7gAuC3SX5TVV9c+7bXwLRvcryeHsDf0d6IvXdMzVbgZyzcfN3SbW9dVDPLxrn53GvNLNxP+SbwhmmvZZl1zrBw/fgy/v/G5JWLaj5Ke2PyG932lbQ3n0+yMW4+91nzBV39h6a9jvVY76KaT7PBbz5PvYHX04OFa6sPAye651f/8xsAXx6p+0sWbkDOAX8x5jgbKRhWvWYW/iIr4EfAU93jI9Ne02us9c+An7DwzZXbu7G7gA902+ex8I2UOeD7wDtGXnt797rjnKPfvJrkmoG/Af575HN9Ctg+7fWs5Wc8cowNHwz+8lmS1PBbSZKkhsEgSWoYDJKkhsEgSWoYDJKkhsEgSWoYDJKkhsEgSWr8L4G+I6VKUcyzAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#NOTE(review): cyt_err_test / cyt_err_train are created as empty lists and never appended to in the visible cells, so this plot is empty\n", "plt.plot(cyt_err_test)\n", "plt.plot(cyt_err_train)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Train on all data instead of just the train set" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model3 = Sequential()\n", "model3.add(Dense(3, input_shape=(8,), activation='sigmoid'))\n", "model3.add(Dense(3, activation='sigmoid'))\n", "#one output unit per one-hot target column; a hard-coded 9 does not match the 10 localization columns\n", "model3.add(Dense(len(localization), activation='softmax'))\n", "\n", "model3.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "history = model3.fit(master[data].values, master[localization].values, epochs = numepochs, batch_size=batch)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#EVALUATE MODELS ON ALL DATA: evaluate() returns [loss, accuracy], the metrics set in compile()\n", "\n", "model.evaluate(master[data].values, master[localization].values, batch_size=batch)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model3.evaluate(master[data].values, master[localization].values, batch_size=batch)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }