from tensorflow import keras
# Fashion-MNIST: 70,000 grayscale 28x28 images of clothing in 10 classes.
dataset = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test_full, y_test_full) = dataset.load_data()
X_train_full.shape  # shape of the full training set: (samples, height, width)
(60000, 28, 28)
X_test_full.shape  # shape of the test set
(10000, 28, 28)
The 70,000 images of the full dataset are separated into 60,000 training and 10,000 testing images, and each image is a 28-by-28 2D array.
Now we need to create a validation set and scale the inputs for the neural network.
Scaling is done by dividing the number by the largest which is 255 since a pixel is between 0 and 255
# Take the first 5,000 training images as a validation set and scale pixel
# values to [0, 1] by dividing by 255 (the maximum pixel intensity).
X_valid, X_train=X_train_full[:5000]/255.0,X_train_full[5000:]/255.0
y_valid, y_train=y_train_full[:5000],y_train_full[5000:]
X_test=X_test_full/255.0
y_test=y_test_full  # labels are integer class indices; no scaling needed
The first 5,000 images form the validation set and the remaining 55,000 of the 60,000 form the training set.
Creating the model using sequential API¶
# Sequential API: a single stack of layers applied one after another.
model = keras.models.Sequential([
    # Preprocessing layer: flatten each 28x28 image into a 784-element vector.
    keras.layers.Flatten(input_shape=[28, 28]),
    # Two dense hidden layers with ReLU activation.
    keras.layers.Dense(300, activation='relu'),
    keras.layers.Dense(100, activation='relu'),
    # Output layer: one neuron per class; softmax because the 10 classes
    # are mutually exclusive (multiclass classification).
    keras.layers.Dense(10, activation='softmax'),
])
1-Create the sequential model(single stack of layers connected sequentially)
2-First layer used for data preprocessing (flattening, converting 2D to 1D)
3-Dense hidden layer with 300 neurons ,activation function relu which is the norm
4-Second hidden layer
5-Dense output layer with 10 neurons since there are 10 classes ,use softmax (because the classes are exclusive and this is a multiclass classification)
Alternatively, we can pass a list of layers when creating the model, as below:
# Equivalent construction of the same network using repeated add() calls.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation='relu'))
model.add(keras.layers.Dense(100, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))
model.summary() #all the model layers ("None" in Output Shape means the batch size can be anything)
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ flatten_1 (Flatten) │ (None, 784) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_3 (Dense) │ (None, 300) │ 235,500 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_4 (Dense) │ (None, 100) │ 30,100 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_5 (Dense) │ (None, 10) │ 1,010 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 266,610 (1.02 MB)
Trainable params: 266,610 (1.02 MB)
Non-trainable params: 0 (0.00 B)
model.layers #list of the model's layers, in order
[<Flatten name=flatten_1, built=True>, <Dense name=dense_3, built=True>, <Dense name=dense_4, built=True>, <Dense name=dense_5, built=True>]
hidden1=model.layers[1] #access an individual layer by its index
hidden1.name
'dense_3'
weights,biases=hidden1.get_weights() #get each layer's parameters by calling the get_weights function
weights
array([[-0.00329745, 0.055204 , 0.066624 , ..., 0.0559136 ,
-0.05367599, 0.02627006],
[ 0.02993034, 0.0679446 , 0.03813273, ..., -0.04538053,
0.02183852, 0.00674835],
[-0.0256972 , 0.03161852, 0.07265496, ..., 0.0385574 ,
-0.02168931, 0.04781467],
...,
[ 0.03442346, -0.01825543, 0.01425272, ..., -0.00487291,
-0.03699834, -0.03362278],
[-0.07326835, -0.03743588, -0.03252079, ..., 0.0719367 ,
0.04672862, 0.04777335],
[-0.01954559, -0.0738189 , 0.06967571, ..., 0.04953486,
0.03712752, 0.02743707]], dtype=float32)
Compiling the model¶
# Configure training: sparse categorical cross-entropy because the labels
# are integer class indices (one target per instance, not one-hot vectors);
# plain stochastic gradient descent; track accuracy during training.
model.compile(optimizer='sgd',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
loss-because we have sparse labels (for each instance there is only one target class index)
optimizer-stochastic gradient descent
useful to measure accuracy
Training and evaluating the model¶
history=model.fit(X_train,y_train,epochs=30,
validation_data=(X_valid,y_valid)) #instead of validation_data we can use validation_split=0.1 etc. (use 10% of the data, taken before shuffling, for validation)
Epoch 1/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.6673 - loss: 1.0451 - val_accuracy: 0.8332 - val_loss: 0.4993 Epoch 2/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 4ms/step - accuracy: 0.8212 - loss: 0.5116 - val_accuracy: 0.8474 - val_loss: 0.4474 Epoch 3/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.8415 - loss: 0.4538 - val_accuracy: 0.8582 - val_loss: 0.4227 Epoch 4/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.8511 - loss: 0.4199 - val_accuracy: 0.8656 - val_loss: 0.3993 Epoch 5/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.8606 - loss: 0.3997 - val_accuracy: 0.8618 - val_loss: 0.3916 Epoch 6/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.8626 - loss: 0.3848 - val_accuracy: 0.8692 - val_loss: 0.3779 Epoch 7/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.8700 - loss: 0.3671 - val_accuracy: 0.8692 - val_loss: 0.3773 Epoch 8/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 4ms/step - accuracy: 0.8767 - loss: 0.3511 - val_accuracy: 0.8710 - val_loss: 0.3628 Epoch 9/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 4ms/step - accuracy: 0.8767 - loss: 0.3452 - val_accuracy: 0.8696 - val_loss: 0.3654 Epoch 10/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.8773 - loss: 0.3420 - val_accuracy: 0.8700 - val_loss: 0.3630 Epoch 11/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.8813 - loss: 0.3328 - val_accuracy: 0.8766 - val_loss: 0.3437 Epoch 12/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.8863 - loss: 0.3186 - val_accuracy: 0.8818 - val_loss: 0.3297 Epoch 13/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.8875 - loss: 0.3119 - val_accuracy: 0.8816 - val_loss: 0.3277 Epoch 14/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.8928 - loss: 0.3027 - val_accuracy: 0.8826 - val_loss: 0.3278 Epoch 15/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.8909 - loss: 0.3006 - val_accuracy: 0.8846 - val_loss: 0.3184 Epoch 16/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 
9s 5ms/step - accuracy: 0.8964 - loss: 0.2892 - val_accuracy: 0.8848 - val_loss: 0.3201 Epoch 17/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.8971 - loss: 0.2854 - val_accuracy: 0.8826 - val_loss: 0.3180 Epoch 18/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 8s 4ms/step - accuracy: 0.8993 - loss: 0.2785 - val_accuracy: 0.8868 - val_loss: 0.3079 Epoch 19/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.9004 - loss: 0.2753 - val_accuracy: 0.8854 - val_loss: 0.3201 Epoch 20/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.9025 - loss: 0.2712 - val_accuracy: 0.8818 - val_loss: 0.3218 Epoch 21/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.9034 - loss: 0.2659 - val_accuracy: 0.8864 - val_loss: 0.3068 Epoch 22/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.9085 - loss: 0.2551 - val_accuracy: 0.8862 - val_loss: 0.3132 Epoch 23/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.9081 - loss: 0.2573 - val_accuracy: 0.8880 - val_loss: 0.3074 Epoch 24/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 4ms/step - accuracy: 0.9089 - loss: 0.2524 - val_accuracy: 0.8886 - val_loss: 0.3003 Epoch 25/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.9117 - loss: 0.2453 - val_accuracy: 0.8938 - val_loss: 0.2993 Epoch 26/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.9111 - loss: 0.2423 - val_accuracy: 0.8844 - val_loss: 0.3162 Epoch 27/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.9142 - loss: 0.2389 - val_accuracy: 0.8906 - val_loss: 0.3090 Epoch 28/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.9165 - loss: 0.2326 - val_accuracy: 0.8924 - val_loss: 0.3022 Epoch 29/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 4ms/step - accuracy: 0.9161 - loss: 0.2293 - val_accuracy: 0.8926 - val_loss: 0.3014 Epoch 30/30 1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.9200 - loss: 0.2252 - val_accuracy: 0.8912 - val_loss: 0.2997
If the performance on the training set is much better than on the validation set, most likely the model is overfitting the training set or there is a bug.
When calling the fit() method we can pass the class_weight argument if some classes are over- or under-represented; Keras will then weight the classes accordingly during training.
Training parameters¶
history.params  # parameters used for the training run
{'verbose': 'auto', 'epochs': 30, 'steps': 1719}
List of epochs¶
history.epoch  # list of epoch indices (0-based)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
Dictionary containing a list of loss and extra metrics it measured at the end of each epoch¶
history.history  # dict of per-epoch loss and metric values for training and validation
{'accuracy': [0.756672739982605,
0.8289090991020203,
0.8434908986091614,
0.8537818193435669,
0.8593636155128479,
0.8651272654533386,
0.8691818118095398,
0.8733817934989929,
0.876763641834259,
0.8799818158149719,
0.8820727467536926,
0.8853999972343445,
0.88718181848526,
0.8910727500915527,
0.8918545246124268,
0.8947091102600098,
0.8972545266151428,
0.899327278137207,
0.9008181691169739,
0.903145432472229,
0.9047818183898926,
0.9071454405784607,
0.9079272747039795,
0.9095090627670288,
0.9120000004768372,
0.9125090837478638,
0.914545476436615,
0.916345477104187,
0.9165818095207214,
0.9180545210838318],
'loss': [0.7378497123718262,
0.48883289098739624,
0.4449765384197235,
0.4175524413585663,
0.3985403776168823,
0.3820713460445404,
0.36844417452812195,
0.3559795916080475,
0.3463549017906189,
0.3368052840232849,
0.3285277485847473,
0.31977418065071106,
0.31097421050071716,
0.30485057830810547,
0.2986160218715668,
0.2914504408836365,
0.28552526235580444,
0.2800866365432739,
0.27491796016693115,
0.2690766453742981,
0.2633378207683563,
0.25915294885635376,
0.2552708387374878,
0.25015559792518616,
0.24597640335559845,
0.24102188646793365,
0.23761503398418427,
0.23326702415943146,
0.2296309769153595,
0.22661589086055756],
'val_accuracy': [0.8331999778747559,
0.8474000096321106,
0.8582000136375427,
0.8655999898910522,
0.8618000149726868,
0.8691999912261963,
0.8691999912261963,
0.8709999918937683,
0.8695999979972839,
0.8700000047683716,
0.8766000270843506,
0.8817999958992004,
0.881600022315979,
0.8826000094413757,
0.8845999836921692,
0.8848000168800354,
0.8826000094413757,
0.8867999911308289,
0.8853999972343445,
0.8817999958992004,
0.8863999843597412,
0.8862000107765198,
0.8880000114440918,
0.8885999917984009,
0.8938000202178955,
0.8844000101089478,
0.8906000256538391,
0.8924000263214111,
0.8925999999046326,
0.8912000060081482],
'val_loss': [0.49927231669425964,
0.4473873972892761,
0.42269834876060486,
0.39929434657096863,
0.39162975549697876,
0.3779332935810089,
0.3772680163383484,
0.3627755641937256,
0.36535489559173584,
0.3629917502403259,
0.3437325060367584,
0.3296666741371155,
0.3277183175086975,
0.32775723934173584,
0.31836092472076416,
0.3201417326927185,
0.317959189414978,
0.30793848633766174,
0.32010623812675476,
0.321804940700531,
0.3068252205848694,
0.3132171034812927,
0.30735939741134644,
0.30028796195983887,
0.29932069778442383,
0.316245973110199,
0.3089560270309448,
0.3021879494190216,
0.301445871591568,
0.29968366026878357]}
import pandas as pd
import matplotlib.pyplot as plt
# Plot training/validation loss and accuracy per epoch as learning curves.
pd.DataFrame(history.history).plot(figsize=(8,5))
plt.grid(True)
plt.gca().set_ylim(0,1) # vertical range from 0 to 1
(0.0, 1.0)
model.evaluate(X_test,y_test)  # final [loss, accuracy] on the held-out test set
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8812 - loss: 0.3327
[0.33464494347572327, 0.8809999823570251]
Make predictions¶
X_new=X_test[:3] #take first 3 samples
y_proba=model.predict(X_new)  # one probability per class for each sample
y_proba.round(2) #round to 2 decimal places
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 61ms/step
array([[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.01, 0. , 0.98],
[0. , 0. , 1. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
[0. , 1. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ]],
dtype=float32)
For each instance the model estimates one probability per class; the class with the highest probability is the prediction.
# Sequential.predict_classes() was removed from Keras (TF 2.6+), and it was
# being called on a single unbatched sample anyway. The modern equivalent is
# to take the argmax of the predicted class probabilities per sample.
y_pred=y_proba.argmax(axis=1) #predicted class index for each of the 3 samples
y_test[:3] #take first 3 samples (true labels)
y_test[:3] == y_pred #compare the predicted with the actual values
#model.save('my_model.keras') #save the model (native Keras format; '.h5' is the legacy format)
array([ True, True, True])