Hackathon - 6.0

Category - A: AI - Natural Disaster Prediction
# Core dependencies for the rainfall-prediction notebook.
import tensorflow as tf
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline
from sklearn.preprocessing import LabelEncoder
# Shared encoder instance, used below to integer-encode SUBDIVISION names.
le = LabelEncoder()
# Confirm the TensorFlow version in use (recorded output below: 2.7.0).
tf.print(tf.__version__)
2.7.0
# Load the 1901-2015 monthly rainfall dataset for Indian subdivisions.
data = pd.read_csv('./data/rainfall-in-india-1901-2015.csv')
# Bare expression: the notebook displays the last 20 rows (output below).
data.tail(20)
SUBDIVISION YEAR JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC ANNUAL Jan-Feb Mar-May Jun-Sep Oct-Dec
4096 LAKSHADWEEP 1996 44.7 1.1 1.6 17.4 50.0 427.1 335.3 197.3 230.4 109.0 60.5 131.6 1606.0 45.8 69.0 1190.1 301.1
4097 LAKSHADWEEP 1997 2.2 0.1 4.9 33.8 62.3 307.0 459.6 216.8 144.0 213.5 200.8 119.7 1764.7 2.3 101.0 1127.4 534.0
4098 LAKSHADWEEP 1998 52.0 0.0 1.8 40.3 68.2 382.0 388.8 196.7 274.7 184.8 144.1 253.5 1986.9 52.0 110.3 1242.2 582.4
4099 LAKSHADWEEP 1999 47.8 2.5 18.3 20.6 416.7 279.6 459.4 133.8 73.4 305.0 51.2 49.0 1857.3 50.3 455.6 946.2 405.2
4100 LAKSHADWEEP 2000 83.3 18.9 3.4 47.9 204.6 225.4 95.5 319.9 164.5 141.4 56.3 11.0 1372.1 102.2 255.9 805.3 208.7
4101 LAKSHADWEEP 2001 4.4 20.4 0.0 104.6 187.3 283.9 198.9 144.3 213.5 105.2 101.5 16.6 1380.6 24.8 291.9 840.6 223.3
4102 LAKSHADWEEP 2002 10.8 16.8 7.2 23.4 189.8 261.8 81.3 143.9 50.0 178.2 52.9 17.4 1033.5 27.6 220.4 537.0 248.5
4103 LAKSHADWEEP 2003 11.8 18.2 28.5 18.1 109.6 364.5 400.6 92.1 84.3 191.6 206.1 7.5 1532.9 30.0 156.2 941.5 405.2
4104 LAKSHADWEEP 2004 7.2 1.5 1.9 7.7 330.2 251.2 280.8 169.5 200.0 193.4 107.6 2.2 1553.2 8.7 339.8 901.5 303.2
4105 LAKSHADWEEP 2005 17.6 11.1 0.0 37.0 92.8 248.5 378.9 102.4 278.0 164.2 218.3 26.6 1575.4 28.7 129.8 1007.8 409.1
4106 LAKSHADWEEP 2006 20.1 0.0 33.0 0.3 327.9 286.9 172.3 150.7 318.5 119.1 158.9 10.9 1598.6 20.1 361.2 928.4 288.9
4107 LAKSHADWEEP 2007 2.5 4.2 0.2 22.2 166.2 573.4 427.4 294.7 457.5 256.1 47.6 109.6 2361.6 6.7 188.6 1753.0 413.3
4108 LAKSHADWEEP 2008 5.5 19.8 120.7 15.8 180.4 254.6 363.9 206.6 108.9 252.9 67.6 130.1 1726.8 25.3 316.9 934.0 450.6
4109 LAKSHADWEEP 2009 4.7 1.5 0.1 18.1 162.1 401.2 266.4 185.0 145.1 87.4 166.2 132.3 1570.1 6.2 180.3 997.7 385.9
4110 LAKSHADWEEP 2010 18.8 0.0 1.2 35.6 79.0 318.9 336.7 335.1 161.5 155.4 201.5 81.5 1725.2 18.8 115.8 1152.2 438.4
4111 LAKSHADWEEP 2011 5.1 2.8 3.1 85.9 107.2 153.6 350.2 254.0 255.2 117.4 184.3 14.9 1533.7 7.9 196.2 1013.0 316.6
4112 LAKSHADWEEP 2012 19.2 0.1 1.6 76.8 21.2 327.0 231.5 381.2 179.8 145.9 12.4 8.8 1405.5 19.3 99.6 1119.5 167.1
4113 LAKSHADWEEP 2013 26.2 34.4 37.5 5.3 88.3 426.2 296.4 154.4 180.0 72.8 78.1 26.7 1426.3 60.6 131.1 1057.0 177.6
4114 LAKSHADWEEP 2014 53.2 16.1 4.4 14.9 57.4 244.1 116.1 466.1 132.2 169.2 59.0 62.3 1395.0 69.3 76.7 958.5 290.5
4115 LAKSHADWEEP 2015 2.2 0.5 3.7 87.1 133.1 296.6 257.5 146.4 160.4 165.4 231.0 159.0 1642.9 2.7 223.9 860.9 555.4
# Distinct meteorological subdivisions in the dataset (36 — see output below).
subdivision = data["SUBDIVISION"].unique()
len(subdivision)
36
# Integer-encode the SUBDIVISION names so the model can consume them.
label_sub = le.fit_transform(data["SUBDIVISION"])
# Replace the text column in place on a copy; SUBDIVISION keeps its
# leading position, so the result is identical to drop-then-insert.
updated_data = data.copy()
updated_data["SUBDIVISION"] = label_sub
updated_data
SUBDIVISION YEAR JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC ANNUAL Jan-Feb Mar-May Jun-Sep Oct-Dec
0 0 1901 49.2 87.1 29.2 2.3 528.8 517.5 365.1 481.1 332.6 388.5 558.2 33.6 3373.2 136.3 560.3 1696.3 980.3
1 0 1902 0.0 159.8 12.2 0.0 446.1 537.1 228.9 753.7 666.2 197.2 359.0 160.5 3520.7 159.8 458.3 2185.9 716.7
2 0 1903 12.7 144.0 0.0 1.0 235.1 479.9 728.4 326.7 339.0 181.2 284.4 225.0 2957.4 156.7 236.1 1874.0 690.6
3 0 1904 9.4 14.7 0.0 202.4 304.5 495.1 502.0 160.1 820.4 222.2 308.7 40.1 3079.6 24.1 506.9 1977.6 571.0
4 0 1905 1.3 0.0 3.3 26.9 279.5 628.7 368.7 330.5 297.0 260.7 25.4 344.7 2566.7 1.3 309.7 1624.9 630.8
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4111 18 2011 5.1 2.8 3.1 85.9 107.2 153.6 350.2 254.0 255.2 117.4 184.3 14.9 1533.7 7.9 196.2 1013.0 316.6
4112 18 2012 19.2 0.1 1.6 76.8 21.2 327.0 231.5 381.2 179.8 145.9 12.4 8.8 1405.5 19.3 99.6 1119.5 167.1
4113 18 2013 26.2 34.4 37.5 5.3 88.3 426.2 296.4 154.4 180.0 72.8 78.1 26.7 1426.3 60.6 131.1 1057.0 177.6
4114 18 2014 53.2 16.1 4.4 14.9 57.4 244.1 116.1 466.1 132.2 169.2 59.0 62.3 1395.0 69.3 76.7 958.5 290.5
4115 18 2015 2.2 0.5 3.7 87.1 133.1 296.6 257.5 146.4 160.4 165.4 231.0 159.0 1642.9 2.7 223.9 860.9 555.4

4116 rows × 19 columns

# Report every column that still contains missing values before imputation.
# BUG FIX: the original condition was `> 1`, which silently skipped any
# column with exactly one NaN. Any missing value should be reported.
# (The recorded output below lists all columns with 0 counts, suggesting the
# condition was absent in the run that produced it.)
for x in updated_data.columns:
    null_count = updated_data[x].isnull().sum()  # computed once, not twice
    if null_count > 0:
        print(f"{x} : {null_count}")
SUBDIVISION : 0
YEAR : 0
JAN : 0
FEB : 0
MAR : 0
APR : 0
MAY : 0
JUN : 0
JUL : 0
AUG : 0
SEP : 0
OCT : 0
NOV : 0
DEC : 0
ANNUAL : 0
Jan-Feb : 0
Mar-May : 0
Jun-Sep : 0
Oct-Dec : 0
# Impute any remaining NaNs with the respective column mean.
# Vectorized replacement for the original per-column Python loop; all
# columns are numeric, so DataFrame.mean() covers every column.
updated_data = updated_data.fillna(updated_data.mean())

# Display the first 30 rows to sanity-check the imputed frame.
updated_data[:30]
SUBDIVISION YEAR JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC ANNUAL Jan-Feb Mar-May Jun-Sep Oct-Dec
0 0 1901 49.2 87.1 29.2 2.3 528.8 517.5 365.1 481.1 332.6 388.5 558.2 33.6 3373.2 136.3 560.3 1696.3 980.3
1 0 1902 0.0 159.8 12.2 0.0 446.1 537.1 228.9 753.7 666.2 197.2 359.0 160.5 3520.7 159.8 458.3 2185.9 716.7
2 0 1903 12.7 144.0 0.0 1.0 235.1 479.9 728.4 326.7 339.0 181.2 284.4 225.0 2957.4 156.7 236.1 1874.0 690.6
3 0 1904 9.4 14.7 0.0 202.4 304.5 495.1 502.0 160.1 820.4 222.2 308.7 40.1 3079.6 24.1 506.9 1977.6 571.0
4 0 1905 1.3 0.0 3.3 26.9 279.5 628.7 368.7 330.5 297.0 260.7 25.4 344.7 2566.7 1.3 309.7 1624.9 630.8
5 0 1906 36.6 0.0 0.0 0.0 556.1 733.3 247.7 320.5 164.3 267.8 128.9 79.2 2534.4 36.6 556.1 1465.8 475.9
6 0 1907 110.7 0.0 113.3 21.6 616.3 305.2 443.9 377.6 200.4 264.4 648.9 245.6 3347.9 110.7 751.2 1327.1 1158.9
7 0 1908 20.9 85.1 0.0 29.0 562.0 693.6 481.4 699.9 428.8 170.7 208.1 196.9 3576.4 106.0 591.0 2303.7 575.7
8 0 1910 26.6 22.7 206.3 89.3 224.5 472.7 264.3 337.4 626.6 208.2 267.3 153.5 2899.4 49.3 520.1 1701.0 629.0
9 0 1911 0.0 8.4 0.0 122.5 327.3 649.0 253.0 187.1 464.5 333.8 94.5 247.1 2687.2 8.4 449.8 1553.6 675.4
10 0 1912 583.7 0.8 0.0 21.9 140.7 549.8 468.9 370.3 386.2 318.7 117.2 2.3 2960.5 584.5 162.6 1775.2 438.2
11 0 1913 84.8 0.5 1.3 2.5 190.7 530.0 280.8 205.8 580.1 288.8 133.0 67.5 2365.8 85.3 194.5 1596.7 489.3
12 0 1914 0.0 0.0 0.0 37.7 298.8 383.3 792.8 520.5 310.8 139.8 184.4 289.7 2957.8 0.0 336.5 2007.4 613.9
13 0 1915 45.0 56.7 33.3 40.9 170.2 334.7 269.0 317.2 429.8 468.1 258.4 318.0 2741.3 101.7 244.4 1350.7 1044.5
14 0 1916 0.0 0.0 0.0 0.5 487.4 450.1 317.3 425.0 561.2 369.7 192.6 133.7 2937.5 0.0 487.9 1753.6 696.0
15 0 1917 8.0 3.6 112.0 4.5 295.9 301.1 394.8 437.4 471.8 238.1 108.3 236.9 2612.4 11.6 412.4 1605.1 583.3
16 0 1918 77.4 6.9 11.4 10.7 729.3 710.8 200.9 455.4 303.3 227.0 366.9 175.0 3275.0 84.3 751.4 1670.4 768.9
17 0 1919 10.2 18.0 0.0 35.5 283.9 542.5 246.5 259.8 170.7 186.2 340.4 258.4 2352.1 28.2 319.4 1219.5 785.0
18 0 1920 122.3 7.4 3.1 13.0 237.4 546.9 294.4 467.4 505.4 397.5 262.9 85.5 2943.2 129.7 253.5 1814.1 745.9
19 0 1921 13.2 3.1 0.0 37.5 351.2 282.7 487.1 330.0 581.2 360.7 118.2 41.5 2606.4 16.3 388.7 1681.0 520.4
20 0 1922 245.3 34.3 15.6 323.1 289.7 506.1 425.8 307.4 511.7 162.0 541.0 192.2 3554.2 279.6 628.4 1751.0 895.2
21 0 1923 79.5 0.0 0.0 91.3 293.5 808.4 636.9 182.2 560.5 131.9 197.4 70.6 0.0 79.5 0.0 2188.0 399.9
22 0 1924 28.7 0.0 14.8 89.7 191.2 261.2 493.3 290.9 251.2 331.1 378.6 0.0 0.0 28.7 295.7 1296.6 0.0
23 0 1925 36.6 0.0 8.6 50.4 282.2 663.8 241.8 278.2 201.9 249.5 271.5 196.0 2480.5 36.6 341.2 1385.7 717.0
24 0 1926 122.1 0.0 0.0 0.5 198.4 370.0 195.3 523.7 719.3 443.8 148.4 560.7 3282.2 122.1 198.9 1808.3 1152.9
25 0 1927 3.0 17.5 17.8 108.6 504.1 433.3 195.2 370.1 126.2 327.5 274.1 65.5 2442.9 20.5 630.5 1124.8 667.1
26 0 1928 50.9 67.6 80.7 129.3 499.5 410.2 406.3 391.5 404.8 444.5 99.5 13.5 2998.3 118.5 709.5 1612.8 557.5
27 0 1929 74.2 118.4 129.2 69.8 316.6 588.8 134.0 644.7 172.9 413.0 251.5 13.5 2926.6 192.6 515.6 1540.4 678.0
28 0 1930 87.4 105.4 131.2 10.9 231.5 533.6 317.9 446.7 677.2 82.3 249.4 201.6 3075.1 192.8 373.6 1975.4 533.3
29 0 1931 25.3 0.0 2.5 2.5 205.4 393.5 289.3 571.0 294.4 368.3 22.8 182.7 2357.7 25.3 210.4 1548.2 573.8
import os

# Persist the cleaned dataset once.
# `not ...` is the idiomatic truth test (the original compared `is False`).
if not os.path.isfile("./data/updated-data.csv"):
    updated_data.to_csv("./data/updated-data.csv", index=False)
# Shuffle all rows; a fixed seed makes the downstream positional
# train/test/val split reproducible across notebook runs.
updated_shuffled_data = updated_data.sample(frac=1, random_state=42)
import os

# Persist the shuffled dataset once.
# `not ...` is the idiomatic truth test (the original compared `is False`).
if not os.path.isfile("./data/updated-shuffled-data.csv"):
    updated_shuffled_data.to_csv("./data/updated-shuffled-data.csv", index=False)
# Sanity-check dimensions: 4116 rows x 19 columns (output below).
updated_shuffled_data.shape
(4116, 19)
# Bare expression: display the shuffled frame (output below).
updated_shuffled_data
SUBDIVISION YEAR JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC ANNUAL Jan-Feb Mar-May Jun-Sep Oct-Dec
2059 33 1913 0.0 7.6 3.0 0.0 28.2 168.7 290.0 220.4 58.5 0.4 0.3 13.4 790.5 7.6 31.2 737.7 14.1
2422 26 1931 0.2 0.6 0.0 0.1 3.1 8.1 115.0 119.2 20.8 60.1 2.5 0.0 329.7 0.7 3.2 263.1 62.6
1957 8 1926 6.1 1.1 28.7 2.5 18.6 13.9 216.1 342.6 202.3 0.2 0.0 0.2 832.4 7.3 49.8 774.9 0.4
1470 12 2014 13.0 17.3 26.8 7.5 20.3 25.9 72.3 34.8 67.3 10.5 0.2 9.6 305.5 30.3 54.6 200.2 20.3
2554 17 1948 10.5 0.0 0.2 2.3 7.2 599.3 1080.4 797.3 376.6 77.7 168.1 0.0 3119.8 10.5 9.8 2853.6 245.9
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1842 34 1926 5.4 0.3 19.4 0.8 21.4 4.0 95.6 179.4 113.6 0.0 0.0 0.3 440.2 5.7 41.6 392.6 0.3
398 21 1977 14.2 34.9 27.7 383.8 303.7 452.0 389.1 355.8 193.0 149.6 38.2 16.6 2358.5 49.1 715.1 1389.9 204.4
416 21 1995 5.4 36.5 52.0 68.7 254.5 425.8 403.6 495.9 312.5 129.7 155.3 0.2 2340.2 41.9 375.2 1637.9 285.3
533 28 1997 18.9 31.3 76.4 119.5 166.9 613.3 488.1 479.0 443.9 41.3 16.3 56.5 2551.4 50.2 362.9 2024.2 114.1
1718 14 1917 63.6 34.2 92.7 145.0 67.9 155.4 170.3 231.4 219.5 129.6 0.0 76.5 1386.1 97.8 305.6 776.7 206.0

4116 rows × 19 columns

# Feature: only the encoded SUBDIVISION id (column 0).
X = updated_shuffled_data.iloc[:, 0].to_numpy()
# Targets: columns 2..14 — the 12 monthly totals plus ANNUAL (13 values/row).
y = updated_shuffled_data.iloc[:, 2:15].to_numpy()
# Expect (4116, 13) — see output below.
y.shape
(4116, 13)

Splitting the data into three sets (train, test, validation)

using a 70:15:15 % split.

# Training split: the first 2882 rows, i.e. ~70% of the 4116 shuffled rows.
X_train = X[:2882]
y_train = y[:2882]
# NOTE(review): the original also ran `np.stack(X_train, axis=0)`, which is
# a no-op on an already 1-D array (same values, same (2882,) shape) and has
# been removed.

len(X_train), len(y_train), X_train.shape
(2882, 2882, (2882,))
# Test and validation splits (~15% each of the 4116 rows).
# BUG FIX: the original sliced X_val as X[3440:4116] but y_val as
# y[3449:4116], giving 676 inputs against 667 targets — the exact
# cardinality mismatch that made model.evaluate raise
# "ValueError: Data cardinality is ambiguous". The validation split also
# overlapped the test split (3440 < 3499). Both validation slices now start
# where the test slice ends, with matching x/y indices.
X_test = X[2882:3499]
y_test = y[2882:3499]
X_val = X[3499:4116]
y_val = y[3499:4116]
# NOTE(review): this is an alias, not a copy — `histogram` and
# `updated_shuffled_data` refer to the same DataFrame object.
histogram = updated_shuffled_data
# One histogram subplot per numeric column, then render the figure.
histogram.hist()
plt.show()
Building a multi-output prediction model
# Multi-output regression model: one encoded SUBDIVISION id in, 13
# continuous rainfall values (JAN..DEC + ANNUAL) out.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(30, activation="relu", name="hidden_layer1", input_shape=(1,)),
    tf.keras.layers.Dense(20, activation="relu", name="hidden_layer2"),
    # BUG FIX: the original used a sigmoid output with BinaryCrossentropy,
    # but the targets are unbounded rainfall amounts (hundreds of mm), not
    # probabilities in [0, 1] — that combination produced the NaN losses in
    # the recorded training log. A linear output head is correct for
    # continuous targets.
    tf.keras.layers.Dense(13, name="output_layer")
])
model.compile(
    # Mean-squared error is the standard loss for multi-output regression.
    loss=tf.keras.losses.MeanSquaredError(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    # "accuracy" is meaningless for regression; track mean absolute error.
    metrics=["mae"]
)
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test)
)
Epoch 1/10
91/91 [==============================] - 2s 7ms/step - loss: nan - accuracy: 6.9396e-04 - val_loss: nan - val_accuracy: 0.0016
Epoch 2/10
91/91 [==============================] - 0s 5ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0016
Epoch 3/10
91/91 [==============================] - 0s 5ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0016
Epoch 4/10
91/91 [==============================] - 0s 5ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0016
Epoch 5/10
91/91 [==============================] - 0s 4ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0016
Epoch 6/10
91/91 [==============================] - 0s 4ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0016
Epoch 7/10
91/91 [==============================] - 0s 4ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0016
Epoch 8/10
91/91 [==============================] - 0s 4ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0016
Epoch 9/10
91/91 [==============================] - 0s 5ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0016
Epoch 10/10
91/91 [==============================] - 0s 4ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0016
# Evaluate on the held-out validation split.
# NOTE(review): this call raised ValueError ("x sizes: 676, y sizes: 667")
# because X_val and y_val were sliced with different start indices — Keras
# requires x and y to contain the same number of samples.
model.evaluate(X_val, y_val)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_380/658241619.py in <module>
----> 1 model.evaluate(X_val, y_val)

~\anaconda3\envs\Tensorflow_@\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
     65     except Exception as e:  # pylint: disable=broad-except
     66       filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67       raise e.with_traceback(filtered_tb) from None
     68     finally:
     69       del filtered_tb

~\anaconda3\envs\Tensorflow_@\lib\site-packages\keras\engine\data_adapter.py in _check_data_cardinality(data)
   1655                            for i in tf.nest.flatten(single_data)))
   1656     msg += "Make sure all arrays contain the same number of samples."
-> 1657     raise ValueError(msg)
   1658 
   1659 

ValueError: Data cardinality is ambiguous:
  x sizes: 676
  y sizes: 667
Make sure all arrays contain the same number of samples.
# `squeeze` is a no-op here: X is already 1-D, so both expressions below
# display the same array of encoded subdivision ids.
X.squeeze()
array([33, 26,  8, ..., 21, 28, 14])
X
array([33, 26,  8, ..., 21, 28, 14])