Data analytics with Python
19.04.2021
Data analysis tries to make sense of data by placing it in a visual context, using programming tools to analyze complex data in a variety of real-world scenarios.

In this article, we will use the following Python libraries to create plots and analyze data:
Matplotlib (https://matplotlib.org/)
Pandas (https://pandas.pydata.org/docs/)
Plotly (https://plotly.com/python/)
Using pandas read_csv
- import pandas as pd
- iris = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'])
- print(iris.head())

Scatter Plot in matplotlib
- import matplotlib.pyplot as plt
- fig, ax = plt.subplots()
- # scatter the sepal_length against the sepal_width
- ax.scatter(iris['sepal_length'], iris['sepal_width'])
- # set a title and labels
- ax.set_title('Iris Dataset')
- ax.set_xlabel('sepal_length')
- ax.set_ylabel('sepal_width')

- # create color dictionary
- colors = {'Iris-setosa':'r', 'Iris-versicolor':'g', 'Iris-virginica':'b'}
- # create a figure and axis
- fig, ax = plt.subplots()
- # plot each data-point
- for i in range(len(iris['sepal_length'])):
-     ax.scatter(iris['sepal_length'][i], iris['sepal_width'][i],color=colors[iris['class'][i]])
- # set a title and labels
- ax.set_title('Iris Dataset')
- ax.set_xlabel('sepal_length')
- ax.set_ylabel('sepal_width')

- # get columns to plot
- columns = iris.columns.drop(['class'])
- # create x data
- x_data = range(0, iris.shape[0])
- # create figure and axis
- fig, ax = plt.subplots()
- # plot each column
- for column in columns:
-     ax.plot(x_data, iris[column], label=column)
- # set title and legend
- ax.set_title('Iris Dataset')
- ax.legend()

- #Multiple Histograms
- iris.plot.hist(subplots=True, layout=(2,2), figsize=(10, 10), bins=20)

- import numpy as np
- # get correlation matrix of the numeric columns ('class' holds strings, so drop it first)
- corr = iris.drop(columns=['class']).corr()
- fig, ax = plt.subplots()
- # create heatmap
- im = ax.imshow(corr.values)
- # set labels
- ax.set_xticks(np.arange(len(corr.columns)))
- ax.set_yticks(np.arange(len(corr.columns)))
- ax.set_xticklabels(corr.columns)
- ax.set_yticklabels(corr.columns)
- # Rotate the tick labels and set their alignment.
- plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
-          rotation_mode="anchor")

- # get correlation matrix of the numeric columns ('class' holds strings, so drop it first)
- corr = iris.drop(columns=['class']).corr()
- fig, ax = plt.subplots()
- # create heatmap
- im = ax.imshow(corr.values)
- # set labels
- ax.set_xticks(np.arange(len(corr.columns)))
- ax.set_yticks(np.arange(len(corr.columns)))
- ax.set_xticklabels(corr.columns)
- ax.set_yticklabels(corr.columns)
- # Rotate the tick labels and set their alignment.
- plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
-          rotation_mode="anchor")
- # Loop over data dimensions and create text annotations.
- for i in range(len(corr.columns)):
-     for j in range(len(corr.columns)):
-         text = ax.text(j, i, np.around(corr.iloc[i, j], decimals=2),
-                        ha="center", va="center", color="black")

In this article, we looked at data visualization with Matplotlib and pandas in Python. The code is available on GitHub at:
https://github.com/Tomas10000/data_analytics_python/blob/master/data_analytics_in_python.ipynb