Thursday, 31 March 2022

Pandas #04 - Data Aggregation

AGGREGATE

Data Frame Aggregation

Python offers several ways to aggregate data, chiefly through the pandas and NumPy libraries. A pandas DataFrame supports aggregation directly through its agg() method. Let us see how to apply it:

import pandas as pd
import numpy as np

# Three numeric rows followed by one all-NaN row; the NaN row does not
# contribute to the sum/min results printed below.
rows = [
    [1, 2, 3, 4, 5],
    [4, 5, 6, 7, 8],
    [7, 8, 9, 10, 11],
    [np.nan] * 5,
]
df = pd.DataFrame(rows, columns=list('ABCDE'))

# Aggregate down the rows: one 'sum' row and one 'min' row, per column.
dfagg = df.agg(['sum', 'min'])

print(dfagg)

"""
       A     B     C     D     E
sum  12.0  15.0  18.0  21.0  24.0
min   1.0   2.0   3.0   4.0   5.0
"""

Aggregating different aggregates over columns

# A dict maps each column to its own list of aggregate functions.
# A function not requested for a column shows up as NaN in that column.

per_column = df.agg({'A': ['sum', 'min'], 'B': ['min', 'max']})
print(per_column)
"""
       A    B
sum  12.0  NaN
min   1.0  2.0
max   NaN  8.0
"""

The describe() method displays summary statistics for each column: count, mean, std, min, max, and the 25%, 50%, and 75% quartiles.

# Summary statistics per column (count, mean, std, min, quartiles, max).
summary = df.describe()
print(summary)

"""
        A    B    C     D     E
count  3.0  3.0  3.0   3.0   3.0
mean   4.0  5.0  6.0   7.0   8.0
std    3.0  3.0  3.0   3.0   3.0
min    1.0  2.0  3.0   4.0   5.0
25%    2.5  3.5  4.5   5.5   6.5
50%    4.0  5.0  6.0   7.0   8.0
75%    5.5  6.5  7.5   8.5   9.5
max    7.0  8.0  9.0  10.0  11.0
"""

Transformation and manipulation on elements are very easy. Let us see some code snippets

Let us assume we want to add 1 to every element of the DataFrame above.

# Add 1 to every element; NaN entries stay NaN.
incremented = df.transform(lambda col: col + 1)
print(incremented)
"""
     A    B     C     D     E
0  2.0  3.0   4.0   5.0   6.0
1  5.0  6.0   7.0   8.0   9.0
2  8.0  9.0  10.0  11.0  12.0
3  NaN  NaN   NaN   NaN   NaN
"""

We can use groupby() too.


# The same four dates appear twice, so each date owns two "Data" readings.
dates = ["2019-05-08", "2019-05-07", "2019-05-06", "2019-05-05"]
readings = [5, 8, 6, 1, 50, 100, 60, 120]
df = pd.DataFrame({"Date": dates * 2, "Data": readings})
print(df)
"""
        Date  Data
0  2019-05-08     5
1  2019-05-07     8
2  2019-05-06     6
3  2019-05-05     1
4  2019-05-08    50
5  2019-05-07   100
6  2019-05-06    60
7  2019-05-05   120
"""

# transform('sum') writes each date's total back onto every row of that date,
# so the result has the same length as the original frame.
daily_totals = df.groupby('Date')['Data'].transform('sum')
print(daily_totals)

"""
0     55
1    108
2     66
3    121
4     55
5    108
6     66
7    121
Name: Data, dtype: int64
"""

We can group by different levels of a hierarchical index using the level parameter. Please note the use of the pd.MultiIndex.from_arrays() method. There are many ways to group by a MultiIndex.


# Two parallel arrays, one per index level: the first becomes the outer
# level ("Gender"), the second the inner level ("Type").
arrays = [['male', 'male', 'female', 'female'],
          ['young', 'old', 'young', 'old']]


index = pd.MultiIndex.from_arrays(arrays, names=('Gender', 'Type'))
df = pd.DataFrame({'Max Enthu': [390., 350., 30., 20.]},
                  index=index)

print(df)


"""
              Max Enthu
Gender Type
male   young      390.0
       old        350.0
female young       30.0
       old         20.0
"""


# Group by the outer level, addressed by position (level 0 is "Gender").
by_level_0 = df.groupby(level=0).mean()
print(by_level_0)
"""
        Max Enthu
Gender
female       25.0
male        370.0

"""


# Group by the inner level, addressed by name. The original grouped by
# "Gender" here, which did not match the "Type" output documented below.
by_type = df.groupby(level="Type").mean()
print(by_type)
"""
       Max Enthu
Type
old        185.0
young      210.0
"""


# Group by the outer level again, this time addressed by name.
by_gender = df.groupby(level="Gender").mean()
print(by_gender)
"""
        Max Enthu
Gender
female       25.0
male        370.0
"""

Happy Learning at AMET!!!

Tuesday, 29 March 2022

Fundas2

Some Basic Functions


# for <loop_variable> in <iterable>:
#     <code>
for i in range(10):
    print(i)
print(25 * '-')
# for <loop_variable> in range(<start>, <stop>, <step>):
#     <code>
for i in range(1, 10, 2):
    print(i)
# Growing triangle of 'fun': 1, 3, 5, ... 29 repetitions per line.
for i in range(1, 30, 2):
    print('fun' * i)
# Shrinking triangle: 30, 28, ... 2 repetitions per line.
for i in range(30, 1, -2):
    print('fun' * i)
l = ['apple', 'boy', 'cat', 'dog']
# Iterate the list directly instead of indexing through range(len(l)).
for item in l:
    print(item)
# Named `text` rather than `str`: rebinding `str` would hide the built-in
# type from later code such as Series.map(str) / astype(str).
text = ' I am iterable by seperation?'
# A string is iterable character by character.
for ch in text:
    print(ch)


# Break: leave the loop as soon as the first even element is seen
lis = [1, 2, 3, 4, 5]


for elem in lis:
    if elem % 2 != 0:
        print("Odd:", elem)
    else:
        print("Even:", elem)
        print("break")
        break
"""
Odd: 1
Even: 2
break
"""
# Continue: skip the rest of the loop body for even elements

for elem in lis:
    is_even = elem % 2 == 0
    if is_even:
        print("continue")
        continue
    print("Odd:", elem)
"""
Odd: 1
continue
Odd: 3
continue
Odd: 5
"""

# zip() walks several sequences in lockstep: each iteration yields one
# tuple holding the elements found at the same position in every sequence.

list1 = [10, 20, 30, 40]
list2 = [50, 60, 70, 80]
list3 = ['a', 'b', 'c', 'd']

for row in zip(list1, list2, list3):
    # Unpack the tuple so print() separates the items with spaces.
    print(*row)
"""
10 50 a
20 60 b
30 70 c
40 80 d
"""


# while <condition>:
#     <code>

# Countdown from 5 to 0: prints "Fun " repeated x times per line, so the
# final iteration (x == 0) prints an empty line.
x = 5
while x > -1:
    print(x * "Fun ")
    x = x - 1

# Nested for loops: three levels over the same range
dim = 3
axis = range(dim)
for outer in axis:
    for middle in axis:
        for inner in axis:
            print(outer, middle, inner)
        print('\n')  # gap after each run of `dim` triples
    print('\n')  # dim x dim x dim = 27 triples printed in total

# Print a left-aligned triangle of stars that shrinks by one star per row.
num_cols = 5
num_rows = 5
# The original reused `num_cols` as the inner loop variable, overwriting
# the setting, and hard-coded the row count as 5. Both settings are now
# read-only inputs.
for i in range(num_rows):
    # Row i holds num_cols - i stars (an empty line once that reaches 0).
    print("*" * (num_cols - i))


# Functions in Python

def fun1():
    """Print the literal text ``def()``; takes no arguments, returns None."""
    message = 'def()'
    print(message)


fun1()

def add(x, y):
    """Print the result of ``x + y``; nothing is returned."""
    total = x + y
    print(total)


add(5, 5)


def mulp(a, b=5):
    """Print the product ``a * b``; ``b`` falls back to 5 when omitted."""
    product = a * b
    print(product)


mulp(10)  # prints 50


# Recursive factorial function

def fact(n):
    """Return the factorial of ``n`` computed recursively.

    Args:
        n: A non-negative integer.

    Returns:
        ``n!``; both ``fact(0)`` and ``fact(1)`` are 1.

    Raises:
        ValueError: If ``n`` is negative. Without this guard the recursion
            would never reach the base case.
    """
    if n < 0:
        raise ValueError("fact() is not defined for negative values")
    if n <= 1:
        return 1
    return n * fact(n - 1)


f = fact(5)
print(f)  # 120

def fib(n):
    """Return the ``n``-th Fibonacci number computed recursively.

    The sequence starts ``fib(0) == 0`` and ``fib(1) == 1``.

    Args:
        n: A non-negative integer index into the sequence.

    Returns:
        The Fibonacci number at position ``n``.

    Raises:
        ValueError: If ``n`` is negative. Without this guard the recursion
            would never reach a base case.
    """
    if n < 0:
        raise ValueError("fib() is not defined for negative values")
    if n in (0, 1):
        return n
    return fib(n - 1) + fib(n - 2)


print(5 * '-')
f = fib(4)
print(f)  # 3

Class : Object oriented concepts in python using class

class student:
    """Simple record holding a name, company, country and salary.

    The class-level attributes are defaults; ``__init__`` overrides all
    four on each instance.
    """

    # Plain values. The original ended three of these lines with a trailing
    # comma, which silently turned them into one-element tuples.
    name = 'Steve Jobs'
    co = 'Apple'
    country = 'USA'
    sal = 1000000

    def __init__(self, param1, param2, param3, param4):
        """Store name, company, country and salary (in that order)."""
        self.name = param1
        self.co = param2
        self.country = param3
        self.sal = param4

    def display(self, param1, param2, param3, param4):
        """Print the four values passed in; instance attributes are not read."""
        print(param1, param2, param3, param4)


stu = student('Bill Gates', 'Micro Soft', 'USA', 100000)
print(stu.name, stu.sal)  # Bill Gates 100000

print(type(stu))  # <class '__main__.student'>
print(stu)  # <__main__.student object at 0x000001ACB0C19FD0>
print(stu.co, stu.country)  # Micro Soft USA

# del stu

#---------------------------------------------
class Student:
    """Student whose ``name`` is exposed through a traced property.

    The value lives in ``_name``; the getter, setter and deleter each print
    a message so the demo below shows which accessor ran.
    """

    def __init__(self, name):
        """Store the initial name directly, bypassing the setter."""
        self._name = name

    @property
    def name(self):
        """Return the stored name after printing "Calling getter"."""
        print("Calling getter")
        return self._name

    @name.setter
    def name(self, new_name):
        """Replace the stored name after printing "Calling setter"."""
        print("Calling setter")
        self._name = new_name

    @name.deleter
    def name(self):
        """Remove the stored name after printing "Calling deleter"."""
        print("Calling deleter")
        del self._name


stu = Student("Sandilya")
print(stu.name)
stu.name = "Chandra"
print(stu.name)

# Unbinds the variable only; the property deleter runs on `del stu.name`.
del stu
"""
Calling getter
Sandilya
Calling setter
Calling getter
Chandra
"""

All about import statement

There are several ways to import a Python library; we will look at each of them.

# Each variant below reads the same 'bio.csv' from the current working
# directory and prints the resulting DataFrame.

# 1. Plain import: every name is reached through the full module name.
import pandas
print(pandas.read_csv('bio.csv'))

# 2. Aliased import: the module is bound to the shorter name `pd`.
import pandas as pd  # pandas is now reachable as pd
df = pd.read_csv('bio.csv')
print(df)

# 3. Selective import: only read_csv is bound, with no module prefix.
from pandas import read_csv  # import only read_csv
print(read_csv('bio.csv'))

"""
   Unnamed: 0  Name  Age
0           0   Raj   23
1           1   Ram   23
2           2  Sita   24
3           3  Laks   21
"""

# 4. Wildcard import: binds every public pandas name in this namespace.
# NOTE(review): wildcard imports can silently shadow existing names;
# shown here for completeness only.
from pandas import *  # import all public names
print('df:',read_csv('bio.csv'))
"""
df:    Unnamed: 0  Name  Age
0           0   Raj   23
1           1   Ram   23
2           2  Sita   24
3           3  Laks   21
"""

List Comprehension in Python

The syntax used to define list comprehensions usually follows one of these four patterns:

[<value_to_include> for <var> in <sequence>]
[<value_to_include> for <var1> in <sequence1> for <var2> in <sequence2>]
[<value_to_include> for <var> in <sequence> if <condition>]
[<value> for <var1> in <sequence1> for <var2> in <sequence2> if <condition>]

# Pattern: [<value_to_include> for <var> in <sequence>]
# Build the uppercase alphabet from the character codes 65..90.
print([chr(code) for code in range(65, 91)])
#['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

# Pattern: [<value_to_include> for <var> in <sequence> if <condition>]
# Keep only the even numbers from 1..24.
print([n for n in range(1, 25) if not n % 2])
#[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24]
# Pattern: [<value> for <var1> in <sequence1> for <var2> in <sequence2> if <condition>]
# Products a * b for every pair in 1..4 where b divides a evenly.
print([a * b for a in range(1, 5) for b in range(1, 5) if a % b == 0])
# [1, 2, 4, 3, 9, 4, 8, 16]

Above sample code snippets will explain how to use list comprehension with for, nested for, and if condition.

# Brain teaser: spot the difference between the two statements below

import sys

# Square brackets: a list holding all 500 integers at once.
as_list = [i for i in range(500)]
print(sys.getsizeof(as_list))  # 4216 on the author's interpreter

# Parentheses: a generator object that has not produced any value yet.
as_generator = (i for i in range(500))
print(sys.getsizeof(as_generator))  # 112 on the author's interpreter

The list comprehension builds all 500 elements in memory at once, whereas the generator expression produces one element at a time, on demand. That is why the two objects differ so much in size.

Monday, 28 March 2022

Pandas#03

Pandas ...

People often confuse the DataFrame.loc[] and DataFrame.iloc[] indexers.
We will see the difference.

loc[] is used to select rows and columns by Names/Labels
iloc[] is used to select rows and columns by Integer Index/Position. zero based index position.

import pandas as pd
# One list per column; each list has five entries, one per row label.
technologies = {
    'Courses': ["B.Sc", "B.E.", "MBA", "B.COM", "BA"],
    'Fee': [20000, 55000, 60000, 25000, 18000],
    'Duration': ['3y', '4y', '2y', '3y', '2y'],
    'Discount': [10, 25, 5, 8, 6],
}
# Row labels r1..r5 replace the default 0-based integer index.
index_labels = [f'r{n}' for n in range(1, 6)]
df = pd.DataFrame(data=technologies, index=index_labels)
print(df)

Result :

   Courses    Fee Duration  Discount
r1    B.Sc  20000       3y        10
r2    B.E.  55000       4y        25
r3     MBA  60000       2y         5
r4   B.COM  25000       3y         8
r5      BA  18000       2y         6

The following code, together with the outputs shown in the docstrings, will help us understand the differences between loc and iloc: retrieving rows, row ranges, columns, and column ranges, listing numeric columns, and concatenating string columns.

# Select a single row by its index label (loc is label-based)
print(df.loc['r2'])

# Select the same row by integer position (iloc is zero-based, so 1 is the second row)
print(df.iloc[1])

"""
Courses      B.E.
Fee         55000
Duration       4y
Discount       25
Name: r2, dtype: object
Courses      B.E.
Fee         55000
Duration       4y
Discount       25
"""

# Select a single column by label; ':' keeps every row
print(df.loc[:, "Courses"])
# Select the same column by integer position (0 is the first column)
print(df.iloc[:, 0])

"""
r1     B.Sc
r2     B.E.
r3      MBA
r4    B.COM
r5       BA
Name: Courses, dtype: object
r1     B.Sc
r2     B.E.
r3      MBA
r4    B.COM
r5       BA
Name: Courses, dtype: object
"""

# Select multiple rows by passing a list of index labels
print(df.loc[['r2','r3']])

# Select the same rows by passing a list of integer positions
print(df.iloc[[1,2]])
"""
   Courses    Fee Duration  Discount
r2    B.E.  55000       4y        25
r3     MBA  60000       2y         5
   Courses    Fee Duration  Discount
r2    B.E.  55000       4y        25
r3     MBA  60000       2y         5
"""

# Select multiple columns by passing a list of column labels
print(df.loc[:, ["Courses","Fee","Discount"]])

# Select the same columns by passing a list of integer positions
print(df.iloc[:, [0,1,3]])
"""
   Courses    Fee  Discount
r1    B.Sc  20000        10
r2    B.E.  55000        25
r3     MBA  60000         5
r4   B.COM  25000         8
r5      BA  18000         6
   Courses    Fee  Discount
r1    B.Sc  20000        10
r2    B.E.  55000        25
r3     MBA  60000         5
r4   B.COM  25000         8
r5      BA  18000         6
"""

# Select rows between two index labels
# A label slice includes both endpoints: r1 and r4 are both returned
print(df.loc['r1':'r4'])

# Select rows between two integer positions
# A position slice includes position 0 and excludes position 4
print(df.iloc[0:4])
"""
   Courses    Fee Duration  Discount
r1    B.Sc  20000       3y        10
r2    B.E.  55000       4y        25
r3     MBA  60000       2y         5
r4   B.COM  25000       3y         8
   Courses    Fee Duration  Discount
r1    B.Sc  20000       3y        10
r2    B.E.  55000       4y        25
r3     MBA  60000       2y         5
r4   B.COM  25000       3y         8
"""

# Select columns between two labels
# A label slice includes both endpoints: 'Fee' and 'Discount'
print(df.loc[:,'Fee':'Discount'])

# Select columns between two integer positions
# A position slice includes position 1 and excludes position 4
print(df.iloc[:,1:4])
"""
      Fee Duration  Discount
r1  20000       3y        10
r2  55000       4y        25
r3  60000       2y         5
r4  25000       3y         8
r5  18000       2y         6
      Fee Duration  Discount
r1  20000       3y        10
r2  55000       4y        25
r3  60000       2y         5
r4  25000       3y         8
r5  18000       2y         6
"""
# Select alternate rows by label: a step of 2 over the slice r1..r4
print(df.loc['r1':'r4':2])

# Select alternate rows by integer position: a step of 2 over positions 0..3
print(df.iloc[0:4:2])
"""
   Courses    Fee Duration  Discount
r1    B.Sc  20000       3y        10
r3     MBA  60000       2y         5
   Courses    Fee Duration  Discount
r1    B.Sc  20000       3y        10
r3     MBA  60000       2y         5
"""
# Select alternate columns between two labels (step of 2)
print(df.loc[:,'Fee':'Discount':2])

# Select alternate columns between two integer positions (step of 2)
print(df.iloc[:,1:4:2])
"""
      Fee  Discount
r1  20000        10
r2  55000        25
r3  60000         5
r4  25000         8
r5  18000         6
      Fee  Discount
r1  20000        10
r2  55000        25
r3  60000         5
r4  25000         8
r5  18000         6
"""

# Filter rows with a condition: loc accepts the boolean Series directly
print(df.loc[df['Fee'] >= 50000])

# Same filter with iloc: the boolean Series is first converted to a plain list
# NOTE(review): iloc is position-based and is expected to reject a labelled
# boolean Series as a mask - confirm against the pandas indexing docs.
print(df.iloc[list(df['Fee'] >= 50000)])
"""
   Courses    Fee Duration  Discount
r2    B.E.  55000       4y        25
r3     MBA  60000       2y         5
   Courses    Fee Duration  Discount
r2    B.E.  55000       4y        25
r3     MBA  60000       2y         5
"""

# Each statement below is an alternative way to obtain the column labels;
# every assignment overwrites the previous `column_names`.

# Wrap the array behind df.columns in list()
column_names = list(df.columns.values)

# Convert the same array with its own tolist() method
column_names = df.columns.values.tolist()

# Wrap the columns Index itself in list()
column_names = list(df.columns)

# Using list(df) to get the list of all column names
column_names = list(df)

# sorted(df) returns the column names in sorted order
column_names = sorted(df)

# Print each column header label on its own line
for column_headers in df.columns:
    print(column_headers)

"""
Courses
Fee
Duration
Discount
"""

# DataFrame.keys() is another route to the column labels.
column_names = df.keys().values.tolist()

# Get all numeric columns through the public select_dtypes() API.
# This replaces the private `_get_numeric_data()` helper and a hard-coded
# "int64" comparison, which would miss float and other integer-width columns.
numeric_columns = df.select_dtypes(include="number").columns.tolist()
print(numeric_columns)  # ['Fee', 'Discount']

# Using map(str) to convert "Courses" to text, then joining it to
# "Duration" with a single space via the + operator
df["Period"] = df["Courses"].map(str) + " " + df["Duration"]
print(df)
"""
  Courses    Fee Duration  Discount    Period
r1    B.Sc  20000       3y        10   B.Sc 3y
r2    B.E.  55000       4y        25   B.E. 4y
r3     MBA  60000       2y         5    MBA 2y
r4   B.COM  25000       3y         8  B.COM 3y
r5      BA  18000       2y         6     BA 2y
"""

# Using the + operator: astype(str) converts "Courses" to text before joining
df["Period"] = df['Courses'].astype(str) +"-"+ df["Duration"]
print(df)

# Using apply() with axis=1: "-".join is called once per row on the two values
df["Period"] = df[["Courses", "Duration"]].apply("-".join, axis=1)
print(df)

# Using DataFrame.agg() with axis=1 to join the two values of each row
# Note the lowercase "period": this adds a second column next to "Period",
# which is why the output shows both.
df["period"] = df[['Courses', 'Duration']].agg('-'.join, axis=1)
print(df)
"""
   Courses    Fee Duration  Discount    Period    period
r1    B.Sc  20000       3y        10   B.Sc-3y   B.Sc-3y
r2    B.E.  55000       4y        25   B.E.-4y   B.E.-4y
r3     MBA  60000       2y         5    MBA-2y    MBA-2y
r4   B.COM  25000       3y         8  B.COM-3y  B.COM-3y
r5      BA  18000       2y         6     BA-2y     BA-2y

"""

# Using Series.str.cat(): joins "Courses" with "Duration", separated by sep
df["Period"] = df["Courses"].str.cat(df["Duration"], sep = "-")
print(df)

# Using DataFrame.apply() with a lambda: each row's two values are joined with "-"
df["Period"] = df[["Courses", "Duration"]].apply(lambda x: "-".join(x), axis =1)
print(df)

# Using map(str) and the + operator again, this time with "-" as the separator
df["Period"] = df["Courses"].map(str) + "-" + df["Duration"]
print(df)

Happy DataFraming in Pandas!!!!

Ref : https://sparkbyexamples.com/pandas/pandas-get-column-names/

Sunday, 27 March 2022

P#17. Tom..

Talking TOM - Toy👶👶👶



import pyttsx3

# init() returns the speech engine; bind it to `p1`, which the calls below
# use. The original misspelled the module name and discarded the result.
p1 = pyttsx3.init()
# A single input() call with a properly closed prompt string. The original
# nested two input() calls and left the string literal unterminated.
speak = input('I am Talking Tom, I will repeat whatever u say\n Enter some Text here!')
p1.say(speak)      # queue the typed text for speaking
p1.runAndWait()    # block until the queued speech has been spoken

p1.stop()

The above 5 line(s) of code is smart enough to say whatever you say. Please don't forget to install:

pip install pyttsx3         # text to speech library in python

Run:


I am Talking Tom, I will repeat whatever You say
 Enter some Text here! 'Hello Nayan'

It will say Hello Nayan through your speaker. have fun !!!

Thursday, 24 March 2022

P#16 Line, Bar, Scatter, Pie charts

MATPLOTLIB...

In this blog, we will see how to plot various chart types using plt.plot(), plt.bar(), plt.barh(), plt.scatter(), plt.hist(), and plt.pie().

If you run this code which is self explanatory, you will get this chart.

def plotline():
    """Draw a line chart of student strength per degree.

    The figure is written to ``line.png`` in the current directory and then
    shown in the interactive window.
    """
    import matplotlib.pyplot as plt

    degrees = ['B.E CSE', 'B.E. Marine', 'B.E. ECE', 'B.Sc(NatSci)', 'MBA']
    strength = [20, 40, 60, 80, 100]

    plt.plot(degrees, strength)  # one point per degree, joined by a line
    plt.xlabel('Degree')
    plt.ylabel('Strength')
    plt.grid()
    # Save before show(): the file is written while the figure is still current.
    plt.savefig('line.png')
    plt.show()


plotline()

If you run this code, you will get this chart.

def plotbar():
    """Draw a vertical bar chart of student strength per degree.

    The figure is written to ``bar.png`` in the current directory and then
    shown in the interactive window.
    """
    # Imported locally, as plotline() does, so the function does not depend
    # on a module-level `plt` that is not defined in the visible file.
    import matplotlib.pyplot as plt

    langs = ['B.E CSE', 'B.E. Marine', 'B.E. ECE', 'B.Sc(NatSci)', 'MBA']
    students = [20, 40, 60, 80, 100]
    # The original passed an undefined name `col` as a third argument, which
    # raised NameError; the default bar colour is used instead.
    plt.bar(langs, students)  # BAR
    plt.xlabel('Degree')
    plt.ylabel('Strength')
    plt.grid()
    plt.savefig('bar.png')
    plt.show()


plotbar()


def plotbarh():
    """Draw a horizontal bar chart of student strength per degree.

    The figure is written to ``barh.png`` in the current directory and then
    shown in the interactive window.
    """
    # Imported locally so the function does not depend on a module-level
    # `plt` that is not defined in the visible file.
    import matplotlib.pyplot as plt

    langs = ['B.E CSE', 'B.E. Marine', 'B.E. ECE', 'B.Sc(NatSci)', 'MBA']
    students = [20, 40, 60, 80, 100]
    plt.barh(langs, students, color='hotpink')  # BARh Pink
    # Horizontal bars put the values on the x axis and the categories on
    # the y axis, so the labels are swapped relative to plotbar().
    plt.xlabel('Strength')
    plt.ylabel('Degree')
    plt.grid()
    # Own file name: the original wrote 'bar.png' and overwrote plotbar()'s image.
    plt.savefig('barh.png')
    plt.show()

plotbarh() plots horizontal bar as shown below.

def plotbarss():
    """Draw three bar series side by side over four x positions.

    The figure is written to ``bars.png`` in the current directory and then
    shown in the interactive window.
    """
    import numpy as np
    import matplotlib.pyplot as plt

    data = [[30, 25, 50, 20],
            [40, 23, 51, 17],
            [35, 22, 45, 19]]
    X = np.arange(4)
    bar_width = 0.25

    fig = plt.figure()
    ax = fig.add_axes([0, 0, 1, 1])  # axes fill the whole figure
    # Shift each series right by one bar width so the bars sit side by side.
    for slot, (series, colour) in enumerate(zip(data, 'bgr')):
        ax.bar(X + slot * bar_width, series, color=colour, width=bar_width)
    # NOTE(review): four labels are given but only three series are drawn;
    # the labels may have been meant for the four x positions - confirm.
    ax.legend(labels=('cse', 'it', 'mech', 'mba'), loc='upper right')
    plt.savefig('bars.png')
    plt.show()

plotbarss() displays the chart as shown below with three bars.

def plotscat():
    """Draw a scatter plot of student strength per degree.

    The figure is written to ``scat.png`` in the current directory and then
    shown in the interactive window.
    """
    # Imported locally, as plotline() does, so the function does not depend
    # on a module-level `plt` that is not defined in the visible file.
    import matplotlib.pyplot as plt

    langs = ['B.E CSE', 'B.E. Marine', 'B.E. ECE', 'B.Sc(NatSci)', 'MBA']
    students = [20, 40, 60, 80, 100]
    plt.scatter(langs, students)  # Scatter
    plt.xlabel('Degree')
    plt.ylabel('Strength')
    plt.grid()
    plt.savefig('scat.png')
    plt.show()


plotscat()

plt.scatter() will draw a scatter plot as shown below.

def plotscatc():
    """Draw a scatter plot whose marker colour and size vary per point.

    The points come from a seeded random generator, so the picture is
    reproducible. The figure is written to ``scatcc.png`` in the current
    directory and then shown in the interactive window.
    """
    # Imported locally, as the other plotting helpers do, so the function
    # does not depend on module-level `np` / `plt` bindings.
    import numpy as np
    import matplotlib.pyplot as plt

    np.random.seed(19680801)  # seed the random number generator.
    data = {'a': np.arange(50),
            'c': np.random.randint(0, 50, 50),
            'd': np.random.randn(50)}
    data['b'] = data['a'] + 10 * np.random.randn(50)
    # Marker sizes must be non-negative, so take the absolute value and scale up.
    data['d'] = np.abs(data['d']) * 100

    fig, ax = plt.subplots(figsize=(5, 2.7), layout='constrained')
    # With data=..., the string arguments are looked up as keys of `data`:
    # x='a', y='b', colour from 'c', marker size from 'd'.
    ax.scatter('a', 'b', c='c', s='d', data=data, marker='*')
    ax.set_xlabel('entry a')
    ax.set_ylabel('entry b')
    plt.savefig('scatcc.png')
    plt.show()


plotscatc()

This will plot markers of different sizes and different colors.
marker = "*" sets the plot symbol to *; s sets the size of each marker and c sets its color.


def plothist():
    """Draw a histogram of 250 normally distributed random values.

    The values are drawn with mean 170 and standard deviation 10 from an
    unseeded generator, so each run differs. The figure is written to
    ``hist.png`` in the current directory and then shown.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    samples = np.random.normal(loc=170, scale=10, size=250)
    plt.hist(samples)
    plt.savefig('hist.png')
    plt.show()

plt.hist() method plots histogram as shown below;

def plotpie():
    """Draw an unlabelled pie chart of five fixed values.

    The figure is written to ``pie.png`` in the current directory and then
    shown in the interactive window.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    slices = np.array([32, 40, 60, 80, 100])
    plt.pie(slices)
    plt.savefig('pie.png')
    plt.show()


plotpie()

The plt.pie() method plots the values in y as a pie chart.
def plotpie1():
    """Draw a pie chart of student strength with one label per degree.

    The figure is written to ``pie1.png`` in the current directory and then
    shown in the interactive window.
    """
    # Imported locally, as plotpie() does, so the function does not depend
    # on a module-level `plt` that is not defined in the visible file.
    import matplotlib.pyplot as plt

    deg = ['B.E CSE', 'B.E. Marine', 'B.E. ECE', 'B.Sc(NatSci)', 'MBA']
    students = [20, 40, 60, 80, 100]
    # Create an 8x6 inch figure; it becomes the current figure, so its
    # handle (unused in the original) does not need to be kept.
    plt.figure(figsize=(8, 6))
    plt.pie(students, labels=deg)
    # Save first, then show the plot
    plt.savefig('pie1.png')
    plt.show()
plotpie1()

This is a pie plot with labels, which are provided by plt.pie(students, labels=deg).

Happy learning with AMET ODL!!!

AMET-SOLID