Thursday 31 March 2022

Pandas #04 - Data Aggregation

 AGGREGATE

Data Frame Aggregation

Python offers several ways to aggregate data, mainly through the pandas and NumPy libraries. The pandas DataFrame supports aggregation directly through methods such as agg(). Let us see how to apply them:

import pandas as pd
import numpy as np

# Sample frame: three numeric rows followed by one all-NaN row.
df = pd.DataFrame(
    [
        [1, 2, 3, 4, 5],
        [4, 5, 6, 7, 8],
        [7, 8, 9, 10, 11],
        [np.nan, np.nan, np.nan, np.nan, np.nan],
    ],
    columns=['A', 'B', 'C', 'D', 'E'],
)

# Aggregate over the rows: the result has one row per aggregate function
# and one column per original column.
dfagg = df.agg(['sum', 'min'])

print(dfagg)

"""
        A     B     C     D     E
sum  12.0  15.0  18.0  21.0  24.0
min   1.0   2.0   3.0   4.0   5.0
"""

Aggregating different aggregates over columns

# Different aggregate functions per column: the dict maps each column label
# to the list of functions applied to that column.  The aggregation is
# computed once and the stored result is printed.
per_column = df.agg({'A': ['sum', 'min'], 'B': ['min', 'max']})
print(per_column)
"""
        A    B
sum  12.0  NaN
min   1.0  2.0
max   NaN  8.0
"""

The describe() method displays summary statistics for each column: count, mean, std, min, max, and the 25%, 50% and 75% quartiles.

# Print per-column summary statistics.  The expected output below reports a
# count of 3.0 for every column, so the all-NaN row of the sample frame is
# not counted.
print(df.describe())

"""
A B C D E
count 3.0 3.0 3.0 3.0 3.0
mean 4.0 5.0 6.0 7.0 8.0
std 3.0 3.0 3.0 3.0 3.0
min 1.0 2.0 3.0 4.0 5.0
25% 2.5 3.5 4.5 5.5 6.5
50% 4.0 5.0 6.0 7.0 8.0
75% 5.5 6.5 7.5 8.5 9.5
max 7.0 8.0 9.0 10.0 11.0
"""

Transforming and manipulating elements is very easy. Let us look at some code snippets.

Suppose we want to add 1 to every element of the DataFrame above.

# Element-wise transform: add one to every cell.  The expected output shows
# the NaN row staying NaN.
incremented = df.transform(lambda column: column + 1)
print(incremented)
"""
A B C D E
0 2.0 3.0 4.0 5.0 6.0
1 5.0 6.0 7.0 8.0 9.0
2 8.0 9.0 10.0 11.0 12.0
3 NaN NaN NaN NaN NaN
"""

We can use groupby() too.


# Two readings per day: the same four dates appear twice, once for each
# batch of readings.
dates = ["2019-05-08", "2019-05-07", "2019-05-06", "2019-05-05"] * 2
readings = [5, 8, 6, 1, 50, 100, 60, 120]
df = pd.DataFrame({"Date": dates, "Data": readings})
print(df)
"""
Date Data
0 2019-05-08 5
1 2019-05-07 8
2 2019-05-06 6
3 2019-05-05 1
4 2019-05-08 50
5 2019-05-07 100
6 2019-05-06 60
7 2019-05-05 120
"""

# transform('sum') keeps the original row count: every row receives the
# total of its own Date group.
daily_totals = df.groupby('Date')['Data'].transform('sum')
print(daily_totals)

"""
0 55
1 108
2 66
3 121
4 55
5 108
6 66
7 121
Name: Data, dtype: int64
"""

We can group by different levels of a hierarchical index using the level parameter. Note the use of the pd.MultiIndex.from_arrays() method to build the index. There are several ways to group by a MultiIndex.



# Two parallel arrays, one per index level (for illustration only).
arrays = [['male', 'male', 'female', 'female'],
          ['young', 'old', 'young', 'old']]

index = pd.MultiIndex.from_arrays(arrays, names=('Gender', 'Type'))
df = pd.DataFrame({'Max Enthu': [390., 350., 30., 20.]},
                  index=index)

print(df)

"""
              Max Enthu
Gender Type
male   young      390.0
       old        350.0
female young       30.0
       old         20.0
"""

# Group on the first index level by position (level 0 is 'Gender').
by_level0 = df.groupby(level=0).mean()
print(by_level0)
"""
        Max Enthu
Gender
female       25.0
male        370.0
"""

# Group on the second index level by name.  The expected output is indexed
# by 'Type', so the level passed here must be "Type".
by_type = df.groupby(level="Type").mean()
print(by_type)
"""
       Max Enthu
Type
old        185.0
young      210.0
"""

# Grouping by the level name "Gender" gives the same result as level=0.
by_gender = df.groupby(level="Gender").mean()
print(by_gender)
"""
        Max Enthu
Gender
female       25.0
male        370.0
"""

Happy Learning at AMET!!!

























Tuesday 29 March 2022

Fundas2

Some Basic Functions


# for <loop_variable> in <iterable>:
#     <code>
for i in range(10):
    print(i)
print(25 * '-')

# for <loop_variable> in range(<start>, <stop>, <step>):
#     <code>
for i in range(1, 10, 2):
    print(i)
for i in range(1, 30, 2):
    print('fun' * i)
for i in range(30, 1, -2):
    print('fun' * i)

# for loop over a list: iterate the items directly instead of indexing
# through range(len(...)).
l = ['apple', 'boy', 'cat', 'dog']
for item in l:
    print(item)

# A string is iterable too: the loop yields one character at a time.
# The variable is called `text` because naming it `str` would shadow the
# built-in type for every later statement in the script.
text = ' I am iterable by seperation?'
for ch in text:
    print(ch)


# Break
lis = [1, 2, 3, 4, 5]


# Stop at the first even element: `break` leaves the loop immediately, so
# nothing after 2 is visited.
for elem in lis:
    if elem % 2 == 0:
        print("Even:", elem)
        print("break")
        break
    else:
        print("Odd:", elem)
"""
Odd: 1
Even: 2
break
"""
# Continue

# Skip even elements: `continue` jumps straight to the next iteration, so
# the "Odd:" line is only reached for odd values.
for elem in lis:
    if elem % 2 == 0:
        print("continue")
        continue
    print("Odd:", elem)
"""
Odd: 1
continue
Odd: 3
continue
Odd: 5
"""

# zip() is a built-in function that iterates over several sequences at
# once, yielding the corresponding elements of each sequence on every
# iteration.

list1 = [10, 20, 30, 40]
list2 = [50, 60, 70, 80]
list3 = ['a', 'b', 'c', 'd']

for elem1, elem2, elem3 in zip(list1, list2, list3):
    print(elem1, elem2, elem3)
"""
10 50 a
20 60 b
30 70 c
40 80 d
"""


# while <condition>:
#     <code>

x = 5
while x >= 0:
    print("Fun " * x)
    x -= 1  # must sit inside the loop body, otherwise the loop never ends

# nested for loop
dim = 3
for i in range(dim):
    for j in range(dim):
        for k in range(dim):
            print(i, j, k)
        print('\n')
    print('\n')  # dim x dim x dim = 27 (i, j, k) triples are printed in total

# Triangle of stars: row i prints num_cols - i stars.  The inner loop uses a
# throwaway variable so that num_cols is not overwritten, and the outer
# loop is driven by num_rows instead of a hard-coded 5.
num_cols = 5
num_rows = 5
for i in range(num_rows):
    for _ in range(num_cols - i):
        print("*", end="")
    print()


# Functions in Python

def fun1():
    """Print the literal text 'def()'; takes no arguments, returns None."""
    print('def()')


fun1()

def add(x, y):
    """Print the sum of x and y; nothing is returned."""
    print(x + y)


add(5, 5)


def mulp(a, b=5):  # default value for b
    """Print the product a * b; b falls back to 5 when omitted."""
    print(a * b)


mulp(10)  # 50


# Recursive factorial function

def fact(n):
    """Return n! computed recursively.

    Raises:
        ValueError: if n is negative; without this guard the recursion
            never reaches a base case.
    """
    if n < 0:
        raise ValueError("fact() is not defined for negative numbers")
    if n == 0 or n == 1:
        return 1
    else:
        return n * fact(n - 1)


f = fact(5)
print(f)  # 120

def fib(n):
    """Return the n-th Fibonacci number (fib(0) == 0, fib(1) == 1), recursively.

    Raises:
        ValueError: if n is negative; without this guard the recursion
            never reaches a base case.
    """
    if n < 0:
        raise ValueError("fib() is not defined for negative numbers")
    if n == 0 or n == 1:
        return n
    else:
        return fib(n - 1) + fib(n - 2)


print(5 * '-')
f = fib(4)
print(f)  # 3

Class : Object oriented concepts in python using class

class student:
    """Demo record holding a person's name, company, country and salary.

    The class-level attributes are defaults; every instance built through
    __init__ overrides them with its own values.
    """

    # Class-level defaults.  No trailing commas: `name = 'Steve Jobs',`
    # would create a one-element tuple instead of a string.
    name = 'Steve Jobs'
    co = 'Apple'
    country = 'USA'
    sal = 1000000

    def __init__(self, param1, param2, param3, param4):
        """Store name, company, country and salary, in that order."""
        self.name = param1
        self.co = param2
        self.country = param3
        self.sal = param4

    def display(self, param1, param2, param3, param4):
        """Print the four values passed in, separated by spaces."""
        print(param1, param2, param3, param4)


stu = student('Bill Gates', 'Micro Soft', 'USA', 100000)
print(stu.name, stu.sal)  # Bill Gates 100000

print(type(stu))  # <class '__main__.student'>
print(stu)  # <__main__.student object at 0x000001ACB0C19FD0> (address varies per run)
print(stu.co, stu.country)  # Micro Soft USA

# del stu

#---------------------------------------------
class Student:
    """Demo of a managed attribute: `name` is a property backed by `_name`.

    Each accessor prints a trace line so the calls are visible when the
    attribute is read, assigned or deleted.
    """

    def __init__(self, name):
        self._name = name

    @property
    def name(self):
        """Return the stored name (prints 'Calling getter')."""
        print("Calling getter")
        return self._name

    @name.setter
    def name(self, new_name):
        print("Calling setter")
        self._name = new_name

    @name.deleter
    def name(self):
        print("Calling deleter")
        del self._name


stu = Student("Sandilya")
print(stu.name)
stu.name = "Chandra"
print(stu.name)

# `del stu` only removes the variable; it does not call the property
# deleter.  `del stu.name` is what would print "Calling deleter".
del stu
"""
Calling getter
Sandilya
Calling setter
Calling getter
Chandra
"""


All about import statement

There are several ways to import a Python library; the examples below show the common forms.
# Form 1: import the package and call functions through its full name.
# NOTE(review): every example reads 'bio.csv' — assumes that file exists in
# the working directory.
import pandas
print(pandas.read_csv('bio.csv'))

# Form 2: import the package under a shorter alias.
import pandas as pd # renamed as pd
df = pd.read_csv('bio.csv')
print(df)

# Form 3: import a single name so it can be called without a prefix.
from pandas import read_csv # import only read_csv
print(read_csv('bio.csv'))

"""
Unnamed: 0 Name Age
0 0 Raj 23
1 1 Ram 23
2 2 Sita 24
3 3 Laks 21
"""

# Form 4: wildcard import.  Shown for completeness; it copies every public
# pandas name into the current namespace, so the explicit forms above are
# usually easier to follow.
from pandas import * # import all
print('df:',read_csv('bio.csv'))
"""
df: Unnamed: 0 Name Age
0 0 Raj 23
1 1 Ram 23
2 2 Sita 24
3 3 Laks 21
"""

List Comprehension in Python


The syntax used to define list comprehensions usually follows one of these four patterns:

  • [<value_to_include> for <var> in <sequence>]
  • [<value_to_include> for <var1> in <sequence1> for <var2> in <sequence2>]
  • [<value_to_include> for <var> in <sequence> if <condition>]
  • [<value> for <var1> in <sequence1> for <var2> in <sequence2> if <condition>]

# Pattern: [<value_to_include> for <var> in <sequence>]
# Print the uppercase letters A-Z (character codes 65 to 90).
print([chr(i) for i in range(65, 91)])
#['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

# Pattern: [<value_to_include> for <var> in <sequence> if <condition>]
# Keep only the even numbers from 1 to 24.
print([k for k in range(1, 25) if k % 2 == 0])
#[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24]
# Pattern: [<value> for <var1> in <sequence1> for <var2> in <sequence2> if <condition>]
# Products i * j for every pair where j divides i.
print([i * j for i in range(1, 5) for j in range(1, 5) if i % j == 0])
# [1, 2, 4, 3, 9, 4, 8, 16]
Above sample code snippets will explain how to use list comprehension with for, nested for, and if condition.

# Brain teaser: see the difference between the two statements below.
# NOTE(review): the byte counts in the trailing comments come from one
# particular run; they may differ on another Python version or platform.

import sys
# Square brackets: a list comprehension, so the object measured is a list
# holding all 500 elements.
print(sys.getsizeof([i for i in range(500)])) # 4216

# Parentheses: a generator expression, so the object measured is the
# generator itself, not the values it will produce.
print(sys.getsizeof((i for i in range(500)))) #112

The first statement builds a list that holds all 500 elements in memory at once, whereas the second creates a generator that produces one element at a time. That is why the reported memory sizes differ.








Monday 28 March 2022

Pandas#03

Pandas ...


People are often confused about the difference between DataFrame.loc[] and DataFrame.iloc[].
Let us look at the difference.





  • loc[] is used to select rows and columns by names/labels.
  • iloc[] is used to select rows and columns by integer position (zero-based).

  • import pandas as pd
    technologies = {
    'Courses':["B.Sc","B.E.","MBA","B.COM","BA"],
    'Fee' :[20000,55000,60000,25000,18000],
    'Duration':['3y','4y','2y','3y','2y'],
    'Discount':[10,25,5,8,6]
    }
    index_labels=['r1','r2','r3','r4','r5']
    df = pd.DataFrame(technologies,index=index_labels)
    print(df)
  • Result :
  •    Courses    Fee Duration  Discount
    r1 B.Sc 20000 3y 10
    r2 B.E. 55000 4y 25
    r3 MBA 60000 2y 5
    r4 B.COM 25000 3y 8
    r5 BA 18000 2y 6
The following code, with its outputs shown in the triple-quoted strings, will help us understand
the differences between loc and iloc: retrieving rows, row ranges, columns and column ranges,
finding numeric columns, and concatenating string columns.
# Select a single row by index label
print(df.loc['r2'])

# Select a single row by integer position
print(df.iloc[1])

# Both calls print the same Series, each ending with its Name/dtype line.
"""
Courses B.E.
Fee 55000
Duration 4y
Discount 25
Name: r2, dtype: object
Courses B.E.
Fee 55000
Duration 4y
Discount 25
Name: r2, dtype: object
"""

# Select a single column by label
print(df.loc[:, "Courses"])
# Select a single column by integer position
print(df.iloc[:, 0])

"""
r1 B.Sc
r2 B.E.
r3 MBA
r4 B.COM
r5 BA
Name: Courses, dtype: object
r1 B.Sc
r2 B.E.
r3 MBA
r4 B.COM
r5 BA
Name: Courses, dtype: object
"""

# Select multiple rows by label
print(df.loc[['r2','r3']])

# Select multiple rows by integer position
print(df.iloc[[1,2]])
"""
Courses Fee Duration Discount
r2 B.E. 55000 4y 25
r3 MBA 60000 2y 5
Courses Fee Duration Discount
r2 B.E. 55000 4y 25
r3 MBA 60000 2y 5
"""

# Select multiple columns by label
print(df.loc[:, ["Courses","Fee","Discount"]])

# Select multiple columns by integer position
print(df.iloc[:, [0,1,3]])
"""
Courses Fee Discount
r1 B.Sc 20000 10
r2 B.E. 55000 25
r3 MBA 60000 5
r4 B.COM 25000 8
r5 BA 18000 6
Courses Fee Discount
r1 B.Sc 20000 10
r2 B.E. 55000 25
r3 MBA 60000 5
r4 B.COM 25000 8
r5 BA 18000 6
"""

# Select rows between two index labels
# Label slices include both ends: r1 and r4 are both returned
print(df.loc['r1':'r4'])

# Select rows between two integer positions
# Position slices include position 0 and exclude position 4
print(df.iloc[0:4])
"""
Courses Fee Duration Discount
r1 B.Sc 20000 3y 10
r2 B.E. 55000 4y 25
r3 MBA 60000 2y 5
r4 B.COM 25000 3y 8
Courses Fee Duration Discount
r1 B.Sc 20000 3y 10
r2 B.E. 55000 4y 25
r3 MBA 60000 2y 5
r4 B.COM 25000 3y 8
"""

# Select columns between two labels
# Includes both the 'Fee' and 'Discount' columns
print(df.loc[:,'Fee':'Discount'])

# Select columns between two integer positions
# Includes position 1 and excludes position 4
print(df.iloc[:,1:4])
"""
Fee Duration Discount
r1 20000 3y 10
r2 55000 4y 25
r3 60000 2y 5
r4 25000 3y 8
r5 18000 2y 6
Fee Duration Discount
r1 20000 3y 10
r2 55000 4y 25
r3 60000 2y 5
r4 25000 3y 8
r5 18000 2y 6
"""
# Select alternate rows by label (slice step of 2)
print(df.loc['r1':'r4':2])

# Select alternate rows by integer position (slice step of 2)
print(df.iloc[0:4:2])
"""
Courses Fee Duration Discount
r1 B.Sc 20000 3y 10
r3 MBA 60000 2y 5
Courses Fee Duration Discount
r1 B.Sc 20000 3y 10
r3 MBA 60000 2y 5
"""
# Select alternate columns between two labels
print(df.loc[:,'Fee':'Discount':2])

# Select alternate columns between two integer positions
print(df.iloc[:,1:4:2])
"""
Fee Discount
r1 20000 10
r2 55000 25
r3 60000 5
r4 25000 8
r5 18000 6
Fee Discount
r1 20000 10
r2 55000 25
r3 60000 5
r4 25000 8
r5 18000 6
"""

# Using conditions: loc takes the boolean comparison result directly
print(df.loc[df['Fee'] >= 50000])

# For iloc the comparison result is first converted to a plain list of
# booleans with list(...)
print(df.iloc[list(df['Fee'] >= 50000)])
"""
Courses Fee Duration Discount
r2 B.E. 55000 4y 25
r3 MBA 60000 2y 5
Courses Fee Duration Discount
r2 B.E. 55000 4y 25
r3 MBA 60000 2y 5
"""

# Wrap the underlying array of column labels in a list
column_names = list(df.columns.values)

# Get the list of all column names from headers
column_names = df.columns.values.tolist()

# Using list(df.columns) to get the column headers as a list
column_names = list(df.columns)

# Using list(df) to get the list of all column names
column_names = list(df)

# All column names as a sorted list
column_names = sorted(df)

# Print every column header label, one per line
for column_headers in df.columns:
    print(column_headers)

"""
Courses
Fee
Duration
Discount
"""

# keys() gives the same column labels
column_names = df.keys().values.tolist()

# Get all numeric columns through the public select_dtypes() API rather
# than the private _get_numeric_data() helper.
numeric_columns = df.select_dtypes(include="number").columns.values.tolist()

# Simple dtype filter.  NOTE(review): this matches only columns stored as
# int64; integer columns with another width would be missed.
numeric_columns = df.dtypes[df.dtypes == "int64"].index.values.tolist()
print(numeric_columns)  # ['Fee', 'Discount']

# Using map(str) plus the + operator to combine two text columns,
# separated by a space
df["Period"] = df["Courses"].map(str) + " " + df["Duration"]
print(df)
"""
Courses Fee Duration Discount Period
r1 B.Sc 20000 3y 10 B.Sc 3y
r2 B.E. 55000 4y 25 B.E. 4y
r3 MBA 60000 2y 5 MBA 2y
r4 B.COM 25000 3y 8 B.COM 3y
r5 BA 18000 2y 6 BA 2y
"""

# Using astype(str) plus the + operator, this time with "-" as separator
df["Period"] = df['Courses'].astype(str) +"-"+ df["Duration"]
print(df)

# Using apply() to join the two columns of each row (axis=1)
df["Period"] = df[["Courses", "Duration"]].apply("-".join, axis=1)
print(df)

# Using DataFrame.agg() to combine two columns of text.
# Note the lowercase column name: "period" is a new column next to
# "Period", which is why the output below shows both.
df["period"] = df[['Courses', 'Duration']].agg('-'.join, axis=1)
print(df)
"""
Courses Fee Duration Discount Period period
r1 B.Sc 20000 3y 10 B.Sc-3y B.Sc-3y
r2 B.E. 55000 4y 25 B.E.-4y B.E.-4y
r3 MBA 60000 2y 5 MBA-2y MBA-2y
r4 B.COM 25000 3y 8 B.COM-3y B.COM-3y
r5 BA 18000 2y 6 BA-2y BA-2y

"""

# Using the Series.str.cat() function with a separator
df["Period"] = df["Courses"].str.cat(df["Duration"], sep = "-")
print(df)

# Using DataFrame.apply() with a lambda that joins each row
df["Period"] = df[["Courses", "Duration"]].apply(lambda x: "-".join(x), axis =1)
print(df)

# Using map(str) again, now with "-" as the separator
df["Period"] = df["Courses"].map(str) + "-" + df["Duration"]
print(df)
Happy DataFraming in Pandas!!!!

Ref : https://sparkbyexamples.com/pandas/pandas-get-column-names/

Sunday 27 March 2022

P#17. Tom..

 Talking TOM - Toy👶👶👶



import pyttsx3

# Create the text-to-speech engine and keep it in p1; every later call
# goes through this object.
p1 = pyttsx3.init()
# Read the sentence to repeat from the keyboard (a single input() call).
speak = input('I am Talking Tom, I will repeat whatever u say\n Enter some Text here!')
p1.say(speak)
p1.runAndWait()
p1.stop()

These few lines of code are enough to speak back whatever you type. Please don't forget to install:

pip install pyttsx3         # text to speech library in python

Run:


I am Talking Tom, I will repeat whatever You say
Enter some Text here! 'Hello Nayan'

It will say Hello Nayan through your speaker. have fun !!!



Thursday 24 March 2022

P#16 Line, Bar, Scatter, Pie charts

MATPLOTLIB...

In this blog, we will see how to plot various chart types using plt.plot(), plt.bar(), plt.barh(), plt.scatter(), plt.hist() and plt.pie().
If you run this self-explanatory code, you will get this chart.
def plotline():
    """Plot student strength per degree as a line chart.

    Saves the figure to 'line.png' and then displays it.
    """
    import matplotlib.pyplot as plt

    langs = ['B.E CSE', 'B.E. Marine', 'B.E. ECE', 'B.Sc(NatSci)', 'MBA']
    students = [20, 40, 60, 80, 100]
    plt.plot(langs, students)  # plot
    plt.xlabel('Degree')
    plt.ylabel('Strength')
    plt.grid()
    plt.savefig('line.png')
    plt.show()


plotline()
If you run this code, you will get this chart. 
def plotbar():
    """Plot student strength per degree as a vertical bar chart.

    Saves the figure to 'bar.png' and then displays it.
    """
    # Imported here so the function does not rely on a name that is only
    # bound inside another function.
    import matplotlib.pyplot as plt

    langs = ['B.E CSE', 'B.E. Marine', 'B.E. ECE', 'B.Sc(NatSci)', 'MBA']
    students = [20, 40, 60, 80, 100]
    # The stray third argument `col` was an undefined name; the default bar
    # colour is used instead.
    plt.bar(langs, students)  # BAR
    plt.xlabel('Degree')
    plt.ylabel('Strength')
    plt.grid()
    plt.savefig('bar.png')
    plt.show()


plotbar()

def plotbarh():
    """Plot student strength per degree as a horizontal bar chart.

    Saves the figure to 'barh.png' and then displays it.
    """
    import matplotlib.pyplot as plt

    langs = ['B.E CSE', 'B.E. Marine', 'B.E. ECE', 'B.Sc(NatSci)', 'MBA']
    students = [20, 40, 60, 80, 100]
    plt.barh(langs, students, color='hotpink')  # BARh Pink
    # With horizontal bars the categories run along the y axis and the
    # values along the x axis, so the labels are swapped accordingly.
    plt.xlabel('Strength')
    plt.ylabel('Degree')
    plt.grid()
    # Own file name, so the vertical bar chart in 'bar.png' is not overwritten.
    plt.savefig('barh.png')
    plt.show()
plotbarh()
plotbarh() plots a horizontal bar chart, as shown below.
def plotbarss():
    """Draw a grouped bar chart: three data rows side by side at four x positions.

    Saves the figure to 'bars.png' and then displays it.
    """
    import numpy as np
    import matplotlib.pyplot as plt

    data = [[30, 25, 50, 20],
            [40, 23, 51, 17],
            [35, 22, 45, 19]]
    # NOTE(review): the four names were originally passed as legend labels,
    # but only three bar series exist.  Four names match the four x
    # positions, so they are used as tick labels here — confirm the intent.
    groups = ('cse', 'it', 'mech', 'mba')
    width = 0.25
    X = np.arange(len(groups))
    # plt.subplots() leaves room for the tick labels; an axes placed at
    # [0, 0, 1, 1] fills the whole figure and leaves no margin for them.
    fig, ax = plt.subplots()
    ax.bar(X + 0 * width, data[0], color='b', width=width, label='row 1')
    ax.bar(X + 1 * width, data[1], color='g', width=width, label='row 2')
    ax.bar(X + 2 * width, data[2], color='r', width=width, label='row 3')
    ax.set_xticks(X + width)  # centre each tick under its group of three bars
    ax.set_xticklabels(groups)
    ax.legend(loc='upper right')
    plt.savefig('bars.png')
    plt.show()
plotbarss()
plotbarss() displays the chart shown below, with three bars per group.
def plotscat():
    """Plot student strength per degree as a scatter chart.

    Saves the figure to 'scat.png' and then displays it.
    """
    import matplotlib.pyplot as plt

    langs = ['B.E CSE', 'B.E. Marine', 'B.E. ECE', 'B.Sc(NatSci)', 'MBA']
    students = [20, 40, 60, 80, 100]
    plt.scatter(langs, students)  # Scatter
    plt.xlabel('Degree')
    plt.ylabel('Strength')
    plt.grid()
    plt.savefig('scat.png')
    plt.show()


plotscat()
plt.scatter() will draw a scatter plot, as shown below.
def plotscatc():
    """Scatter plot whose marker colour and size are driven by the data.

    Saves the figure to 'scatcc.png' and then displays it.
    """
    import numpy as np
    import matplotlib.pyplot as plt

    np.random.seed(19680801)  # seed the random number generator.
    data = {'a': np.arange(50),
            'c': np.random.randint(0, 50, 50),
            'd': np.random.randn(50)}
    data['b'] = data['a'] + 10 * np.random.randn(50)
    data['d'] = np.abs(data['d']) * 100

    fig, ax = plt.subplots(figsize=(5, 2.7), layout='constrained')
    # The strings name keys of `data`: x='a', y='b', colour='c', size='d'.
    ax.scatter('a', 'b', c='c', s='d', data=data, marker='*')
    ax.set_xlabel('entry a')
    ax.set_ylabel('entry b')
    plt.savefig('scatcc.png')
    plt.show()


plotscatc()
This will draw markers of different sizes and colors.
marker="*" sets the plot symbol to *, and s sets the size of each marker symbol in the plot.

def plothist():
    """Plot a histogram of 250 normally distributed random values.

    The sample is drawn with mean 170 and standard deviation 10.  Saves the
    figure to 'hist.png' and then displays it.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    x = np.random.normal(170, 10, 250)
    plt.hist(x)
    plt.savefig('hist.png')
    plt.show()


# Called like the other plot helpers so that the chart is actually produced.
plothist()
The plt.hist() method plots a histogram, as shown below.
def plotpie():
    """Plot five values as an unlabelled pie chart.

    Saves the figure to 'pie.png' and then displays it.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    y = np.array([32, 40, 60, 80, 100])
    plt.pie(y)
    plt.savefig('pie.png')
    plt.show()


plotpie()
The plt.pie() method plots y as a pie chart.
def plotpie1():
    """Plot student strength per degree as a labelled pie chart.

    Saves the figure to 'pie1.png' and then displays it.
    """
    import matplotlib.pyplot as plt

    deg = ['B.E CSE', 'B.E. Marine', 'B.E. ECE', 'B.Sc(NatSci)', 'MBA']
    students = [20, 40, 60, 80, 100]
    # Creating plot: the figure only needs to be created with the wanted
    # size, so its handle is not kept in an unused variable.
    plt.figure(figsize=(8, 6))
    plt.pie(students, labels=deg)
    # show plot
    plt.savefig('pie1.png')
    plt.show()


plotpie1()

This is a pie plot with labels, which are provided by plt.pie(students, labels=deg).

Happy learning with AMET ODL!!!




Making Prompts for Profile Web Site

  Prompt: Can you create prompt to craft better draft in a given topic. Response: Sure! Could you please specify the topic for which you...