Monday 28 March 2022

Pandas#03

Pandas ...


Most of the time people confused with DataFrame.loc()  and Datframe.iloc() Methods
We will see the difference





  • loc[] is used to select rows and columns by Names/Labels
  • iloc[] is used to select rows and columns by Integer Index/Positionzero based index position.

  • import pandas as pd
    technologies = {
    'Courses':["B.Sc","B.E.","MBA","B.COM","BA"],
    'Fee' :[20000,55000,60000,25000,18000],
    'Duration':['3y','4y','2y','3y','2y'],
    'Discount':[10,25,5,8,6]
    }
    index_labels=['r1','r2','r3','r4','r5']
    df = pd.DataFrame(technologies,index=index_labels)
    print(df)
  • Result :
  •    Courses    Fee Duration  Discount
    r1 B.Sc 20000 3y 10
    r2 B.E. 55000 4y 25
    r3 MBA 60000 2y 5
    r4 B.COM 25000 3y 8
    r5 BA 18000 2y 6
The following code and the outputs given in the Doc String will help us understand loc and iloc
differences, retrieve rows, rows range, columns, columns range, add numeric columns, con cat
str columns.
# Select Single Row by Index Label
print(df.loc['r2'])

# Select Single Row by Index
print(df.iloc[1])

"""
Courses B.E.
Fee 55000
Duration 4y
Discount 25
Name: r2, dtype: object
Courses B.E.
Fee 55000
Duration 4y
Discount 25
"""

# Select Single Column by label
print(df.loc[:, "Courses"])
# Select Single Column by Index
print(df.iloc[:, 0])

"""
r1 B.Sc
r2 B.E.
r3 MBA
r4 B.COM
r5 BA
Name: Courses, dtype: object
r1 B.Sc
r2 B.E.
r3 MBA
r4 B.COM
r5 BA
Name: Courses, dtype: object
"""

# Select Multiple Rows by Label
print(df.loc[['r2','r3']])

# Select Multiple Rows by Index
print(df.iloc[[1,2]])
"""
Courses Fee Duration Discount
r2 B.E. 55000 4y 25
r3 MBA 60000 2y 5
Courses Fee Duration Discount
r2 B.E. 55000 4y 25
r3 MBA 60000 2y 5
"""

# Select Multiple Columns by labels
print(df.loc[:, ["Courses","Fee","Discount"]])

# Select Multiple Columns by Index
print(df.iloc[:, [0,1,3]])
"""
Courses Fee Discount
r1 B.Sc 20000 10
r2 B.E. 55000 25
r3 MBA 60000 5
r4 B.COM 25000 8
r5 BA 18000 6
Courses Fee Discount
r1 B.Sc 20000 10
r2 B.E. 55000 25
r3 MBA 60000 5
r4 B.COM 25000 8
r5 BA 18000 6
"""

# Select Rows Between two Index Labels
# Includes both r1 and r4 rows
print(df.loc['r1':'r4'])

# Select Rows Between two Indexs
# Includes Index 0 & Execludes 4
print(df.iloc[0:4])
"""
Courses Fee Duration Discount
r1 B.Sc 20000 3y 10
r2 B.E. 55000 4y 25
r3 MBA 60000 2y 5
r4 B.COM 25000 3y 8
Courses Fee Duration Discount
r1 B.Sc 20000 3y 10
r2 B.E. 55000 4y 25
r3 MBA 60000 2y 5
r4 B.COM 25000 3y 8
"""

# Select Columns between two Labels
# Includes both 'Fee' and 'Discount' columns
print(df.loc[:,'Fee':'Discount'])

# Select Columns between two Indexes
# Includes Index 1 & Execludes 4
print(df.iloc[:,1:4])
"""
Fee Duration Discount
r1 20000 3y 10
r2 55000 4y 25
r3 60000 2y 5
r4 25000 3y 8
r5 18000 2y 6
Fee Duration Discount
r1 20000 3y 10
r2 55000 4y 25
r3 60000 2y 5
r4 25000 3y 8
r5 18000 2y 6
"""
# Select Alternate rows By indeces
print(df.loc['r1':'r4':2])

# Select Alternate rows By Index
print(df.iloc[0:4:2])
"""
Courses Fee Duration Discount
r1 B.Sc 20000 3y 10
r3 MBA 60000 2y 5
Courses Fee Duration Discount
r1 B.Sc 20000 3y 10
r3 MBA 60000 2y 5
"""
# Select Alternate Columns between two Labels
print(df.loc[:,'Fee':'Discount':2])

# Select Alternate Columns between two Indexes
print(df.iloc[:,1:4:2])
"""
Fee Discount
r1 20000 10
r2 55000 25
r3 60000 5
r4 25000 8
r5 18000 6
Fee Discount
r1 20000 10
r2 55000 25
r3 60000 5
r4 25000 8
r5 18000 6
"""

# Using Conditions
print(df.loc[df['Fee'] >= 50000])

print(df.iloc[list(df['Fee'] >= 50000)])
"""
Courses Fee Duration Discount
r2 B.E. 55000 4y 25
r3 MBA 60000 2y 5
Courses Fee Duration Discount
r2 B.E. 55000 4y 25
r3 MBA 60000 2y 5
"""

column_names = list(df.columns.values)

# Get the list of all column names from headers
column_names = df.columns.values.tolist()

# Using list(df) to get the column headers as a list
column_names = list(df.columns)

# Using list(df) to get the list of all Column Names
column_names = list(df)

# Dataframe show all columns sorted list
column_names = sorted(df)

# Get all Column Header Labels as List
for column_headers in df.columns:
print(column_headers)

"""
Courses
Fee
Duration
Discount
"""

column_names = df.keys().values.tolist()

# Get all numeric columns
numeric_columns = df._get_numeric_data().columns.values.tolist()

# Simple Pandas Numeric Columns Code
numeric_columns = df.dtypes[df.dtypes == "int64"].index.values.tolist()
print(numeric_columns) # ['Fee', 'Discount']

# Using map() function to combine two columns of text
df["Period"] = df["Courses"].map(str) + " " + df["Duration"]
print(df)
"""
Courses Fee Duration Discount Period
r1 B.Sc 20000 3y 10 B.Sc 3y
r2 B.E. 55000 4y 25 B.E. 4y
r3 MBA 60000 2y 5 MBA 2y
r4 B.COM 25000 3y 8 B.COM 3y
r5 BA 18000 2y 6 BA 2y
"""

# Using + operator to combine two columns
df["Period"] = df['Courses'].astype(str) +"-"+ df["Duration"]
print(df)

# Using apply() method to combine two columns of text
df["Period"] = df[["Courses", "Duration"]].apply("-".join, axis=1)
print(df)

# Using DataFrame.agg() to combine two columns of text
df["period"] = df[['Courses', 'Duration']].agg('-'.join, axis=1)
print(df)
"""
Courses Fee Duration Discount Period period
r1 B.Sc 20000 3y 10 B.Sc-3y B.Sc-3y
r2 B.E. 55000 4y 25 B.E.-4y B.E.-4y
r3 MBA 60000 2y 5 MBA-2y MBA-2y
r4 B.COM 25000 3y 8 B.COM-3y B.COM-3y
r5 BA 18000 2y 6 BA-2y BA-2y

"""

# Using Series.str.cat() function
df["Period"] = df["Courses"].str.cat(df["Duration"], sep = "-")
print(df)

# Using DataFrame.apply() and lambda function
df["Period"] = df[["Courses", "Duration"]].apply(lambda x: "-".join(x), axis =1)
print(df)

# Using map() function to combine two columns of text
df["Period"] = df["Courses"].map(str) + "-" + df["Duration"]
print(df)
Happy DataFraming in Pandas!!!!

Ref : https://sparkbyexamples.com/pandas/pandas-get-column-names/

No comments:

Post a Comment

Making Prompts for Profile Web Site

  Prompt: Can you create prompt to craft better draft in a given topic. Response: Sure! Could you please specify the topic for which you...