Pandas ...

Most of the time people confused with DataFrame.loc() and Datframe.iloc() Methods
We will see the difference

loc[] is used to select rows and columns by Names/Labels
iloc[] is used to select rows and columns by Integer Index/Position. zero based index position.

import pandas as pd
technologies = {
    'Courses':["B.Sc","B.E.","MBA","B.COM","BA"],
    'Fee' :[20000,55000,60000,25000,18000],    'Duration':['3y','4y','2y','3y','2y'],    'Discount':[10,25,5,8,6]              }
index_labels=['r1','r2','r3','r4','r5']
df = pd.DataFrame(technologies,index=index_labels)
print(df)

Result :

   Courses    Fee Duration  Discount
r1    B.Sc  20000       3y        10
r2    B.E.  55000       4y        25
r3     MBA  60000       2y         5
r4   B.COM  25000       3y         8
r5      BA  18000       2y         6

The following code and the outputs given in the Doc String will help us understand loc and iloc

differences, retrieve rows, rows range, columns, columns range, add numeric columns, con cat

str columns.

# Select Single Row by Index Label
print(df.loc['r2'])

# Select Single Row by Index
print(df.iloc[1])

"""
Courses      B.E.
Fee         55000
Duration       4y
Discount       25
Name: r2, dtype: object
Courses      B.E.
Fee         55000
Duration       4y
Discount       25
"""

# Select Single Column by label
print(df.loc[:, "Courses"])
# Select Single Column by Index
print(df.iloc[:, 0])

"""
r1     B.Sc
r2     B.E.
r3      MBA
r4    B.COM
r5       BA
Name: Courses, dtype: object
r1     B.Sc
r2     B.E.
r3      MBA
r4    B.COM
r5       BA
Name: Courses, dtype: object
"""

# Select Multiple Rows by Label
print(df.loc[['r2','r3']])

# Select Multiple Rows by Index
print(df.iloc[[1,2]])
"""
   Courses    Fee Duration  Discount
r2    B.E.  55000       4y        25
r3     MBA  60000       2y         5
   Courses    Fee Duration  Discount
r2    B.E.  55000       4y        25
r3     MBA  60000       2y         5
"""

# Select Multiple Columns by labels
print(df.loc[:, ["Courses","Fee","Discount"]])

# Select Multiple Columns by Index
print(df.iloc[:, [0,1,3]])
"""
   Courses    Fee  Discount
r1    B.Sc  20000        10
r2    B.E.  55000        25
r3     MBA  60000         5
r4   B.COM  25000         8
r5      BA  18000         6
   Courses    Fee  Discount
r1    B.Sc  20000        10
r2    B.E.  55000        25
r3     MBA  60000         5
r4   B.COM  25000         8
r5      BA  18000         6
"""

# Select Rows Between two Index Labels
# Includes both r1 and r4 rows
print(df.loc['r1':'r4'])

# Select Rows Between two Indexs
# Includes Index 0 & Execludes 4
print(df.iloc[0:4])
"""
   Courses    Fee Duration  Discount
r1    B.Sc  20000       3y        10
r2    B.E.  55000       4y        25
r3     MBA  60000       2y         5
r4   B.COM  25000       3y         8
   Courses    Fee Duration  Discount
r1    B.Sc  20000       3y        10
r2    B.E.  55000       4y        25
r3     MBA  60000       2y         5
r4   B.COM  25000       3y         8
"""

# Select Columns between two Labels
# Includes both 'Fee' and 'Discount' columns
print(df.loc[:,'Fee':'Discount'])

# Select Columns between two Indexes
# Includes Index 1 & Execludes 4
print(df.iloc[:,1:4])
"""
      Fee Duration  Discount
r1  20000       3y        10
r2  55000       4y        25
r3  60000       2y         5
r4  25000       3y         8
r5  18000       2y         6
      Fee Duration  Discount
r1  20000       3y        10
r2  55000       4y        25
r3  60000       2y         5
r4  25000       3y         8
r5  18000       2y         6
"""
# Select Alternate rows By indeces
print(df.loc['r1':'r4':2])

# Select Alternate rows By Index
print(df.iloc[0:4:2])
"""
   Courses    Fee Duration  Discount
r1    B.Sc  20000       3y        10
r3     MBA  60000       2y         5
   Courses    Fee Duration  Discount
r1    B.Sc  20000       3y        10
r3     MBA  60000       2y         5
"""
# Select Alternate Columns between two Labels
print(df.loc[:,'Fee':'Discount':2])

# Select Alternate Columns between two Indexes
print(df.iloc[:,1:4:2])
"""
      Fee  Discount
r1  20000        10
r2  55000        25
r3  60000         5
r4  25000         8
r5  18000         6
      Fee  Discount
r1  20000        10
r2  55000        25
r3  60000         5
r4  25000         8
r5  18000         6
"""

# Using Conditions
print(df.loc[df['Fee'] >= 50000])

print(df.iloc[list(df['Fee'] >= 50000)])
"""
   Courses    Fee Duration  Discount
r2    B.E.  55000       4y        25
r3     MBA  60000       2y         5
   Courses    Fee Duration  Discount
r2    B.E.  55000       4y        25
r3     MBA  60000       2y         5
"""

column_names = list(df.columns.values)

# Get the list of all column names from headers
column_names = df.columns.values.tolist()

# Using list(df) to get the column headers as a list
column_names = list(df.columns)

# Using list(df) to get the list of all Column Names
column_names = list(df)

# Dataframe show all columns sorted list
column_names = sorted(df)

# Get all Column Header Labels as List
for column_headers in df.columns:
    print(column_headers)

"""
Courses
Fee
Duration
Discount
"""

column_names = df.keys().values.tolist()

# Get all numeric columns
numeric_columns = df._get_numeric_data().columns.values.tolist()

# Simple Pandas Numeric Columns Code
numeric_columns = df.dtypes[df.dtypes == "int64"].index.values.tolist()
print(numeric_columns) # ['Fee', 'Discount']

# Using map() function to combine two columns of text
df["Period"] = df["Courses"].map(str) + " " + df["Duration"]
print(df)
"""
  Courses    Fee Duration  Discount    Period
r1    B.Sc  20000       3y        10   B.Sc 3y
r2    B.E.  55000       4y        25   B.E. 4y
r3     MBA  60000       2y         5    MBA 2y
r4   B.COM  25000       3y         8  B.COM 3y
r5      BA  18000       2y         6     BA 2y
"""

# Using + operator to combine two columns
df["Period"] = df['Courses'].astype(str) +"-"+ df["Duration"]
print(df)

# Using apply() method to combine two columns of text
df["Period"] = df[["Courses", "Duration"]].apply("-".join, axis=1)
print(df)

# Using DataFrame.agg() to combine two columns of text
df["period"] = df[['Courses', 'Duration']].agg('-'.join, axis=1)
print(df)
"""
   Courses    Fee Duration  Discount    Period    period
r1    B.Sc  20000       3y        10   B.Sc-3y   B.Sc-3y
r2    B.E.  55000       4y        25   B.E.-4y   B.E.-4y
r3     MBA  60000       2y         5    MBA-2y    MBA-2y
r4   B.COM  25000       3y         8  B.COM-3y  B.COM-3y
r5      BA  18000       2y         6     BA-2y     BA-2y

"""

# Using Series.str.cat() function
df["Period"] = df["Courses"].str.cat(df["Duration"], sep = "-")
print(df)

# Using DataFrame.apply() and lambda function
df["Period"] = df[["Courses", "Duration"]].apply(lambda x: "-".join(x), axis =1)
print(df)

# Using map() function to combine two columns of text
df["Period"] = df["Courses"].map(str) + "-" + df["Duration"]
print(df)

Happy DataFraming in Pandas!!!!

Ref : https://sparkbyexamples.com/pandas/pandas-get-column-names/

AMET-SOLID

Monday, 28 March 2022

Pandas#03

Pandas ...

No comments:

Post a Comment

Work Diary - 2025

Happy open and Distance Learning!

Blog Archive