📌  相关文章
📜  在Python中查找 Pandas 数据框中元素的位置

📅  最后修改于: 2022-05-13 01:55:27.767000             🧑  作者: Mango

在Python中查找 Pandas 数据框中元素的位置

在本文中,我们将了解如何使用用户定义的函数来查找数据框中元素的位置。让我们首先使用元组列表创建一个简单的数据框,其列名为:'Name'、'Age'、'City'和'Section'。

Python3
# Import pandas library
import pandas as pd
 
# Student records, one tuple per row: (name, age, city, section)
students = [
    ('Ankit', 23, 'Delhi', 'A'),
    ('Swapnil', 22, 'Delhi', 'B'),
    ('Aman', 22, 'Dehradun', 'A'),
    ('Jiten', 22, 'Delhi', 'A'),
    ('Jeet', 21, 'Mumbai', 'B'),
]

# Build the DataFrame, labelling the tuple fields as columns
df = pd.DataFrame(
    data=students,
    columns=['Name', 'Age', 'City', 'Section'],
)

# Bare expression: displays the DataFrame in a notebook / REPL
df


Python3
# Import pandas library
import pandas as pd
 
# Student records, one tuple per row: (name, age, city, section)
students = [
    ('Ankit', 23, 'Delhi', 'A'),
    ('Swapnil', 22, 'Delhi', 'B'),
    ('Aman', 22, 'Dehradun', 'A'),
    ('Jiten', 22, 'Delhi', 'A'),
    ('Jeet', 21, 'Mumbai', 'B'),
]

# Build the DataFrame, labelling the tuple fields as columns
df = pd.DataFrame(
    data=students,
    columns=['Name', 'Age', 'City', 'Section'],
)
 
# Locate every occurrence of a value in a DataFrame and report
# each one as a (row label, column label) tuple.
def getIndexes(dfObj, value):
    """Return the positions of ``value`` inside ``dfObj``.

    Args:
        dfObj: pandas DataFrame to search.
        value: Element to look for; matched with ``DataFrame.isin``.

    Returns:
        A list of ``(row_label, column_label)`` tuples, ordered column by
        column (left to right) and, within a column, top to bottom.
        The list is empty when the value does not occur.
    """
    # Boolean frame of the same shape: True wherever the element exists
    result = dfObj.isin([value])

    # any() collapses each column to one flag: does it contain the value?
    columnHasValue = result.any().to_numpy()

    listOfPos = []

    # Walk the columns by position rather than by label, so that a frame
    # with repeated column labels still yields exactly one boolean column
    # per step (a label lookup would return a sub-DataFrame instead).
    for colPos, col in enumerate(result.columns):
        if not columnHasValue[colPos]:
            continue

        # Row labels where this particular column holds the value
        rowMask = result.iloc[:, colPos].to_numpy()
        listOfPos.extend((row, col) for row in result.index[rowMask])

    return listOfPos
 
# Collect the (row, column) position of every occurrence
# of 22 in the DataFrame
listOfPositions = getIndexes(df, 22)

print('Index positions of 22 in Dataframe : ')

# Print one position tuple per line; iterate the list directly
# instead of indexing through range(len(...))
for position in listOfPositions:
    print(position)


Python3
# Import pandas library
import pandas as pd
 
# Student records, one tuple per row: (name, age, city, section)
students = [
    ('Ankit', 23, 'Delhi', 'A'),
    ('Swapnil', 22, 'Delhi', 'B'),
    ('Aman', 22, 'Dehradun', 'A'),
    ('Jiten', 22, 'Delhi', 'A'),
    ('Jeet', 21, 'Mumbai', 'B'),
]

# Build the DataFrame, labelling the tuple fields as columns
df = pd.DataFrame(
    data=students,
    columns=['Name', 'Age', 'City', 'Section'],
)
 
# Locate every occurrence of a value in a DataFrame and report
# each one as a (row label, column label) tuple.
def getIndexes(dfObj, value):
    """Return the positions of ``value`` inside ``dfObj``.

    Args:
        dfObj: pandas DataFrame to search.
        value: Element to look for; matched with ``DataFrame.isin``.

    Returns:
        A list of ``(row_label, column_label)`` tuples, ordered column by
        column (left to right) and, within a column, top to bottom.
        The list is empty when the value does not occur.
    """
    # Boolean frame of the same shape: True wherever the element exists
    result = dfObj.isin([value])

    # any() collapses each column to one flag: does it contain the value?
    columnHasValue = result.any().to_numpy()

    listOfPos = []

    # Walk the columns by position rather than by label, so that a frame
    # with repeated column labels still yields exactly one boolean column
    # per step (a label lookup would return a sub-DataFrame instead).
    for colPos, col in enumerate(result.columns):
        if not columnHasValue[colPos]:
            continue

        # Row labels where this particular column holds the value
        rowMask = result.iloc[:, colPos].to_numpy()
        listOfPos.extend((row, col) for row in result.index[rowMask])

    return listOfPos
 
# Elements to search for; each one becomes a key of the result dictionary
listOfElems = [22, 'Delhi']

# Map every searched element to the list of (row, column)
# positions that getIndexes() reports for it
dictOfPos = {
    target: getIndexes(df, target)
    for target in listOfElems
}

print('Position of given elements in Dataframe are : ')

# Show each element followed by all of its positions
for key, value in dictOfPos.items():
    print(key, ' : ', value)


输出:

数据框

示例 1:在数据框中查找元素的位置。

Python3

# Import pandas library
import pandas as pd
 
# Student records, one tuple per row: (name, age, city, section)
students = [
    ('Ankit', 23, 'Delhi', 'A'),
    ('Swapnil', 22, 'Delhi', 'B'),
    ('Aman', 22, 'Dehradun', 'A'),
    ('Jiten', 22, 'Delhi', 'A'),
    ('Jeet', 21, 'Mumbai', 'B'),
]

# Build the DataFrame, labelling the tuple fields as columns
df = pd.DataFrame(
    data=students,
    columns=['Name', 'Age', 'City', 'Section'],
)
 
# Locate every occurrence of a value in a DataFrame and report
# each one as a (row label, column label) tuple.
def getIndexes(dfObj, value):
    """Return the positions of ``value`` inside ``dfObj``.

    Args:
        dfObj: pandas DataFrame to search.
        value: Element to look for; matched with ``DataFrame.isin``.

    Returns:
        A list of ``(row_label, column_label)`` tuples, ordered column by
        column (left to right) and, within a column, top to bottom.
        The list is empty when the value does not occur.
    """
    # Boolean frame of the same shape: True wherever the element exists
    result = dfObj.isin([value])

    # any() collapses each column to one flag: does it contain the value?
    columnHasValue = result.any().to_numpy()

    listOfPos = []

    # Walk the columns by position rather than by label, so that a frame
    # with repeated column labels still yields exactly one boolean column
    # per step (a label lookup would return a sub-DataFrame instead).
    for colPos, col in enumerate(result.columns):
        if not columnHasValue[colPos]:
            continue

        # Row labels where this particular column holds the value
        rowMask = result.iloc[:, colPos].to_numpy()
        listOfPos.extend((row, col) for row in result.index[rowMask])

    return listOfPos
 
# Collect the (row, column) position of every occurrence
# of 22 in the DataFrame
listOfPositions = getIndexes(df, 22)

print('Index positions of 22 in Dataframe : ')

# Print one position tuple per line; iterate the list directly
# instead of indexing through range(len(...))
for position in listOfPositions:
    print(position)

输出 :

数据框中元素的索引

现在让我们了解函数 getIndexes() 的工作原理。isin() 接受要查找的值并返回一个由布尔值组成的数据框。这个布尔数据框的大小与原始数据框相同:在原始数据框中存在给定元素的位置,其值为 True,否则为 False。接着,对布尔数据框调用 any(),得到一个布尔序列,从中可以找出包含元素 22 的列的名称,即含有 True 的那些列。然后在布尔数据框中遍历每个选定的列,对于每一列,找出值为 True 的行。这些值为 True 的行索引与列名的组合,就是 22 在数据框中的索引位置。这就是 getIndexes() 找到给定元素的确切索引位置、并以(行,列)元组的形式存储每个位置的方式。最后,它返回一个元组列表,表示该元素在数据框中的所有索引位置。
示例 2:在 DataFrame 中查找多个元素的位置。

Python3

# Import pandas library
import pandas as pd
 
# Student records, one tuple per row: (name, age, city, section)
students = [
    ('Ankit', 23, 'Delhi', 'A'),
    ('Swapnil', 22, 'Delhi', 'B'),
    ('Aman', 22, 'Dehradun', 'A'),
    ('Jiten', 22, 'Delhi', 'A'),
    ('Jeet', 21, 'Mumbai', 'B'),
]

# Build the DataFrame, labelling the tuple fields as columns
df = pd.DataFrame(
    data=students,
    columns=['Name', 'Age', 'City', 'Section'],
)
 
# Locate every occurrence of a value in a DataFrame and report
# each one as a (row label, column label) tuple.
def getIndexes(dfObj, value):
    """Return the positions of ``value`` inside ``dfObj``.

    Args:
        dfObj: pandas DataFrame to search.
        value: Element to look for; matched with ``DataFrame.isin``.

    Returns:
        A list of ``(row_label, column_label)`` tuples, ordered column by
        column (left to right) and, within a column, top to bottom.
        The list is empty when the value does not occur.
    """
    # Boolean frame of the same shape: True wherever the element exists
    result = dfObj.isin([value])

    # any() collapses each column to one flag: does it contain the value?
    columnHasValue = result.any().to_numpy()

    listOfPos = []

    # Walk the columns by position rather than by label, so that a frame
    # with repeated column labels still yields exactly one boolean column
    # per step (a label lookup would return a sub-DataFrame instead).
    for colPos, col in enumerate(result.columns):
        if not columnHasValue[colPos]:
            continue

        # Row labels where this particular column holds the value
        rowMask = result.iloc[:, colPos].to_numpy()
        listOfPos.extend((row, col) for row in result.index[rowMask])

    return listOfPos
 
# Elements to search for; each one becomes a key of the result dictionary
listOfElems = [22, 'Delhi']

# Map every searched element to the list of (row, column)
# positions that getIndexes() reports for it
dictOfPos = {
    target: getIndexes(df, target)
    for target in listOfElems
}

print('Position of given elements in Dataframe are : ')

# Show each element followed by all of its positions
for key, value in dictOfPos.items():
    print(key, ' : ', value)

输出 :

数据框中元素的索引