1.str.contains(),类似re.search(),检查字符串中是否包含与模式匹配的内容(模式默认按正则表达式解析)
import pandas as pd

# Sample data
data = {'col1': ['apple', 'banana', 'cherry', 'pineapple', 'grape']}
df = pd.DataFrame(data)

# Flag the rows whose value contains the letter 'a' anywhere in the string.
# str.contains() interprets the pattern as a regular expression by default
# and searches the whole string, like re.search().
pattern = r'a'
df['contains_a'] = df['col1'].str.contains(pattern)
print(df)

# Expected output:
#         col1  contains_a
# 0      apple        True
# 1     banana        True
# 2     cherry       False
# 3  pineapple        True
# 4      grape        True
2.str.match(),类似re.match(),检查字符串开头是否与模式匹配(只从开头匹配,不要求匹配整个字符串)
import pandas as pd

# Sample data
data = {'col1': ['apple', 'banana', 'cherry', 'pineapple', 'grape']}
df = pd.DataFrame(data)

# Flag the rows whose value starts with 'a'.
# str.match() anchors the pattern at the start of each string (like
# re.match()) but does not require the whole string to match; use
# str.fullmatch() for that. The leading '^' is therefore redundant here and
# is kept only to make the intent explicit.
pattern = r'^a'
df['starts_with_a'] = df['col1'].str.match(pattern)
print(df)

# Expected output:
#         col1  starts_with_a
# 0      apple           True
# 1     banana          False
# 2     cherry          False
# 3  pineapple          False
# 4      grape          False
3.str.replace(),类似re.sub(),查找特定字符串,并替换
import pandas as pd

# Sample data
data = {'col1': ['apple', 'banana', 'cherry', 'pineapple', 'grape']}
df = pd.DataFrame(data)

# Replace every 'a' with '@'.
# regex=True makes str.replace() treat the first argument as a regular
# expression, like re.sub(); all occurrences in each string are replaced.
df['replaced'] = df['col1'].str.replace(r'a', '@', regex=True)
print(df)

# Expected output:
#         col1   replaced
# 0      apple      @pple
# 1     banana     b@n@n@
# 2     cherry     cherry
# 3  pineapple  pine@pple
# 4      grape      gr@pe
4.str.extract(),类似re.search()后取match.groups(),提取第一个匹配中各捕获组的内容
import pandas as pd

# Sample data
data = {'col1': ['apple123', 'banana456', 'cherry789', 'pineapple000', 'grape987']}
df = pd.DataFrame(data)

# Extract the numeric part of each value.
# str.extract() returns the capture groups of the first match in each string.
# The extracted values are strings, so leading zeros ('000') are preserved.
pattern = r'(\d+)'  # one capture group: a run of digits
# expand=False returns a Series for a single capture group, which is the
# natural shape for assigning to one column (the default expand=True returns
# a one-column DataFrame).
df['numbers'] = df['col1'].str.extract(pattern, expand=False)
print(df)

# Expected output:
#            col1  numbers
# 0      apple123      123
# 1     banana456      456
# 2     cherry789      789
# 3  pineapple000      000
# 4      grape987      987
5.str.findall(),类似re.findall(),查找所有匹配的字符串,返回列表
import pandas as pd

# Sample data
data = {'col1': ['apple123', 'banana456', 'cherry789', 'pineapple000', 'grape987']}
df = pd.DataFrame(data)

# Find every run of digits in each value.
# str.findall() returns, per row, a list of all non-overlapping matches,
# like re.findall(); each row here contains exactly one run of digits.
pattern = r'(\d+)'  # a run of digits
df['all_numbers'] = df['col1'].str.findall(pattern)
print(df)

# Expected output:
#            col1  all_numbers
# 0      apple123        [123]
# 1     banana456        [456]
# 2     cherry789        [789]
# 3  pineapple000        [000]
# 4      grape987        [987]