# Reverse the list in place by swapping each element with its mirror position.
a = [1, 0, 5, 4, 8, 9]
# Exactly len(a) // 2 swaps are needed: for an odd length the middle element
# stays where it is, for an even length the two middle elements must be
# swapped as well (a bound of (len(a) - 1) // 2 would skip that last pair).
for i in range(len(a) // 2):
    a[i], a[-i - 1] = a[-i - 1], a[i]
print(a)
数据准备
# pandas must be imported before it is used to build the sample frame.
import pandas as pd

# Sample data: one row per (machine_id, process_id, activity_type) with the
# timestamp at which that 'start' or 'end' event happened.
data = [
    [0, 0, 'start', 0.712], [0, 0, 'end', 1.52],
    [0, 1, 'start', 3.14], [0, 1, 'end', 4.12],
    [1, 0, 'start', 0.55], [1, 0, 'end', 1.55],
    [1, 1, 'start', 0.43], [1, 1, 'end', 1.42],
    [2, 0, 'start', 4.1], [2, 0, 'end', 4.512],
    [2, 1, 'start', 2.5], [2, 1, 'end', 5],
]
activity = pd.DataFrame(
    data,
    columns=['machine_id', 'process_id', 'activity_type', 'timestamp'],
).astype({
    'machine_id': 'Int64',
    'process_id': 'Int64',
    'activity_type': 'object',
    'timestamp': 'Float64',
})
def get_average_time(activity: pd.DataFrame) -> pd.DataFrame:
    """Return the average processing time per machine, rounded to 3 decimals.

    The processing time of one (machine_id, process_id) pair is the difference
    between consecutive timestamps inside that pair, i.e. ``end - start`` when
    the pair has one 'start' row and one 'end' row.

    Args:
        activity: frame with ``machine_id``, ``process_id`` and ``timestamp``
            columns. It is not modified.

    Returns:
        A frame with columns ``machine_id`` and ``processing_time`` (mean of
        the per-process durations of that machine).
    """
    pair_keys = ['machine_id', 'process_id']
    # Work on a sorted copy: the caller's frame stays untouched, and the
    # difference is always "later minus earlier" regardless of row order.
    ordered = activity.sort_values(pair_keys + ['timestamp'])
    durations = ordered.assign(
        processing_time=ordered.groupby(pair_keys)['timestamp'].diff()
    )
    # The first row of every pair has no predecessor, so its diff is NA;
    # only those rows are discarded (NA in unrelated columns is irrelevant).
    durations = durations.dropna(subset=['processing_time'])
    per_machine = durations.groupby('machine_id', as_index=False)['processing_time'].mean()
    return per_machine.round(3)
查找每个月和每个国家/地区的事务数及其总金额、已批准的事务数及其总金额
数据准备:
import pandas as pd
# Sample transactions: id, country, state (approved/declined), amount, date.
data = [
    [121, 'US', 'approved', 1000, '2018-12-18'],
    [122, 'US', 'declined', 2000, '2018-12-19'],
    [123, 'US', 'approved', 2000, '2019-01-01'],
    [124, 'DE', 'approved', 2000, '2019-01-07'],
]
# Build the frame first, then cast every column to its intended dtype.
transactions = pd.DataFrame(
    data=data,
    columns=['id', 'country', 'state', 'amount', 'trans_date'],
)
transactions = transactions.astype({
    'id': 'Int64',
    'country': 'object',
    'state': 'object',
    'amount': 'Int64',
    'trans_date': 'datetime64[ns]',
})
对DataFrame进行处理
# Format the transaction date as 'YYYY-MM' with the datetime accessor.
transactions['month'] = transactions['trans_date'].dt.strftime('%Y-%m')

# Pre-compute the "approved" view of each row once, in vectorized form, on a
# temporary copy. This avoids per-group lambdas that reach back into the
# global frame and depend on boolean-index realignment.
approved_mask = transactions['state'] == 'approved'
summary_input = transactions.assign(
    approved_flag=approved_mask,
    # Amount of the row if it was approved, otherwise 0.
    approved_amount=transactions['amount'].where(approved_mask, 0),
)

# Group by country and month, then aggregate with named aggregations
# (output column = (input column, aggregation)).
df = summary_input.groupby(['country', 'month']).agg(
    trans_count=('id', 'count'),  # number of transactions
    approved_count=('approved_flag', 'sum'),  # True counts as 1, False as 0
    trans_total_amount=('amount', 'sum'),
    approved_total_amount=('approved_amount', 'sum'),  # approved rows only
).reset_index()
print(df)
数据准备:
import pandas as pd
from datetime import datetime
from collections import Counter
# Sample data: one [id, salary] pair per row.
data = [
    [1, 100],
    [2, 200],
    [3, 300],
]
# Build the frame, then pin both columns to 64-bit integers.
employee = pd.DataFrame(data=data, columns=['id', 'salary'])
employee = employee.astype({'id': 'int64', 'salary': 'int64'})
id:员工id
salary:员工薪水
取出第二高的薪水;如果不存在第二高的薪水,则返回Null
import pandas as pd
def second_highest_salary(employee: pd.DataFrame) -> pd.DataFrame:
    """Return the second-highest distinct salary as a one-row frame.

    Args:
        employee: frame with a ``salary`` column. It is not modified.

    Returns:
        A frame with the single column ``SecondHighestSalary`` and exactly one
        row: the second-highest distinct salary, or ``None`` when fewer than
        two distinct salaries exist.
    """
    # Distinct, non-missing salaries from highest to lowest. Working on a
    # derived Series keeps the caller's frame free of helper columns.
    distinct_salaries = (
        employee['salary']
        .dropna()
        .drop_duplicates()
        .sort_values(ascending=False)
    )
    # Taking one scalar guarantees a single row even when several employees
    # share the second-highest salary.
    second = distinct_salaries.iloc[1] if len(distinct_salaries) > 1 else None
    return pd.DataFrame({'SecondHighestSalary': [second]})
1.首先需把入参转换成日期类型
2.使用Python自带的calendar库获取该日期所在月份的最后一天
from datetime import datetime
import calendar
import re
# Normalise the supported inputs to a datetime object.
def date_transfer(date):
    """Convert ``date`` to a ``datetime``.

    Supported inputs:
      * a ``datetime`` instance, returned unchanged;
      * a string containing ``YYYY-MM-DD``, parsed with ``'%Y-%m-%d'``;
      * a string containing eight consecutive digits, parsed with ``'%Y%m%d'``.

    Returns:
        The resulting ``datetime``, or ``None`` for any other input.

    Raises:
        ValueError: propagated from ``datetime.strptime`` when a string
            contains a matching pattern but is not, as a whole, a valid date
            in that format.
    """
    if isinstance(date, datetime):
        return date
    if not isinstance(date, str):
        return None
    # Raw strings keep ``\d`` from being treated as an (invalid) string escape.
    if re.search(r'\d{4}-\d{2}-\d{2}', date):
        return datetime.strptime(date, '%Y-%m-%d')
    # Eight consecutive digits: year, month and day without separators.
    if re.search(r'\d{8}', date):
        return datetime.strptime(date, '%Y%m%d')
    return None
def get_last_day(date):
    """Return the last day of the month that ``date`` falls in.

    ``date`` is first normalised with ``date_transfer``. The result is a tuple
    ``(text, last_day)``: ``text`` is ``"<year>-<month>-<last_day>"`` built from
    the plain integer values (no zero padding), and ``last_day`` is the number
    of the final day of that month.
    """
    parsed = date_transfer(date)
    # calendar.monthrange returns a tuple (x, y): x is the weekday of the first
    # day of the month (0 = Monday, 6 = Sunday), y is the last day of the month.
    _, days_in_month = calendar.monthrange(parsed.year, parsed.month)
    label = f"{parsed.year}-{parsed.month}-{days_in_month}"
    return (label, days_in_month)
因篇幅问题不能全部显示,请点此查看更多更全内容