|

楼主 |
发表于 2024-12-30 21:59:28
|
显示全部楼层
这是我目前写出的代码,请各位前辈给点意见
# SO2 pollution exercise: clean the five-city data set, extract New York,
# restrict it to 2007-2009, and plot monthly averages.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# --- File 1: load the raw CSV, clean it, export it to Excel ---------------
df1 = pd.read_csv("pollution_us_5city_2006_2010_SO2.csv")
print(df1.head())   # first five rows (the earlier slice [:4] only showed four)
print(df1.tail(2))  # last two rows

# Remove the identifier/address columns.
df1 = df1.drop(columns=["State Code", "County Code", "Site Num", "Address"])

# Fill missing AQI values with the column mean, truncated to an integer.
df1["SO2 AQI"] = df1["SO2 AQI"].fillna(int(df1["SO2 AQI"].mean()))
df1.to_excel("pollution_us_5city_2006_2010_SO2.xlsx", index=False)

# --- File 2: read the Excel file back and keep only the New York rows -----
df2 = pd.read_excel("pollution_us_5city_2006_2010_SO2.xlsx")
ny_df = df2[df2["City"] == "New York"]
ny_df.to_csv("pollution_us_NewYork_2006_2010_SO2.txt", sep=" ", index=False)

# --- File 3: read the space-separated file, keep 2007-01-01..2009-12-31 ---
df3 = pd.read_csv(
    "pollution_us_NewYork_2006_2010_SO2.txt",
    sep=" ",
    parse_dates=["Date Local"],
)
# Series.between is inclusive on both ends, matching >= start and <= end.
in_range = df3["Date Local"].between("2007-01-01", "2009-12-31")
date_df3 = df3[in_range]
date_df3.to_csv("pollution_us_NewYork_2007_2009_SO2.csv", index=False)

# --- File 4: monthly averages and plot ------------------------------------
df4 = pd.read_csv("pollution_us_NewYork_2007_2009_SO2.csv")
# resample() needs a datetime index, so parse the date column and index by it.
df4["Date Local"] = pd.to_datetime(df4["Date Local"])
df4 = df4.set_index("Date Local")

# Column to plot -> line colour. Selecting the wanted columns directly (rather
# than dropping every other column by name) keeps text columns out of the mean
# and does not fail when an unrelated column such as "ID" is missing.
plot_columns = {
    "SO2 Mean": "red",
    "SO2 1st Max Hour": "green",
    "SO2 AQI": "blue",
}
# "ME" (month-end) is the frequency alias used by pandas >= 2.2.
monthly = df4[list(plot_columns)].resample("ME").mean()

plt.figure(figsize=(12, 8))
for column, color in plot_columns.items():
    # One Series per call so each line gets exactly one legend entry.
    plt.plot(monthly.index, monthly[column], color=color, label=column)
plt.xticks(rotation=90)
plt.xlabel("Year - Month")
plt.ylabel("Value")
plt.title("SO2 Statistics in New York from 2007 to 2009")
plt.legend()
plt.show()
复制代码 |
|