#库的导入#
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Stage 1: load the raw five-city SO2 data, preview it, clean it and export it to Excel.
df1 = pd.read_csv("pollution_us_5city_2006_2010_SO2.csv")
print(df1.head())  # preview the first five rows
print(df1.tail(2))  # preview the last two rows
# Drop the site-identifier and address columns.
df1 = df1.drop(columns=["State Code", "County Code", "Site Num", "Address"])
# Fill missing AQI readings with the column mean truncated to an integer.
# If every value is missing the mean is NaN and int() would raise, so the
# fill is skipped in that case and the column is exported unchanged.
so2_aqi_mean = df1["SO2 AQI"].mean()
if pd.notna(so2_aqi_mean):
    df1["SO2 AQI"] = df1["SO2 AQI"].fillna(int(so2_aqi_mean))
# Export the cleaned table without the row index.
df1.to_excel('pollution_us_5city_2006_2010_SO2.xlsx', index=False)
# Stage 2: reload the cleaned workbook and keep only the New York rows.
df2 = pd.read_excel("pollution_us_5city_2006_2010_SO2.xlsx")
is_new_york = df2['City'] == 'New York'  # boolean row mask
ny_df = df2[is_new_york]
# Write the subset as a space-separated text file without the row index.
ny_df.to_csv(
    'pollution_us_NewYork_2006_2010_SO2.txt',
    sep=' ',
    index=False,
)
# Stage 3: reload the New York extract and keep rows dated 2007-01-01 through 2009-12-31.
df3 = pd.read_csv(
    "pollution_us_NewYork_2006_2010_SO2.txt",
    sep=' ',
    parse_dates=['Date Local'],  # parse the date column while reading
)
on_or_after_start = df3['Date Local'] >= '2007-01-01'
on_or_before_end = df3['Date Local'] <= '2009-12-31'
# Both bounds are inclusive.
date_df3 = df3[on_or_after_start & on_or_before_end]
date_df3.to_csv('pollution_us_NewYork_2007_2009_SO2.csv', index=False)
# Stage 4: reload the 2007-2009 New York extract and compute monthly averages.
df4 = pd.read_csv("pollution_us_NewYork_2007_2009_SO2.csv")
df4['Date Local'] = pd.to_datetime(df4['Date Local'])  # make sure the date column is datetime
df4 = df4.set_index('Date Local')  # resample() below operates on the datetime index
# Select each measure directly instead of dropping every other column by name:
# this does not fail when an unrelated column is absent, and it keeps
# non-numeric columns out of the monthly mean.
_mean = df4[["SO2 Mean"]]
_max = df4[["SO2 1st Max Hour"]]
_aqi = df4[["SO2 AQI"]]
# 'ME' is the month-end frequency alias (pandas >= 2.2; older releases spell it 'M').
monthly_mean = _mean.resample('ME').mean()
# NOTE(review): despite its name this is the monthly *mean* of "SO2 1st Max Hour",
# not a maximum and not "SO2 1st Max Value" - confirm this is the intended series.
monthly_max = _max.resample('ME').mean()
monthly_aqi = _aqi.resample('ME').mean()
# Stage 5: plot the three monthly series on one figure.
plt.figure(figsize=(12, 8))
# Each entry is (monthly frame, column to plot, line colour); the column name
# doubles as the legend label. The column is always selected explicitly so a
# single series is passed as y-data rather than a whole DataFrame.
for monthly_frame, column, colour in (
    (monthly_mean, "SO2 Mean", "red"),
    (monthly_max, "SO2 1st Max Hour", "green"),
    (monthly_aqi, "SO2 AQI", "blue"),
):
    plt.plot(monthly_frame.index, monthly_frame[column], color=colour, label=column)
plt.xticks(rotation=90)  # rotate the date tick labels to vertical
plt.xlabel("Year - Month")
plt.ylabel("Value")
plt.title("SO2 Statistics in New York from 2007 to 2009")
plt.legend()
plt.show()