|  | 
 
| 
x
马上注册,结交更多好友,享用更多功能^_^您需要 登录 才可以下载或查看,没有账号?立即注册  
 D:\PythonProject\pythonProject\Scripts\python.exe E:\qycache\xuexi\pythonProject\房地产\process(版本2.3).py
 D:\PythonProject\pythonProject\lib\site-packages\numpy\_distributor_init.py:30: UserWarning: loaded more than 1 DLL from .libs:
 D:\PythonProject\pythonProject\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
 D:\PythonProject\pythonProject\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll
 warnings.warn("loaded more than 1 DLL from .libs:"
 Traceback (most recent call last):
 File "D:\PythonProject\pythonProject\lib\site-packages\pandas\core\indexes\base.py", line 3652, in get_loc
 return self._engine.get_loc(casted_key)
 File "pandas\_libs\index.pyx", line 147, in pandas._libs.index.IndexEngine.get_loc
 File "pandas\_libs\index.pyx", line 176, in pandas._libs.index.IndexEngine.get_loc
 File "pandas\_libs\hashtable_class_helper.pxi", line 7080, in pandas._libs.hashtable.PyObjectHashTable.get_item
 File "pandas\_libs\hashtable_class_helper.pxi", line 7088, in pandas._libs.hashtable.PyObjectHashTable.get_item
 KeyError: '状态_x'
 
 The above exception was the direct cause of the following exception:
 
 Traceback (most recent call last):
 File "E:\qycache\xuexi\pythonProject\房地产\process(版本2.3).py", line 59, in <module>
 df_changed = df_duplicates[df_duplicates['状态_x'] != df_duplicates['状态_y']]
 File "D:\PythonProject\pythonProject\lib\site-packages\pandas\core\frame.py", line 3761, in __getitem__
 indexer = self.columns.get_loc(key)
 File "D:\PythonProject\pythonProject\lib\site-packages\pandas\core\indexes\base.py", line 3654, in get_loc
 raise KeyError(key) from err
 KeyError: '状态_x'
 
 进程已结束,退出代码1
 
 
 
 
 
 # coding=utf-8
 # import pandas as pd
 # may_df = pd.read_csv('may.csv')
 # june_df = pd.read_csv('june.csv')
 #
 # merged = pd.merge(may_df,june_df,on="states",how = 'outer',suffixes=('_ahead','_behind'),indicator=True)
 # # find change
 # changed_rows = merged[merged['_merge']!='both']
 # # find new lines
 # new_rows = merged[merged['_merge']=='right_only']
 # print(new_rows)
 #
 
 import pandas as pd
 from tqdm import tqdm
 
 # # 读取两张表
 # df1 = pd.read_csv('may.csv')
 # df2 = pd.read_csv('june.csv')
 #
 # # 筛选出location、project_name、building、unit、high和room相同的行
 # cols = ['location', 'project_name', 'building', 'unit', 'high', 'room']
 # df1_grouped = df1.groupby(cols).first().reset_index()
 # df2_grouped = df2.groupby(cols).first().reset_index()
 #
 # # 将df1_grouped和df2_grouped合并,并比较states列的值
 # df = pd.DataFrame(columns=df1.columns)
 # for index, row1 in tqdm(df1_grouped.iterrows(), total=len(df1_grouped)):
 #     for _, row2 in df2_grouped.iterrows():
 #         if row1['location'] == row2['location'] and \
 #                 row1['project_name'] == row2['project_name'] and \
 #                 row1['building'] == row2['building'] and \
 #                 row1['unit'] == row2['unit'] and \
 #                 row1['high'] == row2['high'] and \
 #                 row1['room'] == row2['room'] and \
 #                 row1['states'] != row2['states']:
 #             df = df.append(row1)
 #
 # # 输出结果
 # print(df)
 
 import pandas as pd
 from tqdm import tqdm
 
 # Compare two monthly listing snapshots and report the units whose status changed.
 def find_status_changes(df_before, df_after, key_cols, status_col='状态'):
     """Return the rows present in both frames whose status differs.

     Each frame is first collapsed to one row per ``key_cols`` combination
     (first non-null value of every other column), then the two frames are
     inner-joined on ``key_cols``.  Non-key columns that exist in both
     frames receive the ``_x`` (before) and ``_y`` (after) suffixes, so the
     result exposes ``<status_col>_x`` and ``<status_col>_y``.

     Args:
         df_before: earlier snapshot (DataFrame).
         df_after: later snapshot (DataFrame).
         key_cols: column names that together identify one unit.
         status_col: name of the column to compare between snapshots.

     Returns:
         DataFrame of matched rows where the status differs.  Rows whose
         status is missing in both snapshots are not reported as changed.

     Raises:
         KeyError: if a key column or the status column is absent from
             either frame.
     """
     key_cols = list(key_cols)
     for label, frame in (('df_before', df_before), ('df_after', df_after)):
         missing = [c for c in [*key_cols, status_col] if c not in frame.columns]
         if missing:
             raise KeyError(f"{label} is missing required column(s): {missing}")

     # One row per key combination.
     # NOTE(review): groupby drops rows with a missing value in any key
     # column by default -- confirm that is acceptable for this data.
     before = df_before.groupby(key_cols).first().reset_index()
     after = df_after.groupby(key_cols).first().reset_index()

     # pd.concat only stacks rows and never creates suffixed columns; a
     # merge on the keys is what yields the `_x` / `_y` pair to compare.
     paired = pd.merge(before, after, on=key_cols, how='inner',
                       suffixes=('_x', '_y'))

     old_status = paired[f'{status_col}_x']
     new_status = paired[f'{status_col}_y']
     # NaN != NaN evaluates to True, so exclude "missing on both sides".
     both_missing = old_status.isna() & new_status.isna()
     return paired[(old_status != new_status) & ~both_missing]


 if __name__ == "__main__":
     # Read the two monthly tables.  Raw strings keep the backslashes
     # literal instead of relying on invalid escape sequences such as `\s`.
     # NOTE(review): the ':' inside the folder name may be a paste artefact
     # of a full-width colon -- confirm against the real directory name.
     df1 = pd.read_excel(r'D:\一介书生资料库\爬虫:八爪鱼\各市县整体市场\shujuchuli\七月三亚.xlsx')
     df2 = pd.read_excel(r'D:\一介书生资料库\爬虫:八爪鱼\各市县整体市场\shujuchuli\八月三亚.xlsx')

     # Columns that together identify one listed unit in both months.
     cols = ['区域', '项目名称', '楼盘', '单元', '楼层', '房间', '建筑面积', '房型', '挂牌清水价', '挂牌装修价']

     # Units found in both months whose status changed.
     df_changed = find_status_changes(df1, df2, cols)

     # Write and show the result.
     df_changed.to_excel(r'D:\pydata\data.xlsx', index=False)
     print(df_changed)
 
 
 
 
 
 
 
 | 
 |