| 
 | 
 
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册  
 
x
 
# coding=utf-8 
# import pandas as pd 
# may_df = pd.read_csv('may.csv') 
# june_df = pd.read_csv('june.csv') 
# 
# merged = pd.merge(may_df,june_df,on="states",how = 'outer',suffixes=('_ahead','behind'),indicator=True) 
# # find change 
# changed_rows = merged[merged['_merge']!='both'] 
# # find new lines 
# new_rows = merged[merged['_merge']=='right_only'] 
# print(new_rows) 
# 
 
import pandas as pd 
from tqdm import tqdm 
 
# # 读取两张表 
# df1 = pd.read_csv('may.csv') 
# df2 = pd.read_csv('june.csv') 
# 
# # 筛选出location、project_name、building、unit、high和room相同的行 
# cols = ['location', 'project_name', 'building', 'unit', 'high', 'room'] 
# df1_grouped = df1.groupby(cols).first().reset_index() 
# df2_grouped = df2.groupby(cols).first().reset_index() 
# 
# # 将df1_grouped和df2_grouped合并,并比较states列的值 
# df = pd.DataFrame(columns=df1.columns) 
# for index, row1 in tqdm(df1_grouped.iterrows(), total=len(df1_grouped)): 
#     for _, row2 in df2_grouped.iterrows(): 
#         if row1['location'] == row2['location'] and \ 
#                 row1['project_name'] == row2['project_name'] and \ 
#                 row1['building'] == row2['building'] and \ 
#                 row1['unit'] == row2['unit'] and \ 
#                 row1['high'] == row2['high'] and \ 
#                 row1['room'] == row2['room'] and \ 
#                 row1['states'] != row2['states']: 
#             df = df.append(row1) 
# 
# # 输出结果 
# print(df) 
 
import pandas as pd 
from tqdm import tqdm 
 
# 读取两张表 
df1 = pd.read_excel('D:\一介书生资料库\爬虫:八爪鱼\各市县整体市场\shujuchuli\A.xlsx') 
df2 = pd.read_excel('D:\一介书生资料库\爬虫:八爪鱼\各市县整体市场\shujuchuli\B.xlsx') 
 
# 筛选出location、project_name、building、unit、high和room相同的行   ,'got_date' 
cols = ['备案名', '楼栋', '单元号', '房号', '面积', '清水价', '装修价', '物业类型'] 
df1_grouped = df1.groupby(cols).first().reset_index() 
df2_grouped = df2.groupby(cols).first().reset_index() 
 
# 合并两张表,并筛选出states有变化的行 
df_merged = pd.merge(df1_grouped, df2_grouped, on=cols, suffixes=('_1', '_2')) 
df = df_merged.loc[df_merged['颜色_1'] != df_merged['颜色_2']] 
 
df.to_excel('D:\pydata\data.xlsx', index=False) 
# 输出结果 
print(df) 
 
 
 
 
D:\PythonProject\pythonProject\Scripts\python.exe "E:\qycache\xuexi\pythonProject\房地产\process(3).py"  
D:\PythonProject\pythonProject\lib\site-packages\numpy\_distributor_init.py:30: UserWarning: loaded more than 1 DLL from .libs: 
D:\PythonProject\pythonProject\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll 
D:\PythonProject\pythonProject\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll 
  warnings.warn("loaded more than 1 DLL from .libs:" 
Traceback (most recent call last): 
  File "E:\qycache\xuexi\pythonProject\房地产\process(3).py", line 55, in <module> 
    df_merged = pd.merge(df1_grouped, df2_grouped, on=cols, suffixes=('_1', '_2')) 
  File "D:\PythonProject\pythonProject\lib\site-packages\pandas\core\reshape\merge.py", line 144, in merge 
    op = _MergeOperation( 
  File "D:\PythonProject\pythonProject\lib\site-packages\pandas\core\reshape\merge.py", line 737, in __init__ 
    self._maybe_coerce_merge_keys() 
  File "D:\PythonProject\pythonProject\lib\site-packages\pandas\core\reshape\merge.py", line 1389, in _maybe_coerce_merge_keys 
    raise ValueError(msg) 
ValueError: You are trying to merge on object and float64 columns. If you wish to proceed you should use pd.concat 
 
进程已结束,退出代码1 
 |   
 
 
 
 |