# -*- coding: utf-8 -*-
"""
Created on Tue Feb 15 15:02:26 2023

@author: Neal

Shareholder information of a stock is listed at:
    https://q.stock.sohu.com/cn/<stock code>/ltgd.shtml

You are required to collect the tables of shareholder information for the stocks in "select_stocks"
with the following 7 columns, and then perform the analysis to answer the questions.
1. 'stock'-股票代码
2. 'rank'-排名
3. 'org_name'-股东名称
4. 'shares'-持股数量(万股)
5. 'percentage'-持股比例
6. 'changes'-持股变化(万股)
7. 'nature'-股本性质
"""
import os
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Browser-like HTTP headers sent with every request.
fake_header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
    "Accept-Encoding": "gzip, deflate, sdch",
}

# CSV file that receives the crawled shareholder records.
data_file = './data/stock_shareholders.csv'

# Stock codes to crawl.
# NOTE(review): the original tuple literal was left unclosed after '600523';
# only the codes visible there are kept here -- append any missing codes.
select_stocks = ('601186', '601169', '601166', '601088', '601006', '600523')
print('There are', len(select_stocks), 'stocks in select_stocks')

# URL template of the shareholder page; '{}' is replaced by the stock code.
base_url = 'https://q.stock.sohu.com/cn/{}/ltgd.shtml'

# CSS path of the shareholder table. This is the original selector cut off at
# the <table> element: the original continued down to a single <a> inside one
# cell, which cannot be iterated row by row.
# NOTE(review): confirm this path against the live page layout.
table_selector = ('body > div.str2Column.clearfix > div.str2ColumnR > '
                  'div.BIZ_innerMain > div.BIZ_innerBoard > div > '
                  'div:nth-child(2) > table')

row_count = 0  # number of valid records collected over all stocks
results = []   # crawled share-holding records, one 7-item list per row

for stock in select_stocks:  # process stocks one by one
    # prepare the request URL for this stock
    url = base_url.format(stock)
    print("Now we are crawling stock", stock)

    # send the HTTP request with the fake header; a timeout keeps one
    # unresponsive request from hanging the whole crawl
    try:
        response = requests.get(url, headers=fake_header, timeout=30)
    except requests.RequestException as err:
        print("Request failed for stock", stock, ":", err)
        continue

    if response.status_code != 200:
        print("Skipping stock", stock, "- HTTP status", response.status_code)
        continue

    # Encoding chosen by the original author; presumably it matches the
    # charset declared in the page's HTML -- verify if text looks garbled.
    response.encoding = 'gbk'
    root = BeautifulSoup(response.text, "html.parser")

    # locate the table storing the shareholder information
    table = root.select_one(table_selector)
    if table is None:
        print("No shareholder table found for stock", stock)
        continue

    # every <tr> is a candidate record; every <td> is one column of it
    for row in table.find_all('tr'):
        record = [stock]  # record starts with the stock code
        record.extend(col.get_text(strip=True) for col in row.find_all('td'))
        # stock code + 6 cells = 7 fields; rows of any other length are skipped
        if len(record) == 7:
            results.append(record)
            row_count += 1

    # short pause between successive page requests
    time.sleep(1)

sharehold_records_df = pd.DataFrame(
    columns=['stock', 'rank', 'org_name', 'shares', 'percentage', 'changes', 'nature'],
    data=results,
)

# Write the records to data_file, creating the target directory first so
# to_csv does not fail when it is missing.
output_dir = os.path.dirname(data_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
sharehold_records_df.to_csv(data_file, index=False, encoding='utf-8')

print('Crawled and saved {} records of shareholder information of select_stocks to{}'.format(row_count,data_file) )
print("List of shareholers are \n", sharehold_records_df['org_name'])

# TODO: add the analysis that answers Q3-1, Q3-2 and Q3-3
# (the questions themselves are not stated in this file).