| 
 | 
 
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册  
 
x
 
 
我想请问一下,我这个代码使用pycharm写的,在pycharm上也能跑。但是为什么用python3.7自带的idle却打不开 
代码在附件,因为有一个csv文件,所以只能打包发 呜呜呜
在 main.py 中的注释中,有些看起来像斜体的字母,在 idle 中不支持,我把他们替换后就可以打开了 
- import random
 
 - from copy import deepcopy
 
 - from itertools import groupby
 
 - from statistics import median
 
  
- # This function should have one parameter, a file name (including, if necessary, its path). 
 
 - # The function should read this CSV file and return a matrix (list of lists) in which a row 
 
 - # in the file is a row in the matrix. If the file has N rows, the matrix should have N 
 
 - # elements (each element is a list). Notice that in CSV files a comma separates columns 
 
 - # (CSV = comma separated values). You can assume the file will contain solely numeric values 
 
 - # (and commas, of course) with no quotes.
 
 - def load_from_csv(fileName):
 
 -     # Use list to save matrix
 
 -     res = []
 
 -     # Use a loop structure to read each line in the data file.
 
 -     with open(fileName) as f:
 
 -         lines = f.readlines()
 
 -         for l in lines:
 
 -             res.append(l.strip().split(','))
 
 -     return res
 
  
 
- # This function should have two parameters, both of them lists. It should return the Manhattan 
 
 - # distance between the two lists. For details about this distance, read the appendix.
 
 - def get_distance(l1, l2):
 
 -     # Use a judgment structure to determine whether the length of list1 and list2 are the same
 
 -     if len(l1) != len(l2):
 
 -         print('Error! List1 and List2 are not the same length!')
 
 -         return -1
 
 -     else:
 
 -         d = 0
 
 -         # Use a loop structure look into each index of two lists
 
 -         for i in range(len(l1)):
 
 -             d += abs(l1[i] - l2[i])
 
 -         return d
 
  
 
- # This function should have two parameters, a matrix (list of lists) and a column number. It should 
 
 - # look into all the elements of the data matrix in this column number, and return the highest value.
 
 - def get_max(m, c):
 
 -     # Use a judgment structure to determine if the column number is less than the number of columns in the matrix
 
 -     if c >= len(m[0]):
 
 -         print('Error! The column number should be smaller than the number of columns in the matrix')
 
 -         return -1
 
 -     else:
 
 -         # Loop through each row of a matrix using a loop structure
 
 -         maxValue = m[0][c]
 
 -         for r in m:
 
 -             # Determine if the current value is greater than the maximum
 
 -             if r[c] > maxValue:
 
 -                 maxValue = r[c]
 
 -         return maxValue
 
  
- # This function should have two parameters, a matrix (list of lists) and a column number. It should 
 
 - # look into all the elements of the data matrix in this column number, and return the lowest value.
 
 - def get_min(m, c):
 
 -     # Use a judgment structure to determine if the column number is less than the number of columns in the matrix
 
 -     if c >= len(m[0]):
 
 -         print('Error! The column number should be smaller than the number of columns in the matrix')
 
 -         return -1
 
 -     else:
 
 -         # Loop through each row of a matrix using a loop structure
 
 -         minValue = m[0][c]
 
 -         for r in m:
 
 -             # Determines whether the current value is smaller than the minimum value
 
 -             if r[c] < minValue:
 
 -                 minValue = r[c]
 
 -         return minValue
 
  
- # This function should have two parameters, a matrix (list of lists) and a column number. It should 
 
 - # look into all the elements of the data matrix in this column number, and return the average of this column number.
 
 - def get_mean(m, c):
 
 -     # Use a judgment structure to determine if the column number is less than the number of columns in the matrix
 
 -     if c >= len(m[0]):
 
 -         print('Error! The column number should be smaller than the number of columns in the matrix')
 
 -         return -1
 
 -     else:
 
 -         # Loop through each row of a matrix using a loop structure
 
 -         mean = 0
 
 -         for r in m:
 
 -             mean += float(r[c])
 
 -         return mean/len(m)
 
  
- # This function should take one parameter, a matrix (list of lists). It should return a matrix containing 
 
 - # the standardised version of the matrix passed as a parameter. This function should somehow use the get_max 
 
 - # and get_min functions. For details on how to standardise a matrix, read the appendix.
 
 - def get_standardised_matrix(m):
 
 -     maxList = [ get_max(m,c) for c in range(len(m[0]))]
 
 -     minList = [ get_min(m,c) for c in range(len(m[0]))]
 
 -     meanList = [ get_mean(m,c) for c in range(len(m[0]))]
 
 -     lists = []
 
 -     for r in m:
 
 -         l = []
 
 -         for c, i in enumerate(r):
 
 -             l.append((float(i) - float(meanList[c]))/(float(maxList[c])-float(minList[c])))
 
 -         lists.append(l)
 
 -     return lists
 
  
 
- # This function should have two parameters: a matrix (list of lists), and a column number. It should return
 
 - #  the median of the values in the data matrix at the column number passed as a parameter. Details about 
 
 - # median can be easily found online, eg. https://en.wikipedia.org/wiki/Median.
 
 - def get_median(m, c):
 
 -     l = [float(r[c]) for r in m]
 
 -     return median(l)
 
  
 
- # This function should have three parameters: (i) a matrix (list of lists), (iii) the list S, (iii) the value 
 
 - # of K. This function should implement the Step 6 of the algorithm described in the appendix. It should return 
 
 - # a list containing K elements, c1, c2, ... , cK. Clearly, each of these elements is also a list.
 
 - def get_centroids(m, S, K):
 
 -     res = []
 
 -     for i in range(K):
 
 -         c = []
 
 -         # select those rows that have been assigned to cluster k.
 
 -         m_i = [x for index, x in enumerate(m) if S[index] == i]
 
 -         # Each element j of ckshould be equal to the median of the column D'j
 
 -         for j in range(len(m[0])):
 
 -             c.append(get_median(m_i, j))
 
 -         # Update ck. 
 
 -         res.append(c)
 
 -     return res
 
  
 
- # This function should have two parameters: a data matrix (list of lists) and the number of groups to be 
 
 - # created (K). This function follows the algorithm described in the appendix. It should return a list 
 
 - # S (defined in the appendix). This function should use the other functions you wrote as much as possible. 
 
 - # Do not keep repeating code you already wrote.
 
 - def get_groups(D, K):
 
 -     S = []
 
 -     # Select K different rows from the data matrix at random.
 
 -     c_list = deepcopy(random.sample(D, K))
 
 -     while True:
 
 -         S_temp = []
 
 -         for i in range(len(D)):
 
 -             # Calculate the Manhattan distance between data row D'i and each of the lists c1, c2, ... , cK. cK
 
 -             dist = get_distance(D[i], c_list[0])
 
 -             s_i = 0
 
 -             for index, c_i in enumerate(c_list):
 
 -                 if get_distance(D[i], c_i) < dist:
 
 -                     dist = get_distance(D[i], c_i)
 
 -                     s_i = index
 
 -             # Assign the row D'i to the cluster of the nearest c
 
 -             S_temp.append(s_i)
 
 -         #  If the previous step does not change S, stop
 
 -         if S_temp == S:
 
 -             break 
 
 -         else:
 
 -             S = deepcopy(S_temp)
 
 -         c_list = get_centroids(D, S, K)
 
 -     return S
 
  
 
- def run_test():
 
 -     # load data file
 
 -     fileName = 'Data(1).csv'
 
 -     m = load_from_csv(fileName)
 
 -     # get standardised matrix
 
 -     D = get_standardised_matrix(m)
 
 -     res = []
 
 -     # run experiments with K = 2, 3, 4, 5, 6.
 
 -     for K in range(2,7):
 
 -         res.append(get_groups(D, K))
 
 -     # print the result
 
 -     for K in range(2, 7):
 
 -         print("="*100)
 
 -         print("K = " + str(K) + ":")
 
 -         print(res[K-2])
 
 -         # show how many entities (wines) have been assigned to each group
 
 -         for key, group in groupby(sorted(res[K-2])):
 
 -             print("group " + str(key) + ": " + str(len(list(group))))
 
 -                 
 
 - if __name__ == "__main__":
 
 -     # test::load_from_csv(fileName)
 
 -     # fileName = 'Data(1).csv'
 
 -     # m = load_from_csv(fileName)
 
  
-     # # test::get_distance(l1, l2)
 
 -     # l1 = [1,2,3]
 
 -     # l2 = [4,5,6]
 
 -     # print(get_distance(l1, l2))
 
  
-     # c = 1
 
 -     # # test::get_max(m, c)
 
 -     # maxValue = get_max(m, c)
 
 -     # print(maxValue)
 
  
-     # # test::get_min(m, c)
 
 -     # minValue = get_min(m, c)
 
 -     # print(minValue)
 
 -     
 
 -     # # test::get_mean(m, c)
 
 -     # meanValue = get_mean(m, c)
 
 -     # print(meanValue)
 
  
-     # # test::get_standardised_matrix(m)
 
 -     # D = get_standardised_matrix(m)
 
 -     # print(D)
 
  
-     # # test::get_median(m, c)
 
 -     # print(get_median(m, c))
 
  
-     # K = 2
 
 -     # S = [0, 0, 0, 1, 1, 1]
 
 -     # # test::get_centroids(m, S, K)
 
 -     # c_k = get_centroids(m, S, K)
 
 -     # print(c_k)
 
  
-     # # test::get_groups(D, K)
 
 -     # S = get_groups(m, K)
 
 -     # print(S)
 
  
-     # test::run_test()
 
 -     run_test()
 
  复制代码 
 
 
 |   
 
 
 
 |