import csv import pandas as pd # read an Excel file into a Pandas dataframe xls = pd.read_excel('description.xlsx', sheet_name='likely_similar', engine='openpyxl') xls.iloc[:, 1:] # select all rows and all columns starting from the second column # get the names of the source databases from the dataframe source_databases = xls.iloc[:, 1:].columns # create a dictionary to hold the similarity data similarity_dict = {} # loop through each source database and get the list of similar databases for source_database in source_databases: series = xls.loc[:, source_database] similar_databases = series[series != False].values.tolist() similarity_dict[source_database] = similar_databases # find the length of the longest list max_len = max(len(v) for v in similarity_dict.values()) # pad the shorter lists with NaNs to make them the same length as the longest list for k, v in similarity_dict.items(): if len(v) < max_len: similarity_dict[k] = v + [float('nan')] * (max_len - len(v)) # convert the dictionary to a Pandas dataframe and transpose it df = pd.DataFrame.from_dict(similarity_dict) df = df.transpose() # write the dataframe to a CSV file df.to_csv('similarity_dict.csv', index=False)