自定義函數
from openpyxl import load_workbook
import pandas as pd
def extract_xlsx_onesheet(wb, sheet_name, header=False):
    """Load one worksheet of an openpyxl workbook into a pandas DataFrame.

    Args:
        wb: Workbook-like object; ``wb[sheet_name]`` must return a worksheet
            whose ``values`` attribute iterates over rows of cell values.
        sheet_name: Name of the sheet to read.
        header: If True, the first row is consumed and used as the column
            names; otherwise every row is treated as data and the columns
            get pandas' default integer labels.

    Returns:
        A DataFrame with the sheet's contents. An empty sheet yields an
        empty DataFrame regardless of ``header``.
    """
    ws = wb[sheet_name]
    rows = iter(ws.values)
    if not header:
        return pd.DataFrame(rows)

    # Use a default so an empty sheet does not leak StopIteration to the caller.
    columns = next(rows, None)
    if columns is None:
        return pd.DataFrame()
    # The remaining rows (after the header was consumed) are the data.
    return pd.DataFrame(rows, columns=columns)
讀取excel文件:
# Location of the Excel workbook to read (a local path; adjust for your machine).
path = 'C:/Users/DELL/Desktop/work/CloudStation/scaffold/CMGC_group.xlsx'
# Open the workbook with openpyxl's load_workbook (imported at the top of the file).
wb = load_workbook(path)
查看有哪些sheet:
In [12]: wb.sheetnames
Out[12]: ['CDK', 'CDKL', 'CK2', 'CLK', 'DYRK', 'GSK', 'MAPK', 'RCK', 'SRPK']
選擇一個讀取成數據框:
# Read the 'CLK' sheet into a DataFrame, using its first row as the column names.
df = extract_xlsx_onesheet(wb, 'CLK', header=True)
若工作表的第一行是列名，就傳入 header=True；否則省略該參數即可（默認為 header=False，所有行都當作數據）。
In [14]: df
Out[14]:
chembl_id_mol canonical_smiles
0 CHEMBL485053 CN1C(=N\C(=C/c2ccc3OCOc3c2)\C1=O)N
1 CHEMBL1803085 Oc1ccc(NC2=N\C(=C/c3ccc4OCOc4c3)\C(=O)N2)cc1
2 CHEMBL3728359 Nc1ccc(Cl)c(NC2=NC(=O)\C(=C\c3ccc4ncccc4c3)\S2)c1
3 CHEMBL2321962 O\N=C/1\C(=C/2\C(=O)Nc3c(Br)cccc23)\Nc4cc(ccc1...
4 CHEMBL2321952 O\N=C/1\C(=C/2\C(=O)Nc3ccccc23)\Nc4ccc(cc14)C(...
.. ... ...
101 CHEMBL1802856 CN1C(=N\C(=C/c2ccc3OCOc3c2)\C1=O)NC(=O)C
102 CHEMBL3318024 CCNC(=O)Nc1ccc2ncc(cc2n1)c3ccc(OC)c(OC)c3
103 CHEMBL2012570 Cc1nccc2c3cc4OCOc4cc3n(CCCCN)c12
104 CHEMBL2062565 CC(=O)Nc1cnc2ccn(c3cc(NC4CC4)n5ncc(C#N)c5n3)c2c1
105 CHEMBL1230165 OC(=O)c1ccc2c(c1)nc(Nc3cccc(Cl)c3)c4ccncc24