Cham's Blog Algorithm, skill and thinking

Python 常用文件读取与存储方式

2020-09-22

Python 常用文件读取与存储方式,包括 mat,npz,csv,xlsx,mkl,pkl 等格式。

Cham’s Blog 首发原创

mat 格式

# 读取matlab的文件
from scipy.io import loadmat
def load_data(path_to_file):
    """Load the ``x`` and ``y`` variables from a MATLAB ``.mat`` file.

    Args:
        path_to_file: Location of the ``.mat`` file; handed straight to
            ``scipy.io.loadmat``.

    Returns:
        A ``(data, labels)`` tuple. ``data`` is the stored ``x`` variable
        exactly as loaded; ``labels`` is the ``y`` variable flattened to one
        dimension with 1 subtracted from every entry.

    Raises:
        KeyError: If the file holds no variable named ``x`` or ``y``.
    """
    mat_contents = loadmat(path_to_file)
    features = mat_contents['x']
    # flatten() hands back a copy, so the in-place shift below leaves the
    # loaded dictionary entry untouched.
    # Presumably this turns MATLAB's 1-based class ids into 0-based ones --
    # confirm against the data set.
    shifted = mat_contents['y'].flatten()
    shifted -= 1
    return features, shifted
    
# Save a MATLAB file
from scipy.io import savemat
# Each dictionary key is used as the variable name inside the .mat file.
mat_variables = {'x': data}
savemat('D://data.mat', mat_variables)

npz 格式

# Load an .npz archive
import numpy as np
npz_path = './data/S4b_train.npz'
S4b_train = np.load(npz_path)

# Save several arrays into a single .npz archive
import numpy as np
# np.save writes exactly one array to a .npy file: it would name the output
# "data.npz.npy" and treat a third positional argument as allow_pickle, so
# `y` would never be stored. np.savez is the call that produces an .npz
# archive; the keyword names are the keys used to look the arrays up after
# np.load.
np.savez("data.npz", x=x, y=y)

csv 格式

# Read a CSV file into a NumPy array
import csv
import numpy as np
# newline='' leaves line-ending handling to the csv module, so newlines
# embedded inside quoted fields are parsed correctly.
with open('./data/train.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    rows = list(reader)
# csv.reader yields every field as a string, so no numeric conversion has
# happened at this point.
data = np.array(rows)

# Write a CSV file
import pandas as pd
a = [1, 2, 3]
b = [4, 5, 6]
# The dictionary keys become the column names in the CSV header.
columns = {'a_name': a, 'b_name': b}
dataframe = pd.DataFrame(columns)
# index=False keeps the row index out of the file.
dataframe.to_csv("test.csv", sep=',', index=False)

xlsx 格式

# Read an Excel workbook
import pandas as pd
# read_excel has no `encoding` parameter (current pandas raises TypeError for
# it); an .xlsx workbook is zipped XML that declares its own encoding.
df = pd.read_excel('./data/aa.xlsx')  # pass usecols=[0, 5] to read only specific columns
# df.shape is (number of rows, number of columns).
num, item = df.shape

# Write an Excel workbook
import pandas as pd
# The original continuation line began with a pasted REPL prompt ("..."),
# which made the statement a SyntaxError; it is removed here.
data = pd.DataFrame(
    [['a', 'b'], ['c', 'd']],
    index=['row 1', 'row 2'],
    columns=['col 1', 'col 2'],
)
# index=False leaves the 'row 1' / 'row 2' labels out of the sheet.
data.to_excel('./data/bb.xlsx', index=False)

mkl 格式(PyTorch 模型文件,示例中保存为 .pkl)

import torch as tr

# Load model weights from disk
model_path = './models/trained_model.pkl'
mlp_kwargs = dict(n_feature=100, n_hidden=16, n_output=2)
model = MLP(**mlp_kwargs)
# Only the state dict is stored in the file, so the network object has to be
# constructed first and the weights loaded into it.
saved_state = tr.load(model_path)
model.load_state_dict(saved_state)

# Save model weights to disk
model = MLP(**mlp_kwargs)
tr.save(model.state_dict(), model_path)

pkl 格式

import joblib

# Train the model
gbm = LGBMClassifier(num_leaves=31, learning_rate=0.05, n_estimators=20)
validation_sets = [(X_test, y_test)]
# NOTE(review): newer LightGBM releases appear to have dropped the
# early_stopping_rounds argument of fit() in favour of callbacks -- confirm
# against the installed version.
gbm.fit(X_train, y_train, eval_set=validation_sets, early_stopping_rounds=5)

# Persist the fitted model
model_file = 'demo_model.pkl'
joblib.dump(gbm, model_file)

# Reload the model
gbm = joblib.load(model_file)


Similar Posts

Comments