訓(xùn)練日志刷屏使我痛苦,我開發(fā)了VLog
訓(xùn)練日志刷屏使我痛苦,我開發(fā)了VLog,可以在任意訓(xùn)練代碼中輕松使用~
例如,通過callback嵌入到lightgbm/catboost/transformers/ultralytics,乃至keras庫的訓(xùn)練代碼流程中~
before:

after:

為什么不用tensorboard或者wandb?
tensorboard需要開端口權(quán)限,服務(wù)器開發(fā)環(huán)境有時(shí)候沒有端口權(quán)限~
wandb需要聯(lián)網(wǎng),有時(shí)候網(wǎng)速很差或者沒有網(wǎng),影響體驗(yàn)~
綜合對比考慮如下表

一,VLog基本原理
VLog類主要有以下5個(gè)方法。
from torchkeras import VLog
#1, 初始化方法
vlog = VLog(epochs=20, monitor_metric='val_loss', monitor_mode='min')
#2, 顯示開始空圖表
vlog.log_start()
#3, 更新step級別日志
vlog.log_step({'train_loss':0.003,'val_loss':0.002})
#4, 更新epoch級別日志
vlog.log_epoch({'train_acc':0.9,'val_acc':0.87,'train_loss':0.002,'val_loss':0.03})
#5, 輸出最終穩(wěn)定狀態(tài)圖表
vlog.log_end()
import time
import math,random
from torchkeras import VLog
epochs = 10
batchs = 30
#0, 指定監(jiān)控北極星指標(biāo),以及指標(biāo)優(yōu)化方向
vlog = VLog(epochs, monitor_metric='val_loss', monitor_mode='min')
#1, log_start 初始化動(dòng)態(tài)圖表
vlog.log_start()
for epoch in range(epochs):
#train
for step in range(batchs):
#2, log_step 更新step級別日志信息,打日志,并用小進(jìn)度條顯示進(jìn)度
vlog.log_step({'train_loss':100-2.5*epoch+math.sin(2*step/batchs)})
time.sleep(0.05)
#eval
for step in range(20):
#3, log_step 更新step級別日志信息,指定training=False說明在驗(yàn)證模式,只打日志不更新小進(jìn)度條
vlog.log_step({'val_loss':100-2*epoch+math.sin(2*step/batchs)},training=False)
time.sleep(0.05)
#4, log_epoch 更新epoch級別日志信息,每個(gè)epoch刷新一次動(dòng)態(tài)圖表和大進(jìn)度條進(jìn)度
vlog.log_epoch({'val_loss':100 - 2*epoch+2*random.random()-1,
'train_loss':100-2.5*epoch+2*random.random()-1})
# 5, log_end 調(diào)整坐標(biāo)軸范圍,輸出最終指標(biāo)可視化圖表
vlog.log_end()

二,在LightGBM中使用VLog
設(shè)計(jì)一個(gè)簡單的回調(diào),就可以搞定~
from torchkeras import VLog
class VLogCallback:
def __init__(self, num_boost_round,
monitor_metric='val_loss',
monitor_mode='min'):
self.order = 20
self.num_boost_round = num_boost_round
self.vlog = VLog(epochs = num_boost_round, monitor_metric = monitor_metric,
monitor_mode = monitor_mode)
def __call__(self, env) -> None:
metrics = {}
for item in env.evaluation_result_list:
if len(item) == 4:
data_name, eval_name, result = item[:3]
metrics[data_name+'_'+eval_name] = result
else:
data_name, eval_name = item[1].split()
res_mean = item[2]
res_stdv = item[4]
metrics[data_name+'_'+eval_name] = res_mean
self.vlog.log_epoch(metrics)
import datetime
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
def printlog(info):
nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print("\n"+"=========="*8 + "%s"%nowtime)
print(info+'...\n\n')
#================================================================================
# 一,讀取數(shù)據(jù)
#================================================================================
printlog("step1: reading data...")
# 讀取dftrain,dftest
breast = datasets.load_breast_cancer()
df = pd.DataFrame(breast.data,columns = [x.replace(' ','_') for x in breast.feature_names])
df['label'] = breast.target
df['mean_radius'] = df['mean_radius'].apply(lambda x:int(x))
df['mean_texture'] = df['mean_texture'].apply(lambda x:int(x))
dftrain,dftest = train_test_split(df)
categorical_features = ['mean_radius','mean_texture']
lgb_train = lgb.Dataset(dftrain.drop(['label'],axis = 1),label=dftrain['label'],
categorical_feature = categorical_features)
lgb_valid = lgb.Dataset(dftest.drop(['label'],axis = 1),label=dftest['label'],
categorical_feature = categorical_features,
reference=lgb_train)
#================================================================================
# 二,設(shè)置參數(shù)
#================================================================================
printlog("step2: setting parameters...")
boost_round = 50
early_stop_rounds = 10
params = {
'boosting_type': 'gbdt',
'objective':'binary',
'metric': ['auc'], #'l2'
'num_leaves': 15,
'learning_rate': 0.05,
'feature_fraction': 0.9,
'bagging_fraction': 0.8,
'bagging_freq': 5,
'verbose': 0,
'early_stopping_round':5
}
#================================================================================
# 三,訓(xùn)練模型
#================================================================================
printlog("step3: training model...")
result = {}
vlog_cb = VLogCallback(boost_round, monitor_metric = 'val_auc', monitor_mode = 'max')
vlog_cb.vlog.log_start()
gbm = lgb.train(params,
lgb_train,
num_boost_round= boost_round,
valid_sets=(lgb_valid, lgb_train),
valid_names=('val','train'),
callbacks = [lgb.record_evaluation(result),
vlog_cb]
)
vlog_cb.vlog.log_end()
#================================================================================
# 四,評估模型
#================================================================================
printlog("step4: evaluating model ...")
y_pred_train = gbm.predict(dftrain.drop('label',axis = 1), num_iteration=gbm.best_iteration)
y_pred_test = gbm.predict(dftest.drop('label',axis = 1), num_iteration=gbm.best_iteration)
print('train accuracy: {:.5} '.format(accuracy_score(dftrain['label'],y_pred_train>0.5)))
print('valid accuracy: {:.5} \n'.format(accuracy_score(dftest['label'],y_pred_test>0.5)))
#================================================================================
# 五,保存模型
#================================================================================
printlog("step5: saving model ...")
model_dir = "gbm.model"
print("model_dir: %s"%model_dir)
gbm.save_model("gbm.model")
printlog("task end...")
###
##
#
================================================================================2023-11-10 15:39:38
step1: reading data......
================================================================================2023-11-10 15:39:38
step2: setting parameters......
================================================================================2023-11-10 15:39:38
step3: training model......

================================================================================2023-11-10 15:39:44
step4: evaluating model ......
train accuracy: 0.95775
valid accuracy: 0.94406
================================================================================2023-11-10 15:39:44
step5: saving model ......
model_dir: gbm.model
================================================================================2023-11-10 15:39:44
task end......
三, 在ultralytics中使用VLog
寫個(gè)適配的回調(diào)~
ultralytics可以做 分類,檢測,分割 等等。
這個(gè)回調(diào)函數(shù)是通用的,此處以分類問題為例,改個(gè)monitor_metric即可~
cats_vs_dogs數(shù)據(jù)集可以在公眾號算法美食屋后臺(tái)回復(fù):torchkeras 獲取~
from torchkeras import VLog
class VLogCallback:
def __init__(self,epochs,monitor_metric,monitor_mode):
self.vlog = VLog(epochs,monitor_metric,monitor_mode)
def on_train_batch_end(self,trainer):
self.vlog.log_step(trainer.label_loss_items(trainer.tloss, prefix='train'))
def on_fit_epoch_end(self,trainer):
metrics = {k.split('/')[-1]:v for k,v in trainer.metrics.items() if 'loss' not in k}
self.vlog.log_epoch(metrics)
def on_train_epoch_end(self,trainer):
pass
from ultralytics import YOLO
epochs = 10
vlog_cb = VLogCallback(epochs = epochs,
monitor_metric='accuracy_top1',
monitor_mode='max')
callbacks = {
"on_train_batch_end": vlog_cb.on_train_batch_end,
"on_fit_epoch_end": vlog_cb.on_fit_epoch_end
}
model = YOLO(model = 'yolov8n-cls.pt')
for event,func in callbacks.items():
model.add_callback(event,func)
vlog_cb.vlog.log_start()
results = model.train(data='cats_vs_dogs',
epochs=epochs, workers=4) # train the model
vlog_cb.vlog.log_end()

四, 在transformers中使用VLog
waimai評論數(shù)據(jù)集可以在公眾號算法美食屋后臺(tái)回復(fù):torchkeras 獲取~
#回調(diào)給你寫好了~
from torchkeras.tools.transformers import VLogCallback
import numpy as np
import pandas as pd
import torch
import datasets
from transformers import AutoTokenizer,DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments,Trainer
from transformers import EarlyStoppingCallback
from tqdm import tqdm
from transformers import AdamW, get_scheduler
#一,準(zhǔn)備數(shù)據(jù)
df = pd.read_csv("waimai_10k.csv")
ds = datasets.Dataset.from_pandas(df)
ds = ds.shuffle(42)
ds = ds.rename_columns({"review":"text","label":"labels"})
tokenizer = AutoTokenizer.from_pretrained('bert-base-chinese')
ds_encoded = ds.map(lambda example:tokenizer(example["text"]),
remove_columns = ["text"],
batched=True)
#train,val,test split
ds_train_val,ds_test = ds_encoded.train_test_split(test_size=0.2).values()
ds_train,ds_val = ds_train_val.train_test_split(test_size=0.2).values()
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
dl_train = torch.utils.data.DataLoader(ds_train, batch_size=16, collate_fn = data_collator)
dl_val = torch.utils.data.DataLoader(ds_val, batch_size=16, collate_fn = data_collator)
dl_test = torch.utils.data.DataLoader(ds_test, batch_size=16, collate_fn = data_collator)
for batch in dl_train:
break
print({k: v.shape for k, v in batch.items()})
#二,定義模型
model = AutoModelForSequenceClassification.from_pretrained(
'bert-base-chinese',num_labels=2)
#三,訓(xùn)練模型
def compute_metrics(eval_preds):
logits, labels = eval_preds
preds = np.argmax(logits, axis=-1)
accuracy = np.sum(preds==labels)/len(labels)
precision = np.sum((preds==1)&(labels==1))/np.sum(preds==1)
recall = np.sum((preds==1)&(labels==1))/np.sum(labels==1)
f1 = 2*recall*precision/(recall+precision)
return {"accuracy":accuracy,"precision":precision,"recall":recall,'f1':f1}
training_args = TrainingArguments(
output_dir = "bert_waimai",
num_train_epochs = 3,
logging_steps = 20,
gradient_accumulation_steps = 10,
evaluation_strategy="steps", #epoch
metric_for_best_model='eval_f1',
greater_is_better=True,
report_to='none',
load_best_model_at_end=True
)
callbacks = [EarlyStoppingCallback(early_stopping_patience=10),
VLogCallback()] #監(jiān)控指標(biāo)同 metric_for_best_model
trainer = Trainer(
model,
training_args,
train_dataset=ds_train,
eval_dataset=ds_val,
compute_metrics=compute_metrics,
callbacks = callbacks,
data_collator=data_collator,
tokenizer=tokenizer,
)
trainer.train()
#四,評估模型
trainer.evaluate(ds_val)
#五,使用模型
from transformers import pipeline
model.config.id2label = {0:"差評",1:"好評"}
classifier = pipeline(task="text-classification",tokenizer = tokenizer,model=model.cpu())
classifier("挺好吃的哦")
#六,保存模型
model.save_pretrained("waimai_10k_bert")
tokenizer.save_pretrained("waimai_10k_bert")
classifier = pipeline("text-classification",model="waimai_10k_bert")
classifier(["味道還不錯(cuò),下次再來","我去,吃了我吐了三天"])

公眾號算法美食屋后臺(tái)回復(fù)關(guān)鍵詞:torchkeras,獲取本文notebook代碼和數(shù)據(jù)集下載鏈接~
萬水千山總是情,點(diǎn)個(gè)贊贊行不行~??
評論
圖片
表情
