Radi_tech’s blog

Radiological technologist in Japan / MRI / AI / Deep learning / MATLAB / R / Python

【MATLAB】Batch-convert DICOM data to CSV

Batch-convert DICOM data to CSV files.
This is useful for processing MRI T1 maps and similar data.


% dcm 2 csv

main_fd = "specify the folder that contains the DICOM files";

imgs = dir(main_fd);
imgs = imgs(3:end);   % drop the '.' and '..' entries that dir returns first

csv_fd = "specify the folder where the CSV files will be saved";

for x = 1:length(imgs)

    img_path = fullfile(main_fd, imgs(x).name);

    img = dicomread(img_path);

    % use the file name (without extension) for the CSV file name
    nm = split(imgs(x).name, ".");

    csv_path = fullfile(csv_fd, strcat(nm{1}, ".csv"));

    % csvwrite is deprecated in newer MATLAB releases; writematrix(img, csv_path) is the recommended replacement
    csvwrite(csv_path, img)

end

【Python】Run inference with PyTorch and write the file names, labels, and prediction results to CSV

Run inference with a model trained in PyTorch.
The results are written to a CSV file together with the file names of the test data.

First, the preliminaries:

# module import
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision

from torch.utils.data import Dataset, DataLoader,TensorDataset,random_split,SubsetRandomSampler, ConcatDataset
from torch.nn import functional as F

from torchvision import datasets,transforms
import torchvision.transforms as transforms

from sklearn.metrics import classification_report


# device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

ws = "specify the working directory"
os.chdir(ws)

# folder that contains the test images
test_data_dir = 'specify the folder that contains the test images'


Preparing the data loader

# data_transforms
data_transforms = torchvision.transforms.Compose([
        #torchvision.transforms.RandomResizedCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

test_dataset = torchvision.datasets.ImageFolder(test_data_dir, transform=data_transforms)


# overwrite the transform with ToTensor only (for some reason it did not run without this; there is probably a different way to write it)
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])

test_dataset.transform = transform

test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16)


Preparing the model

model_path = "path to the saved model"
model = torch.load(model_path)
model.eval()
model.to("cpu")   # fast inference is possible even on the CPU
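
If the model was saved on a GPU machine but inference runs on a CPU-only machine, the plain torch.load above can fail to deserialize the CUDA tensors. A minimal sketch (using the same model_path and the device defined earlier) maps them onto the available device at load time:

# minimal sketch: map a GPU-saved model onto the available device while loading
model = torch.load(model_path, map_location=device)
model.eval()
model.to(device)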


Extract the file names.

fn= []
# get file name into list
test_data_fns = test_loader.dataset.samples
fn += [l[0] for l in test_data_fns]
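
dataset.samples is a list of (path, class_index) tuples, so fn holds the full paths. If only the bare file names are wanted in the CSV, a one-line alternative (a sketch, not what the post above does) is:

# sketch: keep only the file name instead of the full path
fn = [os.path.basename(p) for p, _ in test_loader.dataset.samples]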


Run the inference

pred = []
Y = []

for i, (x, y) in enumerate(test_loader):
    with torch.no_grad():
        output = model(x)
    pred += [int(l.argmax()) for l in output]   # predicted class indices
    Y += [int(l) for l in y]                    # true class indices

print(classification_report(Y, pred))


Combine into a DataFrame with concat and write to CSV

import pandas as pd

# build one Series per column
df_fn    = pd.Series(fn)
df_pred  = pd.Series(pred)
df_label = pd.Series(Y)


# concatenate the Series side by side (axis=1) into one DataFrame
tmp_df = pd.concat([df_fn, df_pred, df_label], axis=1)
# rename the columns
tmp_df = tmp_df.rename(columns={0: 'File name', 1: 'Prediction', 2: 'Label'})
# write the CSV
tmp_df.to_csv('Result_summary.csv', header=True, index=True)
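
If the numeric class indices in Prediction and Label are hard to read, a small extension (a sketch, using the class names that ImageFolder derives from the folder names) converts them back before saving:

# sketch: map the class indices back to the ImageFolder class names before saving
class_names = test_dataset.classes
tmp_df['Prediction'] = tmp_df['Prediction'].map(lambda i: class_names[i])
tmp_df['Label']      = tmp_df['Label'].map(lambda i: class_names[i])
tmp_df.to_csv('Result_summary.csv', header=True, index=True)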

【Python】PyTorch: running inference (prediction)


Run inference (prediction) with a model saved in PyTorch.






# module import
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision

from torch.utils.data import Dataset, DataLoader,TensorDataset,random_split,SubsetRandomSampler, ConcatDataset
from torch.nn import functional as F

from torchvision import datasets,transforms
import torchvision.transforms as transforms

from sklearn.metrics import classification_report


# device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# move to the working directory
ws = "path to the working directory"
os.chdir(ws)


# folder that contains the test images
test_data_dir = 'folder that contains the test images'


# data_transforms
data_transforms = torchvision.transforms.Compose([
        #torchvision.transforms.RandomResizedCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

test_dataset = torchvision.datasets.ImageFolder(test_data_dir, transform=data_transforms)


# overwrite the transform with ToTensor only (for some reason it did not run without this; there is probably a different way to write it)
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])

test_dataset.transform = transform


# wrap the dataset in a DataLoader
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16)


# load the trained model
model_path = "path to the saved model"
model = torch.load(model_path)
model.eval()
model.to("cpu")

# prediction
pred = []
Y = []


for i, (x,y) in enumerate(test_loader):
    with torch.no_grad():
        output = model(x)
    pred += [int(l.argmax()) for l in output]
    Y += [int(l) for l in y]

print(classification_report(Y, pred))



# ROC curve
import numpy as np
from sklearn.metrics import roc_curve, auc

y_pred = []
y_true = []


with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)
        y_pred.extend(pred.cpu().numpy())
        y_true.extend(target.cpu().numpy())


# false positive rate & true positive rate
fpr, tpr, thresholds = roc_curve(y_true, y_pred)
roc_auc = auc(fpr, tpr)
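
Because y_pred holds hard class labels (0 or 1), the ROC curve above only has a few points. For a smoother curve, a common alternative (a minimal sketch, assuming class index 1 is the positive class) feeds roc_curve the predicted probabilities instead:

# minimal sketch (assumption): collect softmax probabilities for class index 1 instead of hard labels
y_score = []
with torch.no_grad():
    for data, target in test_loader:
        probs = torch.softmax(model(data), dim=1)
        y_score.extend(probs[:, 1].cpu().numpy())

fpr, tpr, thresholds = roc_curve(y_true, y_score)
roc_auc = auc(fpr, tpr)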


# plot
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111)

plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (AUC = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
ax.set_aspect('equal')

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.savefig('test_ROC.png', format="png", dpi=300)
plt.show()


# confusion matrix
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true, y_pred)
cm = pd.DataFrame(data=cm, index=["Fracture", "Normal"], 
                           columns=["Fracture", "Normal"])

sns.heatmap(cm, square=True, cbar=True, annot=True, cmap='Blues')

plt.xlabel("Prediction label", fontsize=10, fontstyle='italic')
plt.ylabel("Correct label", fontsize=10, fontstyle='italic')
# save as png with 300 dpi resolution
plt.savefig('test_confusion_matrix.png', format="png", dpi=300)
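
Instead of hard-coding the class names, they can also be taken from the test dataset; for ImageFolder, the row/column order of confusion_matrix matches the alphabetically sorted class folders, provided both classes appear in the test set (a minimal sketch):

# minimal sketch (assumption): take the label names from the ImageFolder dataset instead of hard-coding them
class_names = test_loader.dataset.classes
cm = pd.DataFrame(confusion_matrix(y_true, y_pred), index=class_names, columns=class_names)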



【Python】PyTorch k-fold cross validation


Perform k-fold cross validation with PyTorch.


# import modules (some of these may be unnecessary)
import os
import random
import numpy as np


import matplotlib.pyplot as plt

from sklearn.model_selection import KFold


import torch
import torch.nn as nn
import torch.optim as optim
import torchvision

from torch.utils.data import Dataset, DataLoader,TensorDataset,random_split,SubsetRandomSampler, ConcatDataset
from torch.nn import functional as F

from torchvision import datasets,transforms
import torchvision.transforms as transforms


# move to the working directory
ws = "path to the working directory"
os.chdir(ws)


# folders that contain the images
train_data_dir = 'path to the training folder'
test_data_dir  = 'path to the validation folder'


# define the data transform
data_transforms = torchvision.transforms.Compose([
        #torchvision.transforms.RandomResizedCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


# create the datasets
train_dataset = torchvision.datasets.ImageFolder(train_data_dir, transform=data_transforms)
test_dataset  = torchvision.datasets.ImageFolder(test_data_dir,  transform=data_transforms)

# overwrite the transform with ToTensor only
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])

train_dataset.transform = transform
test_dataset.transform  = transform

# concatenate the train and validation folders; the k-fold split is made over the combined dataset
dataset = ConcatDataset([train_dataset, test_dataset])


# number of samples and class names
m = len(train_dataset)
class_names = train_dataset.classes


# device and hyperparameters
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

torch.manual_seed(42)

num_epochs=10
batch_size=16
k=5
splits=KFold(n_splits=k,shuffle=True,random_state=42)
foldperf={}


# define and configure the model
model = torchvision.models.resnet50(pretrained=True)

# replace fully connect layer
model.fc = nn.Linear(model.fc.in_features, len(class_names))
model = model.to(device)

# loss function & hyper parameter
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)


# functions for training & validation
def train_epoch(model,device,dataloader,loss_fn,optimizer):
    train_loss,train_correct=0.0,0
    model.train()
    for images, labels in dataloader:

        images,labels = images.to(device),labels.to(device)
        optimizer.zero_grad()
        output = model(images)
        loss = loss_fn(output,labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * images.size(0)
        scores, predictions = torch.max(output.data, 1)
        train_correct += (predictions == labels).sum().item()

    return train_loss,train_correct

def valid_epoch(model,device,dataloader,loss_fn):
    valid_loss, val_correct = 0.0, 0
    model.eval()
    with torch.no_grad():
        for images, labels in dataloader:

            images,labels = images.to(device),labels.to(device)
            output = model(images)
            loss = loss_fn(output,labels)
            valid_loss += loss.item()*images.size(0)
            scores, predictions = torch.max(output.data,1)
            val_correct += (predictions == labels).sum().item()

    return valid_loss,val_correct


# training
history = {'train_loss': [], 'test_loss': [],'train_acc':[],'test_acc':[]}

for fold, (train_idx,val_idx) in enumerate(splits.split(np.arange(len(dataset)))):

    print('Fold {}'.format(fold + 1))

    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(val_idx)
    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
    test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)


    for epoch in range(num_epochs):
        train_loss, train_correct=train_epoch(model,device,train_loader,criterion,optimizer)
        test_loss, test_correct=valid_epoch(model,device,test_loader,criterion)

        train_loss = train_loss / len(train_loader.sampler)
        train_acc = train_correct / len(train_loader.sampler) * 100
        test_loss = test_loss / len(test_loader.sampler)
        test_acc = test_correct / len(test_loader.sampler) * 100

        print("Epoch:{}/{} AVG Training Loss:{:.3f} AVG Test Loss:{:.3f} AVG Training Acc {:.2f} % AVG Test Acc {:.2f} %".format(epoch + 1,
                                                                                                             num_epochs,
                                                                                                             train_loss,
                                                                                                             test_loss,
                                                                                                             train_acc,
                                                                                                             test_acc))
        history['train_loss'].append(train_loss)
        history['test_loss'].append(test_loss)
        history['train_acc'].append(train_acc)
        history['test_acc'].append(test_acc) 
        
    
    sv_path = f"{fold + 1:0=3}" + "_model.pth"

    torch.save(model, sv_path)
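
Note that the model, criterion, optimizer, and scheduler above are created once, before the fold loop, so the weights carry over from one fold to the next. If each fold should start from the same pretrained state, a common variant (a minimal sketch, not what the post above does) rebuilds the model and optimizer at the top of every fold:

# minimal sketch (assumption): build a fresh model and optimizer for each fold so the folds stay independent
def make_model_and_optimizer():
    m = torchvision.models.resnet50(pretrained=True)
    m.fc = nn.Linear(m.fc.in_features, len(class_names))
    m = m.to(device)
    opt = torch.optim.SGD(m.parameters(), lr=0.001, momentum=0.9)
    return m, opt

# called at the start of each fold, before the epoch loop:
# model, optimizer = make_model_and_optimizer()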

【Python】PyTorch transfer learning on your own images with ResNet50



Reference page:
【PyTorch】torchvision.modelsでResNetやEfficientNetの読み込みと分類クラス数の変更、ファインチューニングへの活用 | 日々、学ぶ


Full code

# import modules
import os
import torch
import torch.nn as nn
import torchvision
import matplotlib.pyplot as plt

# specify the working directory
ws = "path to the working directory"
os.chdir(ws)

# create the data_transforms
data_transforms = {
    'train': torchvision.transforms.Compose([
        #torchvision.transforms.RandomResizedCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': torchvision.transforms.Compose([
        #torchvision.transforms.Resize(256),
        #torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}


# folders that contain your own data
train_data_dir = "path to the training folder"
valid_data_dir = "path to the validation folder"


# create the datasets and check their sizes
image_datasets = {
  'train': torchvision.datasets.ImageFolder(train_data_dir, transform=data_transforms['train']),
  'valid': torchvision.datasets.ImageFolder(valid_data_dir, transform=data_transforms['valid'])
}

dataloaders = {
  'train': torch.utils.data.DataLoader(image_datasets['train'], batch_size=16, shuffle=True),
  'valid': torch.utils.data.DataLoader(image_datasets['valid'], batch_size=16)
}

dataset_sizes = {
    'train': len(image_datasets['train']),
    'valid': len(image_datasets['valid'])
}


# get the class names
class_names = image_datasets['train'].classes


# define the device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


# define the CNN and rewrite the output layer (replace the 1000 ImageNet outputs with the number of classes in this dataset, here 2)
net_ft1 = torchvision.models.resnet50(pretrained=True)
net_ft1.fc = nn.Linear(net_ft1.fc.in_features, len(class_names))
net_ft1 = net_ft1.to(device)
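
The code above fine-tunes all layers of the network. If only the new fully connected layer should be trained (pure feature extraction), one common variant (a minimal sketch under that assumption, not what this post does; net_fx and optimizer_fx are hypothetical names) freezes the pretrained weights and gives the optimizer only the parameters of the new layer:

# minimal sketch (assumption): freeze the pretrained backbone and train only the new fc layer
net_fx = torchvision.models.resnet50(pretrained=True)
for param in net_fx.parameters():
    param.requires_grad = False                                   # freeze all pretrained weights
net_fx.fc = nn.Linear(net_fx.fc.in_features, len(class_names))    # the new layer is trainable by default
net_fx = net_fx.to(device)
optimizer_fx = torch.optim.SGD(net_fx.fc.parameters(), lr=0.001, momentum=0.9)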


# define the loss function & optimizer parameters
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net_ft1.parameters(), lr=0.001, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)


# Training

num_epochs = 20
acc_history_ft1 = {'train': [], 'valid': []}

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)

    for phase in ['train', 'valid']:
        if phase == 'train':
            net_ft1.train()
        else:
            net_ft1.eval()

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = net_ft1(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
        if phase == 'train':
            scheduler.step()

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]
        acc_history_ft1[phase].append(epoch_acc)
        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
        
        # save the model (the same file is overwritten at every phase of every epoch)
        torch.save(net_ft1, 'resnet_test.pth')
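
As written, torch.save runs inside the phase loop, so resnet_test.pth is overwritten twice per epoch regardless of accuracy. A common variant (a minimal sketch, not from the original post; resnet_test_best.pth is a hypothetical file name) keeps only the weights with the best validation accuracy:

# minimal sketch (assumption): save only when the validation accuracy improves
best_acc = 0.0
# inside the epoch loop, right after epoch_acc is computed:
# if phase == 'valid' and epoch_acc > best_acc:
#     best_acc = float(epoch_acc)
#     torch.save(net_ft1, 'resnet_test_best.pth')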


# plot training process
train_value = torch.tensor(acc_history_ft1['train'], device = 'cpu')
valid_value = torch.tensor(acc_history_ft1['valid'], device = 'cpu')

fig = plt.figure(figsize=(4,4),dpi=200)
ax = fig.add_subplot()
ax.plot(train_value, label='train')
ax.plot(valid_value, label='valid')
ax.legend()
ax.set_ylim(0, 1)
fig.show()





【Python】Batch-convert MRI DICOM data to CSV files of per-pixel signal intensity

Batch-convert MRI DICOM data into CSV files containing the signal intensity of each pixel.

The required module is pydicom.
Install it beforehand with pip install pydicom.


import os
import glob
import pydicom
import numpy as np
import pandas as pd


# working directory
ws = 'path to the working directory'
os.chdir(ws)

# path of the dicom folder
dcm_fd = "path to the folder that contains the DICOM files"
# create a file list of the dicom images (all files in the folder)
dcm_fls = glob.glob(os.path.join(dcm_fd, "*"))

# create a folder to save the csv files
os.mkdir("sv_dcm2csv")


for tmp_fl in dcm_fls:
    tmp_dcm = pydicom.dcmread(tmp_fl)

    # rescale slope (standard DICOM tag)
    rescale_slope = tmp_dcm.RescaleSlope

    # scale factor stored in the PHILIPS private tag (2005,100e)
    scale_factor = tmp_dcm[0x2005100e].value

    # convert the PHILIPS pixel values to floating-point signal intensity
    img = tmp_dcm.pixel_array / rescale_slope / scale_factor

    # build the output file name from the DICOM file name (without extension)
    file_name = os.path.splitext(os.path.basename(tmp_fl))[0]

    save_file_name = os.path.join("sv_dcm2csv", file_name + ".csv")

    # convert to float and save with pandas
    pd.DataFrame(img).to_csv(save_file_name, index=False, header=False, float_format='%11.6f')
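
The division by RescaleSlope and the private (2005,100e) tag above is specific to how this Philips data is scaled. For DICOM data that only carries the standard rescale tags, the usual conversion (a minimal sketch, assuming RescaleSlope and RescaleIntercept are present in the header) is:

# minimal sketch (assumption): standard DICOM rescaling when only the public tags are available
ds = pydicom.dcmread(dcm_fls[0])
img = ds.pixel_array * float(ds.RescaleSlope) + float(ds.RescaleIntercept)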

【Python】Get only the unique values, avoiding duplicates (e.g. patient IDs)

How to extract only the unique values from data created in Excel and the like.
This is easy to do with pandas in Python.

Here, a "patient ID" column is assumed.

import os
import pandas as pd
import numpy as np


ws = "path to the working directory"
os.chdir(ws)

csv_path = "path to the csv file"
df = pd.read_csv(csv_path)

# extract the patient ID column
df_ID = df["患者ID"]

# duplicated() flags rows that repeat an earlier value
dup_df = df_ID.duplicated()

# negate it (~) to keep only the rows that are not duplicates
org_ID_df = df_ID[~df_ID.duplicated()]

# csv output
org_ID_df.to_csv('original_ID.csv')
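
The same result can be obtained in one step with drop_duplicates, which keeps the first occurrence of each ID (an equivalent alternative):

# equivalent alternative: drop_duplicates keeps the first occurrence of each ID
org_ID_df = df["患者ID"].drop_duplicates()
org_ID_df.to_csv('original_ID.csv')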

Then cross-reference against the original data with VLOOKUP or similar to build the dataset.