Implementing a Neural Network with PyTorch
2025-10-27
```python
from PIL import Image
import torch
import os

def verify_img(image_folder):
    classes = ['Cat', 'Dog']
    class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}
    samples = []
    for class_name in classes:
        cls_dir = os.path.join(image_folder, class_name)
        for fname in os.listdir(cls_dir):
            if not fname.endswith((".jpg", ".jpeg", ".png")):
                continue
            path = os.path.join(cls_dir, fname)
            try:
                with Image.open(path) as img:
                    img.verify()  # verify that it is, in fact, an image
                samples.append((path, class_to_idx[class_name]))
            except (IOError, SyntaxError):
                print('Bad file:', path)
    return samples
```

```python
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset

class ImageDatas(Dataset):
    def __init__(self, img_path, transform=None):
        # transform is discussed below
        super().__init__()
        self.samples = verify_img(img_path)
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        path, label = self.samples[index]
        img = Image.open(path).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, label
```

```python
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

from torch import nn
class CNNModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # note: max pooling only shrinks height/width, not the channel count, so in_channels stays 16
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=128, out_channels=1, kernel_size=3, stride=1, padding=1),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)
```
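A quick aside, not part of the original code: ending the network in `nn.Sigmoid` and training with `nn.BCELoss` (as done below) can be numerically unstable when the sigmoid saturates. PyTorch's `nn.BCEWithLogitsLoss` fuses the sigmoid into the loss. A minimal sketch of the variant; the two-conv stack here is a toy stand-in for illustration, not the model above:

```python
import torch
from torch import nn

# Sketch: a logits-only head (no final Sigmoid) trained with BCEWithLogitsLoss.
# The shortened layer stack is illustrative only.
logit_model = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
    nn.Conv2d(16, 1, kernel_size=3, padding=1),
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),  # note: no Sigmoid; the network outputs raw logits
)
criterion = nn.BCEWithLogitsLoss()  # applies the sigmoid internally; stabler than Sigmoid + BCELoss

x = torch.randn(4, 3, 128, 128)              # dummy batch
targets = torch.randint(0, 2, (4,)).float()  # dummy binary labels
loss = criterion(logit_model(x).view(-1), targets)

# at evaluation time, apply the sigmoid explicitly before thresholding:
probs = torch.sigmoid(logit_model(x).view(-1))
predicted = (probs > 0.5).long()
```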
```python
def evaluate(model, test_loader: DataLoader):
    model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for imgs, labels in test_loader:
            imgs = imgs.to(device)
            labels = labels.to(device)
            outputs = model(imgs)
            predicted = (outputs.squeeze() > 0.5).long()
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_acc = val_correct / val_total
    return val_acc
```

```python
import random
DATA_DIR = './data/PetImages'
BATCH_SIZE = 64
IMG_SIZE = 128
EPOCH = 12
LR = 0.001
PR_STEP = 100
all_sample = verify_img(DATA_DIR)
random.shuffle(all_sample)
train_size = int(0.8 * len(all_sample))
train_samples = all_sample[:train_size]  # training set
val_samples = all_sample[train_size:]    # validation set
data_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# Build the DataLoaders (fix: use the split sample lists; SplitDataset is defined in the fix section below)
train_dataset = SplitDataset(train_samples, transform=data_transform)
val_dataset = SplitDataset(val_samples, transform=data_transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
model = CNNModel().to(device)
criterion = nn.BCELoss()  # loss function
optimizer = torch.optim.Adam(model.parameters(), lr=LR)  # optimizer
# Training loop (optimized version: evaluate on the validation set less often)
for epoch in range(EPOCH):
    print(f'Epoch [{epoch+1}/{EPOCH}]')
    model.train()
    running_loss = 0.0
    for step, (imgs, labels) in enumerate(train_loader):
        imgs = imgs.to(device)
        labels = labels.to(device).float()
        optimizer.zero_grad()
        outputs = model(imgs)
        outputs = outputs.view(-1)  # make sure the output has shape (batch,)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if (step + 1) % PR_STEP == 0:
            avg_loss = running_loss / PR_STEP
            print(f'Step [{step+1}/{len(train_loader)}], Loss: {avg_loss:.4f}')
            running_loss = 0.0
    # evaluate on the validation set at the end of each epoch (rather than every 100 steps)
    val_acc = evaluate(model, val_loader)
    print(f'Epoch [{epoch+1}/{EPOCH}] Validation Accuracy: {val_acc*100:.2f}%')
```

Output:
```
Bad file: ./data/PetImages\Cat\666.jpg
Bad file: ./data/PetImages\Dog\11702.jpg
Epoch [1/12]
Step [100/313], Loss: 0.6857
Step [200/313], Loss: 0.6464
Step [300/313], Loss: 0.6356
Epoch [1/12] Validation Accuracy: 69.16%
Epoch [2/12]
Step [100/313], Loss: 0.5838
Step [200/313], Loss: 0.5636
Step [300/313], Loss: 0.5562
Epoch [2/12] Validation Accuracy: 74.16%
Epoch [3/12]
Step [100/313], Loss: 0.5200
Step [200/313], Loss: 0.5252
Step [300/313], Loss: 0.4968
Epoch [3/12] Validation Accuracy: 70.04%
Epoch [4/12]
Step [100/313], Loss: 0.4926
Step [200/313], Loss: 0.4576
Step [300/313], Loss: 0.4580
Epoch [4/12] Validation Accuracy: 77.28%
Epoch [5/12]
Step [100/313], Loss: 0.4336
Step [200/313], Loss: 0.4144
Step [300/313], Loss: 0.4123
Epoch [5/12] Validation Accuracy: 83.66%
Epoch [6/12]
Step [100/313], Loss: 0.3849
Step [200/313], Loss: 0.3951
Step [300/313], Loss: 0.4082
Epoch [6/12] Validation Accuracy: 83.06%
Epoch [7/12]
Step [100/313], Loss: 0.3500
Step [200/313], Loss: 0.3484
Step [300/313], Loss: 0.3400
Epoch [7/12] Validation Accuracy: 84.78%
Epoch [8/12]
Step [100/313], Loss: 0.3104
Step [200/313], Loss: 0.3200
Step [300/313], Loss: 0.2951
Epoch [8/12] Validation Accuracy: 81.32%
Epoch [9/12]
Step [100/313], Loss: 0.2806
Step [200/313], Loss: 0.2850
Step [300/313], Loss: 0.2886
Epoch [9/12] Validation Accuracy: 87.32%
Epoch [10/12]
Step [100/313], Loss: 0.2707
Step [200/313], Loss: 0.2567
Step [300/313], Loss: 0.2615
Epoch [10/12] Validation Accuracy: 88.48%
Epoch [11/12]
Step [100/313], Loss: 0.2326
Step [200/313], Loss: 0.2722
Step [300/313], Loss: 0.2569
Epoch [11/12] Validation Accuracy: 88.00%
Epoch [12/12]
Step [100/313], Loss: 0.2105
Step [200/313], Loss: 0.2306
Step [300/313], Loss: 0.2212
Epoch [12/12] Validation Accuracy: 88.52%
```
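The validation accuracy fluctuates between epochs (it dips at epoch 3 and again at epoch 8 before recovering), so a common addition is to keep the checkpoint with the best validation accuracy rather than the last one. A minimal sketch; the helper name `maybe_save_best` and the filename `best_model.pth` are illustrative, not from the original code:

```python
# Sketch: save the weights whenever validation accuracy improves.
# Assumes model, evaluate and val_loader from the code above;
# `maybe_save_best` and 'best_model.pth' are made-up names.
def maybe_save_best(model, val_acc, best_acc, path='best_model.pth'):
    if val_acc > best_acc:
        torch.save(model.state_dict(), path)  # persist only the parameters
        return val_acc
    return best_acc

# inside the epoch loop, right after computing val_acc:
#     best_acc = maybe_save_best(model, val_acc, best_acc)
# and once training ends, restore the best weights:
#     model.load_state_dict(torch.load('best_model.pth'))
```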
```python
# Fix the dataset split bug
# The original code built train_dataset and val_dataset both from DATA_DIR, so training and validation used the same data
# Instead, create separate Datasets from the split sample lists
class SplitDataset(Dataset):
    def __init__(self, samples, transform=None):
        self.samples = samples
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        path, label = self.samples[index]
        img = Image.open(path).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, label
# Use the split datasets
train_dataset = SplitDataset(train_samples, transform=data_transform)
val_dataset = SplitDataset(val_samples, transform=data_transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
```

```python
# Further optimization: use torchvision.datasets.ImageFolder (recommended)
# It handles class labels automatically and is faster
# assumes your data is laid out as ./data/PetImages/Cat/ and ./data/PetImages/Dog/
# from torchvision import datasets
# train_dataset = datasets.ImageFolder(root='./data/PetImages', transform=data_transform)
# But this uses the whole dataset, so it still needs a manual split
# If you want a split, you can do it like this:
# import shutil
# # create temporary folders
# os.makedirs('./data/train/Cat', exist_ok=True)
# os.makedirs('./data/train/Dog', exist_ok=True)
# os.makedirs('./data/val/Cat', exist_ok=True)
# os.makedirs('./data/val/Dog', exist_ok=True)
# # copy the files (only do this once)
# for path, label in train_samples:
#     cls = 'Cat' if label == 0 else 'Dog'
#     shutil.copy(path, f'./data/train/{cls}/')
# for path, label in val_samples:
#     cls = 'Cat' if label == 0 else 'Dog'
#     shutil.copy(path, f'./data/val/{cls}/')
# then:
# train_dataset = datasets.ImageFolder('./data/train', transform=data_transform)
# val_dataset = datasets.ImageFolder('./data/val', transform=data_transform)
```

```python
# Enable the ImageFolder optimization (create the folders and copy the files)
import shutil
# create the folders
os.makedirs('./data/train/Cat', exist_ok=True)
os.makedirs('./data/train/Dog', exist_ok=True)
os.makedirs('./data/val/Cat', exist_ok=True)
os.makedirs('./data/val/Dog', exist_ok=True)
# copy the files into the train/val folders
for path, label in train_samples:
    cls = 'Cat' if label == 0 else 'Dog'
    shutil.copy(path, f'./data/train/{cls}/')
for path, label in val_samples:
    cls = 'Cat' if label == 0 else 'Dog'
    shutil.copy(path, f'./data/val/{cls}/')
# use ImageFolder
train_dataset = datasets.ImageFolder('./data/train', transform=data_transform)
val_dataset = datasets.ImageFolder('./data/val', transform=data_transform)
# add num_workers and pin_memory to speed up loading
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)
```
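Copying files works but duplicates the dataset on disk. An alternative is `torch.utils.data.random_split`, which splits a single `ImageFolder` dataset in memory. A minimal sketch; it assumes `data_transform` and `BATCH_SIZE` from above, the seed 42 is arbitrary, and note that this path skips the `verify_img` check, so corrupt files would raise at load time:

```python
from torch.utils.data import random_split

# Sketch: an 80/20 split of one ImageFolder dataset, no file copying.
full_dataset = datasets.ImageFolder('./data/PetImages', transform=data_transform)
n_train = int(0.8 * len(full_dataset))
train_ds, val_ds = random_split(
    full_dataset,
    [n_train, len(full_dataset) - n_train],
    generator=torch.Generator().manual_seed(42),  # reproducible split
)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)
```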
How the `Normalize` parameters above were obtained:

- ImageNet statistics: `mean=[0.485, 0.456, 0.406]` and `std=[0.229, 0.224, 0.225]` are the standard values computed from the ImageNet training set, used to normalize inputs for pretrained models such as ResNet.
- Why use them: they help a model converge faster, because pretrained weights were trained with inputs normalized this way.
- Custom datasets: if you want statistics for your own data, you can compute them with the code below (the example call is commented out because it needs a data loader). For the cat/dog dataset, running it gives more accurate values.
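As a quick aside before that code: `transforms.Normalize(mean, std)` simply computes `(x - mean) / std` per channel, as this small check shows (the 4x4 solid-gray image is an arbitrary dummy, not from the original post):

```python
# Sketch: Normalize(mean, std) is (x - mean) / std per channel; verify on a dummy image.
t = transforms.ToTensor()(Image.new('RGB', (4, 4), color=(128, 128, 128)))
norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
print(torch.allclose(norm(t), (t - mean) / std))  # True
```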
```python
# Compute the mean and standard deviation of a custom dataset (optional)
# Run this if you want statistics from your own data
# This is how Normalize parameters are obtained
def compute_mean_std(loader):
    mean = 0.0
    std = 0.0
    total_images = 0
    for images, _ in loader:
        batch_samples = images.size(0)  # batch size
        images = images.view(batch_samples, images.size(1), -1)  # flatten H*W per channel
        mean += images.mean(2).sum(0)
        # note: averaging per-image std like this approximates the true dataset std
        std += images.std(2).sum(0)
        total_images += batch_samples
    mean /= total_images
    std /= total_images
    return mean, std
# Example: compute the mean and std of the training set
# train_loader = DataLoader(ImageDatas('./data/PetImages', transform=transforms.ToTensor()), batch_size=64)
# mean, std = compute_mean_std(train_loader)
# print("Custom dataset mean:", mean)
# print("Custom dataset std:", std)
```

### Appendix
```python
# Check the model's output shapes
import torch

# assume an input image of size 224x224x3 (batch_size=1)
x = torch.randn(1, 3, 224, 224)
model = CNNModel()
print("Input shape:", x.shape)
# layer-by-layer outputs
out1 = model.model[0](x)  # Conv2d
print("Conv2d output shape:", out1.shape)
out2 = model.model[1](out1)  # ReLU
print("ReLU output shape:", out2.shape)
out3 = model.model[2](out2)  # MaxPool2d
print("MaxPool2d output shape:", out3.shape)
out4 = model.model[3](out3)  # second Conv2d
print("Second Conv2d output shape:", out4.shape)
```

Output:
```
Input shape: torch.Size([1, 3, 224, 224])
Conv2d output shape: torch.Size([1, 16, 224, 224])
ReLU output shape: torch.Size([1, 16, 224, 224])
MaxPool2d output shape: torch.Size([1, 16, 112, 112])
Second Conv2d output shape: torch.Size([1, 32, 112, 112])
```
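Rather than indexing the layers one at a time, the same check can loop over every layer in the `Sequential` stack. A minimal sketch reusing the `model` built above:

```python
# Sketch: print the output shape after every layer of model.model.
x = torch.randn(1, 3, 224, 224)
for layer in model.model:
    x = layer(x)
    print(f'{layer.__class__.__name__}: {tuple(x.shape)}')
```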
