PyTorch 기초와 실전 가이드: 설치부터 첫 모델까지

PyTorch란?

PyTorch는 Facebook(현재 Meta)이 개발한 오픈소스 머신러닝 라이브러리입니다. Python 친화적인 API와 직관적인 코드 구조로 인해 연구자와 개발자들 사이에서 가장 인기 있는 딥러닝 프레임워크 중 하나입니다.

PyTorch의 주요 특징

동적 계산 그래프: 런타임에 그래프 구조 변경 가능
Python 친화적: NumPy 스타일의 직관적인 API
GPU 가속: CUDA를 통한 효율적인 병렬 처리
풍부한 생태계: torchvision, torchaudio 등 다양한 확장 라이브러리
강력한 디버깅: Python 디버거 사용 가능

1. PyTorch 설치

환경 확인 및 설치

# Check the GPU driver; nvidia-smi reports the highest CUDA version the
# installed NVIDIA driver supports.
nvidia-smi

# Install PyTorch built against CUDA 11.8
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# CPU-only build (no GPU). The explicit CPU wheel index is used because the
# default PyPI index typically serves the large CUDA-enabled wheels on Linux.
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

설치 확인

import torch

# Collect the build information first, then report it in one place.
torch_version = torch.__version__
cuda_available = torch.cuda.is_available()
cuda_version = torch.version.cuda

print(f"PyTorch 버전: {torch_version}")
print(f"CUDA 사용 가능: {cuda_available}")
print(f"CUDA 버전: {cuda_version}")

2. 기본 개념: 텐서(Tensor)

텐서 생성

import torch

# Build tensors of rank 0 through 3 from (nested) Python literals.
scalar = torch.tensor(5)                                        # rank 0: scalar
vector = torch.tensor([1, 2, 3, 4])                             # rank 1: vector
matrix = torch.tensor([[1, 2], [3, 4]])                         # rank 2: matrix
tensor3d = torch.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])   # rank 3

# Show every tensor together with the rank reported by dim().
print(f"스칼라: {scalar}, 차원: {scalar.dim()}")
print(f"벡터: {vector}, 차원: {vector.dim()}")
print(f"행렬: {matrix}, 차원: {matrix.dim()}")
print(f"3D 텐서: {tensor3d}, 차원: {tensor3d.dim()}")

특수 텐서 생성

# Shape shared by the 2x3 examples below.
_shape = (2, 3)

# Constant-filled tensors: all zeros, all ones, and a fixed value (7).
zeros = torch.zeros(_shape)
ones = torch.ones(_shape)

# 3x3 identity matrix.
eye = torch.eye(3)

# Random tensors: rand() samples uniformly from [0, 1),
# randn() samples from the standard normal distribution.
rand = torch.rand(_shape)
randn = torch.randn(_shape)

full = torch.full(_shape, 7)

# Ranges: arange() steps by a fixed stride and excludes the end value,
# linspace() returns a fixed number of evenly spaced points including both ends.
arange = torch.arange(start=0, end=10, step=2)      # [0, 2, 4, 6, 8]
linspace = torch.linspace(start=0, end=10, steps=5)  # [0.0, 2.5, 5.0, 7.5, 10.0]

텐서 연산

a = torch.tensor([1, 2, 3])
b = torch.tensor([4, 5, 6])

# Element-wise arithmetic, written with the functional forms of + - * / **.
print(f"덧셈: {torch.add(a, b)}")
print(f"뺄셈: {torch.sub(a, b)}")
print(f"곱셈: {torch.mul(a, b)}")
print(f"나눗셈: {torch.div(a, b)}")
print(f"거듭제곱: {torch.pow(a, 2)}")

# Matrix product of two 2x2 matrices (equivalent to the @ operator).
mat_a = torch.tensor([[1, 2], [3, 4]])
mat_b = torch.tensor([[5, 6], [7, 8]])
print(f"행렬 곱셈:\n{torch.matmul(mat_a, mat_b)}")

# Reductions over the whole tensor. `a` holds integers, so it is cast to
# float before taking the mean.
print(f"합계: {torch.sum(a)}")
print(f"평균: {torch.mean(a.float())}")
print(f"최대값: {torch.max(a)}")
print(f"최소값: {torch.min(a)}")

텐서 형태 변환

x = torch.arange(12)

# Rearrange the 12 elements into 3 rows of 4.
reshaped = torch.reshape(x, (3, 4))
print(f"reshape (3, 4):\n{reshaped}")

# Insert a leading axis of size 1, then drop all size-1 axes again.
x = torch.tensor([1, 2, 3])
unsqueeze = torch.unsqueeze(x, dim=0)   # shape [1, 3]
squeeze = torch.squeeze(unsqueeze)      # shape [3]

# Transpose a 2x3 matrix into 3x2.
mat = torch.tensor([[1, 2, 3], [4, 5, 6]])
transposed = mat.t()

# Joining tensors: cat() concatenates along an existing axis,
# stack() creates a new axis.
a = torch.tensor([[1, 2], [3, 4]])
b = torch.tensor([[5, 6], [7, 8]])
_pair = (a, b)
concat_dim0 = torch.cat(_pair, dim=0)   # rows appended  -> shape [4, 2]
concat_dim1 = torch.cat(_pair, dim=1)   # columns appended -> shape [2, 4]

stack = torch.stack(_pair)              # new leading axis -> shape [2, 2, 2]

3. 자동 미분 (Autograd)

PyTorch의 핵심 기능인 자동 미분을 통해 기울기(gradient)를 자동으로 계산할 수 있습니다.

# Leaf tensors whose gradients autograd should track.
x = torch.tensor([2.0]).requires_grad_(True)
y = torch.tensor([3.0]).requires_grad_(True)

# z = x^2 + y^3, recorded in the autograd graph as it is evaluated.
z = torch.pow(x, 2) + torch.pow(y, 3)

# Back-propagate from z; this fills in x.grad and y.grad.
z.backward()

# Inspect the gradients.
print(f"dz/dx: {x.grad}")  # 2*x = 4
print(f"dz/dy: {y.grad}")  # 3*y^2 = 27

신경망 학습에서의 활용

import torch.nn as nn

# Toy regression data following y = 2x.
x = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
y = torch.tensor([[2.0], [4.0], [6.0], [8.0]])

# One-input / one-output linear model, mean-squared-error loss, plain SGD.
model = nn.Linear(1, 1)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Training loop: 100 full-batch gradient steps.
for epoch in range(100):
    # Clear gradients left over from the previous step before the new backward pass.
    optimizer.zero_grad()

    # Forward pass and loss.
    y_pred = model(x)
    loss = criterion(y_pred, y)

    # Backward pass and parameter update.
    loss.backward()
    optimizer.step()

    # Report progress every 20 epochs.
    if not epoch % 20:
        print(f'Epoch {epoch}, Loss: {loss.item():.4f}')

# Show the learned parameters.
print(f"가중치: {model.weight.item():.4f}")
print(f"편향: {model.bias.item():.4f}")

4. 신경망 구축

nn.Module로 모델 정의

import torch.nn as nn
import torch.nn.functional as F

class SimpleNet(nn.Module):
    """Three-layer fully connected classifier: 784 -> 128 -> 64 -> 10.

    Dropout (p=0.2) is applied after each of the two hidden activations.
    The forward pass returns the raw outputs of the last linear layer.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as they are used in forward().
        self.fc1 = nn.Linear(784, 128)  # input 784 -> hidden 128
        self.fc2 = nn.Linear(128, 64)   # hidden 128 -> hidden 64
        self.fc3 = nn.Linear(64, 10)    # hidden 64 -> output 10
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Flatten `x` to rows of 784 values and run it through the network."""
        flat = x.view(-1, 784)
        hidden = self.dropout(F.relu(self.fc1(flat)))
        hidden = self.dropout(F.relu(self.fc2(hidden)))
        return self.fc3(hidden)


# Instantiate the model and print its layer summary.
model = SimpleNet()
print(model)

CNN 모델 예시

class CNN(nn.Module):
    """Small convolutional classifier with two conv/pool stages and two linear layers.

    Each stage is a 3x3 convolution (padding 1), ReLU and 2x2 max pooling.
    The flattened feature size is hard-coded as 64 * 7 * 7, which matches a
    1x28x28 input (28 -> 14 -> 7 after the two poolings).
    """

    def __init__(self):
        super().__init__()
        # Convolution layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        # Shared 2x2 max pooling with stride 2
        self.pool = nn.MaxPool2d(2, 2)
        # Fully connected head
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.25)

    def forward(self, x):
        """Return the 10 output scores for each image in `x`."""
        # Two convolution + pooling stages
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # Flatten to one row per sample
        flat = features.view(-1, 64 * 7 * 7)
        # Fully connected head with dropout after the hidden activation
        hidden = self.dropout(F.relu(self.fc1(flat)))
        return self.fc2(hidden)


cnn_model = CNN()
print(cnn_model)

5. 모델 학습 완전 가이드

MNIST 분류기 실전 예제

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# 1. Data preparation
_DATA_ROOT = './data'
_BATCH_SIZE = 64

# Convert images to tensors, then normalize the single channel.
# (0.1307 / 0.3081 are presumably the MNIST mean and std — confirm if reused elsewhere.)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Only the training split requests a download.
train_dataset = datasets.MNIST(_DATA_ROOT, train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(_DATA_ROOT, train=False, transform=transform)

# Shuffle the training batches; keep the evaluation order fixed.
train_loader = DataLoader(train_dataset, batch_size=_BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=_BATCH_SIZE, shuffle=False)

# 2. 모델 정의
class MNISTNet(nn.Module):
    def __init__(self):
        super(MNISTNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)
        
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

model = MNISTNet()

# 3. Loss function and optimizer
# MNISTNet.forward already returns log_softmax outputs, so the matching
# criterion is NLLLoss. CrossEntropyLoss would apply log-softmax a second
# time on top of the model's own log_softmax.
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 4. 학습 함수
def train(model, device, train_loader, optimizer, epoch, loss_fn=None):
    """Run one training epoch and return the mean batch loss.

    Args:
        model: Module to train; expected to already live on `device`.
        device: Device every batch is moved to before the forward pass.
        train_loader: Iterable of (data, target) batches exposing `.dataset`.
        optimizer: Optimizer that updates the model's parameters.
        epoch: Epoch number, used only in the progress output.
        loss_fn: Callable `(output, target) -> loss`. Defaults to the
            module-level `criterion`.

    Returns:
        The mean loss over all batches as a float (0.0 if the loader
        produced no batches).
    """
    if loss_fn is None:
        loss_fn = criterion

    model.train()
    total_loss = 0.0
    num_batches = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        # Inputs must be on the same device as the model's parameters.
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()

        # Accumulate a detached tensor so the running sum does not keep the
        # autograd graph alive; it is converted to a float once at the end.
        total_loss = total_loss + loss.detach()
        num_batches += 1

        if batch_idx % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} '
                  f'({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')

    return float(total_loss) / num_batches if num_batches else 0.0

# 5. 테스트 함수
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader)
    accuracy = 100. * correct / len(test_loader.dataset)
    
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)\n')

# 6. Run training
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
_use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if _use_cuda else torch.device("cpu")
model = model.to(device)

# Train for epochs 1..5, evaluating on the test set after every epoch.
_NUM_EPOCHS = 5
for epoch in range(1, _NUM_EPOCHS + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

6. 모델 저장 및 로드

모델 저장

# Save the entire model object.
torch.save(obj=model, f='model.pth')

# Save only the parameters (recommended).
_weights = model.state_dict()
torch.save(_weights, 'model_weights.pth')

# Save a mid-training checkpoint: epoch counter, model and optimizer state,
# and the most recent loss value.
_checkpoint = {
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss,
}
torch.save(_checkpoint, 'checkpoint.pth')

모델 로드

# Load the entire model object.
# weights_only=False is required because the file holds a pickled nn.Module
# rather than plain tensors; unpickling can execute arbitrary code, so only
# load files from a trusted source. map_location places the tensors on the
# current device even if the file was saved from a different one.
model = torch.load('model.pth', map_location=device, weights_only=False)

# Load only the parameters (recommended).
model = MNISTNet().to(device)
model.load_state_dict(torch.load('model_weights.pth', map_location=device, weights_only=True))
model.eval()

# Load a checkpoint.
checkpoint = torch.load('checkpoint.pth', map_location=device, weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])
# `model` was re-created above, so the optimizer must be rebuilt on the new
# model's parameters before its saved state is restored; the previous
# optimizer still points at the old model's parameters.
optimizer = optim.Adam(model.parameters(), lr=0.001)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

7. 실전 팁과 모범 사례

GPU 활용

# Pick the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move the model and one batch (inputs and labels) to that device.
model = model.to(device)
data = data.to(device)
target = target.to(device)

학습률 스케줄링

# Learning-rate scheduler: StepLR multiplies the learning rate by `gamma`
# every `step_size` calls to scheduler.step().
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1)

_TOTAL_EPOCHS = 100
for epoch in range(_TOTAL_EPOCHS):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    # Advance the schedule once per epoch, after the optimizer updates.
    scheduler.step()

조기 종료 (Early Stopping)

class EarlyStopping:
    """Signal when a monitored loss has stopped improving.

    Call the instance once per epoch with the validation loss. A value counts
    as an improvement unless it is greater than `best_loss - min_delta`.
    After `patience` consecutive non-improving calls, `early_stop` becomes True.
    """

    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0        # consecutive calls without improvement
        self.best_loss = None   # lowest accepted loss so far
        self.early_stop = False

    def __call__(self, val_loss):
        # The first observation only establishes the baseline.
        if self.best_loss is None:
            self.best_loss = val_loss
            return

        stalled = val_loss > self.best_loss - self.min_delta
        if not stalled:
            # Improvement: record the new best and reset the patience counter.
            self.best_loss = val_loss
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

# Usage example. `train(...)` and `validate(...)` are placeholders for the
# per-epoch training and validation calls; `validate` is not defined in the
# visible part of this guide.
early_stopping = EarlyStopping(min_delta=0.001, patience=5)

for epoch in range(100):
    train_loss = train(...)
    val_loss = validate(...)
    early_stopping(val_loss)
    # Keep training until the stopper raises its flag.
    if not early_stopping.early_stop:
        continue
    print("Early stopping triggered")
    break

8. PyTorch vs TensorFlow 비교

특징	PyTorch	TensorFlow
계산 그래프	동적 (Dynamic)	정적 (Static) / 동적 (TF 2.x)
API 스타일	Pythonic, 직관적	더 복잡한 API
디버깅	Python 디버거 사용 가능	TF 2.x 즉시 실행(eager) 모드에서는 가능, 그래프 모드에서는 상대적으로 어려움
배포	TorchScript, ONNX	TensorFlow Serving, TFLite
커뮤니티	연구 중심	산업계 중심
학습 곡선	쉬움	상대적으로 어려움

요약

PyTorch는 직관적인 API와 강력한 기능으로 머신러닝 프로젝트를 시작하기에 최적의 선택입니다. 이 가이드에서 다룬 핵심 내용은 다음과 같습니다:

텐서 연산으로 데이터 조작
자동 미분으로 기울기 계산
nn.Module로 신경망 구축
데이터로더로 효율적인 학습
모델 저장/로드로 배포 준비

다음 단계로 torchvision을 활용한 이미지 분류, 전이 학습(Transfer Learning), GAN, Transformer 등 더 복잡한 모델을 탐색해보세요!