import numpy as np


class Module:
    """Base class for every component that exposes a ``forward`` method."""

    def forward(self, *args, **kwargs):
        """Accept any arguments and do nothing.

        Subclasses override this with their actual computation; this base
        implementation ignores its inputs and returns ``None``.
        """
        return None

    
class Network(Module):
    """Sequential container that feeds an input through its layers in order."""

    def __init__(self, layers=None):
        """Create a network from an optional list of layers.

        Args:
            layers: Objects exposing ``forward(x)``. When omitted, the
                network starts with a fresh empty list. A list passed in is
                stored as-is (not copied), so ``add_layer`` appends to that
                same list object.
        """
        self.layers = [] if layers is None else layers

    def forward(self, x):
        """Return the result of chaining every layer's ``forward`` over ``x``.

        With no layers, ``x`` is returned unchanged.
        """
        output = x
        for layer in self.layers:
            # Each layer consumes the previous layer's output.
            output = layer.forward(output)
        return output

    def add_layer(self, layer):
        """Append ``layer`` to the end of the layer sequence."""
        self.layers.append(layer)

    
class LinearLayer(Module):
    """Affine layer computing ``W @ x + b`` with fixed parameters."""

    def __init__(self, W, b):
        """Store the weight ``W`` and the bias ``b``.

        Both are kept by reference; presumably NumPy arrays with shapes
        compatible with ``W @ x + b`` -- verify against callers.
        """
        self.W, self.b = W, b

    def forward(self, x):
        """Return the matrix product of ``W`` and ``x`` shifted by ``b``."""
        projected = self.W @ x
        return projected + self.b

    
class Sigmoid(Module):
    """Element-wise logistic sigmoid activation, ``1 / (1 + exp(-x))``."""

    def forward(self, x):
        """Return the sigmoid of ``x``, staying finite for large ``|x|``.

        The naive form ``exp(x) / (exp(x) + 1)`` overflows to ``inf / inf``
        (i.e. ``nan``) once ``x`` is large and positive. This version only
        ever exponentiates ``-|x|``, which lies in ``(0, 1]``.

        Args:
            x: Real-valued scalar or array-like.

        Returns:
            The sigmoid of ``x``: a NumPy scalar for scalar input, otherwise
            an array with the same shape as ``x``.
        """
        x = np.asarray(x)
        # exp(-|x|) is in (0, 1], so neither branch below can overflow or
        # divide by zero, even though np.where evaluates both of them.
        decay = np.exp(-np.abs(x))
        result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
        # Indexing with () turns a 0-d result back into a scalar and leaves
        # higher-dimensional arrays untouched.
        return result[()]

    
class ReLU(Module):
    """Rectified linear unit activation."""

    def forward(self, x):
        """Return the element-wise maximum of ``x`` and zero."""
        rectified = np.maximum(x, 0)
        return rectified

    
class Loss(Module):
    """Base class for losses that compare a prediction with a target."""

    def forward(self, prediction, target):
        """Placeholder to be overridden; ignores its inputs and returns ``None``."""
        return None


class MSE(Loss):
    """Mean squared error loss."""

    def forward(self, prediction, target):
        """Return the mean of the squared differences ``prediction - target``.

        The mean is taken over all elements, producing a single value.
        """
        residual = prediction - target
        squared_error = residual ** 2
        return np.mean(squared_error)


class CrossEntropyLoss(Loss):
    """Softmax cross-entropy loss computed from raw scores (logits)."""

    def forward(self, prediction, target):
        """Return the negative log of the softmax probability at ``target``.

        The softmax is taken over all elements of ``prediction``. The loss
        is evaluated as ``log(sum(exp(s))) - s[target]`` with
        ``s = prediction - max(prediction)``. Taking the log of the ratio
        ``exp(s[target]) / sum(exp(s))`` directly would return ``inf`` when
        ``exp(s[target])`` underflows to zero, i.e. when the target score
        is far below the maximum.

        Args:
            prediction: Array-like of raw scores.
            target: Index used to select the true-class score from
                ``prediction`` (presumably an integer class index --
                verify against callers).

        Returns:
            The non-negative cross-entropy loss.
        """
        prediction = np.asarray(prediction)
        # Subtracting the maximum keeps every exponent <= 0, so exp() cannot
        # overflow and the sum below is always >= 1.
        shifted = prediction - prediction.max()
        log_normalizer = np.log(np.exp(shifted).sum())
        return log_normalizer - shifted[target]
