# TSK Fuzzy Logic System for Time Series Forecasting

import numpy as np
import pandas as pd
import skfuzzy as fuzz
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

class TSK_FS():
    """First-order Takagi-Sugeno-Kang (TSK) fuzzy regressor.

    Rule antecedents are Gaussian memberships whose centers come from fuzzy
    c-means clustering and whose widths are the membership-weighted
    per-feature variances around each center. Each rule's consequent is
    linear in the inputs plus a bias term; all consequent parameters are
    solved jointly by ridge-regularised least squares.

    Parameters
    ----------
    n_cluster : int
        Number of fuzzy rules (clusters).
    C : float
        Ridge regularisation strength added to the normal equations.

    Notes
    -----
    Requires ``skfuzzy`` to be importable as ``fuzz`` at module level.
    """

    def __init__(self, n_cluster=20, C=0.1):
        self.n_cluster = n_cluster
        self.lamda = C
        self.trained = False

    def fit(self, X_train, y_train):
        """Learn antecedents (clustering) and consequents (ridge solve).

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like with n_samples entries along its first axis

        Raises
        ------
        ValueError
            If ``X_train`` and ``y_train`` have different sample counts.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)
        # Explicit raise instead of ``assert`` so the check survives ``-O``.
        if X_train.shape[0] != len(y_train):
            raise ValueError('X_train and y_train samples num must be same')

        centers, delta = self.__fcm__(X_train, self.n_cluster)
        self.centers = centers
        self.delta = delta

        xg = self.__gaussian_feature__(X_train, centers, delta)
        xg1 = np.dot(xg.T, xg)
        # Ridge solution: (Xg^T Xg + lamda * I)^+ Xg^T y
        ridge = xg1 + self.lamda * np.eye(xg1.shape[0])
        self.pg = np.linalg.pinv(ridge).dot(xg.T).dot(y_train)
        self.trained = True

    def predict(self, X_test):
        """Return predictions for ``X_test`` (n_samples, n_features).

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if not self.trained:
            raise RuntimeError("Error when predict, use fit first!")
        X_test = np.asarray(X_test, dtype=float)
        xg_test = self.__gaussian_feature__(X_test, self.centers, self.delta)
        return xg_test.dot(self.pg)

    def __fcm__(self, data, n_cluster):
        """Cluster ``data`` with fuzzy c-means (fuzzifier m = 2.0).

        Returns
        -------
        centers : ndarray, indexed as (cluster, feature)
        delta : ndarray of shape (n_cluster, n_features)
            Membership-weighted variance of each feature around each center.
        """
        data = np.asarray(data, dtype=float)
        n_features = data.shape[1]
        # cmeans expects features along axis 0, hence the transpose; only the
        # first two of its seven return values are needed here.
        centers, mem, *_ = fuzz.cmeans(
            data.T, n_cluster, 2.0, error=1e-5, maxiter=200)

        delta = np.zeros([n_cluster, n_features])
        for i in range(n_cluster):
            weights = mem[i, :]
            sq_dist = (data - centers[i, :]) ** 2
            delta[i, :] = np.sum(sq_dist * weights.reshape(-1, 1),
                                 axis=0) / np.sum(weights)

        # A feature with zero spread inside a cluster would otherwise cause a
        # division by zero when the Gaussian memberships are evaluated.
        delta = np.maximum(delta, np.finfo(float).eps)
        return centers, delta

    def __gaussian_feature__(self, data, centers, delta):
        """Map inputs to the TSK regression design matrix.

        For every rule, the input row extended by a constant 1 is multiplied
        by that rule's normalised firing strength; the per-rule pieces are
        concatenated column-wise.

        Returns
        -------
        ndarray of shape (n_samples, n_rules * (n_features + 1))
        """
        data = np.asarray(data, dtype=float)
        centers = np.asarray(centers, dtype=float)
        delta = np.asarray(delta, dtype=float)
        # Take the rule count from the centers actually supplied so the
        # method stays consistent with its arguments.
        n_rules = centers.shape[0]
        n_samples = data.shape[0]

        # Division warnings are silenced on purpose: rows whose firing
        # strengths all underflow to 0 produce 0/0 = NaN, which is replaced
        # by 1e-5 below (same fallback value as before).
        with np.errstate(divide='ignore', invalid='ignore'):
            firing = np.zeros([n_samples, n_rules])
            for i in range(n_rules):
                exponent = np.sum((data - centers[i, :]) ** 2 / delta[i, :],
                                  axis=1)
                firing[:, i] = np.exp(-exponent)
            firing = firing / np.sum(firing, axis=1, keepdims=True)

            # Append the bias column used by the linear consequents.
            data_1 = np.concatenate((data, np.ones([n_samples, 1])), axis=1)
            data_fs = np.concatenate(
                [data_1 * firing[:, [i]] for i in range(n_rules)], axis=1)

        return np.where(np.isnan(data_fs), 1e-5, data_fs)

# Compositional data matrix, one row per period. Column 0 is the forecasting
# target (referred to as "Coal" further down); columns 1-3 are labelled
# Petroleum, Others and Gas when the feature DataFrame is built.
data = np.array([
    [0.758, 0.171, 0.049, 0.022],
    [0.758, 0.172, 0.047, 0.023],
    [0.762, 0.17, 0.047, 0.021],
    [0.762, 0.17, 0.047, 0.021],
    [0.76, 0.171, 0.047, 0.021],
    [0.762, 0.166, 0.051, 0.021],
    [0.761, 0.171, 0.048, 0.02],
    [0.757, 0.175, 0.049, 0.019],
    [0.747, 0.182, 0.052, 0.019],
    [0.75, 0.174, 0.057, 0.019],
    [0.746, 0.175, 0.061, 0.018],
    [0.747, 0.18, 0.055, 0.018],
    [0.715, 0.204, 0.062, 0.017],
    [0.696, 0.215, 0.067, 0.022],
    [0.68, 0.232, 0.066, 0.022],
    [0.661, 0.246, 0.068, 0.025],
    [0.653, 0.243, 0.077, 0.027],
    [0.661, 0.234, 0.078, 0.027],
    [0.702, 0.201, 0.074, 0.023],
    [0.702, 0.199, 0.076, 0.023],
    [0.724, 0.178, 0.074, 0.024],
    [0.724, 0.175, 0.074, 0.027],
    [0.725, 0.17, 0.075, 0.03],
    [0.715, 0.167, 0.084, 0.034],
    [0.716, 0.164, 0.085, 0.035],
    [0.692, 0.174, 0.094, 0.04],
    [0.702, 0.168, 0.084, 0.046],
    [0.685, 0.17, 0.097, 0.048],
    [0.674, 0.171, 0.102, 0.053],
    [0.658, 0.173, 0.113, 0.056],
    [0.638, 0.184, 0.12, 0.058],
    [0.622, 0.187, 0.13, 0.061],
    [0.606, 0.189, 0.136, 0.069],
    [0.59, 0.189, 0.145, 0.076],
    [0.577, 0.19, 0.153, 0.08],
    [0.569, 0.188, 0.159, 0.084],
    [0.559, 0.186, 0.167, 0.088],
    [0.562, 0.179, 0.175, 0.084],
])

# Convert to a feature matrix: period-over-period change of columns 1-3.
# (Original note: "for the LCC method, change 1 to 2, 3, 4" -- presumably
# the column index used here; confirm with the author.)
feature_matrix = np.zeros((len(data) - 1, len(data[0]) - 1))
feature_matrix[:] = data[1:, 1:] - data[:-1, 1:]

# Wrap the feature matrix in a DataFrame.
df = pd.DataFrame(feature_matrix, columns=['Petroleum', 'Others', 'Gas'])

# Target variable: the first column, aligned with the differenced features.
# (Original note: "for the LCC method, change 1 to 2, 3, 4".)
target = data[1:, 0]

# Split into training and test sets BEFORE scaling so that the scaler never
# sees the test rows.
X_train, X_test, y_train, y_test = train_test_split(
    df, target, test_size=0.13, random_state=42)

# Normalise the data exactly once: fit on the training rows only, then apply
# the same transform to the test rows. (Previously the features were scaled
# before the split and then scaled a second time before prediction.)
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Full scaled feature set, kept under its original name.
df_scaled = scaler.transform(df)

# Create the TSK model object.
model = TSK_FS(n_cluster=20, C=0.1)

# Fit the model.
model.fit(X_train_scaled, y_train)

# Predict on the test set.
y_pred = model.predict(X_test_scaled)

# Compute the RMSE.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("RMSE:", rmse)

# Build the feature matrix for the next 10 years.
future_features = np.zeros((10, len(data[0]) - 1))
# Start from the last observed row, using every column except Coal.
current_data = data[-1, 1:]

# Roll the model forward: record the current input window, predict from it,
# then drop the oldest entry and append the prediction.
# The original assigned a 4-element vector (prediction + window) to a
# 3-column row, which raises ValueError; the window itself is stored instead.
# NOTE(review): the window holds raw, unscaled shares and later predicted
# target values, while the model is trained on min-max-scaled differences --
# confirm the intended forecasting inputs.
for i in range(10):
    feature = model.predict(current_data.reshape(1, -1))
    future_features[i] = current_data
    current_data = np.concatenate((current_data[1:], feature))

# Use the TSK model to predict the target variable for the next ten years.
future_pred = model.predict(future_features)

# Compute the confidence interval from the test-set residuals.
# NOTE(review): the margin divides by sqrt(number of forecasts), not by the
# number of residuals, and mean_residuals is unused -- formula kept as-is;
# confirm whether a prediction interval (z * std) was intended.
residuals = y_test - y_pred
mean_residuals = np.mean(residuals)
std_residuals = np.std(residuals)
n_samples = len(future_pred)
z_score = 1.96  # corresponds to a 95% confidence interval
margin_of_error = z_score * std_residuals / np.sqrt(n_samples)

lower_bound = future_pred - margin_of_error
upper_bound = future_pred + margin_of_error

# Print the predicted confidence interval for each of the next ten years.
print("未来十年的预测置信区间:")
for year, (low, high) in enumerate(zip(lower_bound, upper_bound), start=1):
    print(f"年份: {year}, 下界: {low}, 上界: {high}")

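# Outline of the script (section headings, in order):
#   - TSK Fuzzy Logic System for Time Series Forecasting
#   - Compositional data matrix
#   - Convert to feature matrix (for the LCC method, change 1 to 2, 3, 4)
#   - Build the feature-matrix DataFrame
#   - Target variable (for the LCC method, change 1 to 2, 3, 4)
#   - Data normalisation
#   - Split into training and test sets
#   - Create the TSK model object
#   - Fit the model
#   - Data normalisation
#   - Predict on the test set
#   - Predict on the test set
#   - Compute the RMSE
#   - Build the feature matrix for the next 10 years
#   - Use the TSK_FLS model to predict the target variable for the next ten years
#   - Predict the features for the next ten years from the model
#   - Use the TSK_FLS model to predict the target variable for the next ten years
#   - Compute the confidence interval
#   - Print the predicted confidence intervals for the next ten years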