import pandas as pd
import numpy as np
import random
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsRegressor
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

def _build_preprocessor(continuous_features, ordinal_features, categorical_features):
    """Build the column-wise preprocessing applied before the KNN model.

    Args:
        continuous_features: Names of the numeric columns to scale and impute.
        ordinal_features: Names of the columns to encode as integer codes.
        categorical_features: Names of the columns to one-hot encode.

    Returns:
        An unfitted ``ColumnTransformer``. Columns not listed in any of the
        three groups are dropped (the transformer's default ``remainder``).
    """
    # Scale first, then fill: StandardScaler leaves NaNs in place, and after
    # centering a fill value of 0 corresponds to the column mean.
    numeric_pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('imputer', SimpleImputer(strategy='constant', fill_value=0)),
    ])

    # Unknown and missing ordinal values share the sentinel code -1.
    ordinal_encoder = OrdinalEncoder(
        handle_unknown='use_encoded_value',
        unknown_value=-1,
        encoded_missing_value=-1,
        dtype=int,
    )

    # NOTE(review): with drop='first' and handle_unknown='ignore', a category
    # unseen during fit is encoded as all zeros, i.e. the same as the dropped
    # first category -- confirm this is acceptable for the data at hand.
    one_hot_encoder = OneHotEncoder(
        drop='first',
        handle_unknown='ignore',
    )

    return ColumnTransformer(
        transformers=[
            ('num', numeric_pipeline, continuous_features),
            ('ord', ordinal_encoder, ordinal_features),
            ('cat', one_hot_encoder, categorical_features),
        ],
    )


def main(
    train_path='train_dataset.csv',
    test_path='test_dataset.csv',
    output_path='submission.csv',
    *,
    missing_threshold=20,
    excluded_categorical=('cat_4', 'cat_5', 'cat_6'),
):
    """Train a KNN regressor on the training CSV and write test predictions.

    The defaults reproduce the original hard-coded behaviour, so calling
    ``main()`` with no arguments is unchanged.

    Args:
        train_path: CSV file with the training rows; must contain a
            ``target`` column.
        test_path: CSV file with the rows to predict.
        output_path: Destination CSV for the predictions.
        missing_threshold: Training rows are kept only when they have fewer
            than this many missing values.
        excluded_categorical: Categorical column names left out of the model.

    Returns:
        A ``pandas.DataFrame`` with columns ``index`` (0..n-1 in test-row
        order) and ``value`` (the predicted target), identical to what is
        written to ``output_path``.
    """
    SEED = 42
    # Seed the global RNGs so any randomness downstream is reproducible.
    random.seed(SEED)
    np.random.seed(SEED)

    train_data = pd.read_csv(train_path)
    test_data = pd.read_csv(test_path)

    # Discard training rows that are mostly empty.
    missing_per_row = train_data.isna().sum(axis=1)
    train_data = train_data[missing_per_row < missing_threshold].reset_index(drop=True)

    # Feature groups are identified by a substring of the column name.
    columns = train_data.columns
    continuous_features = [col for col in columns if 'cont' in col]
    ordinal_features = [col for col in columns if 'ord' in col]
    excluded = set(excluded_categorical)
    categorical_features = [
        col for col in columns if 'cat' in col and col not in excluded
    ]

    preprocessor = _build_preprocessor(
        continuous_features, ordinal_features, categorical_features
    )

    X = train_data.drop(columns=['target'])
    y = train_data['target']

    # Fit the preprocessing on the training rows only, then reuse the fitted
    # transformer for the test rows.
    X_processed = preprocessor.fit_transform(X)
    test_processed = preprocessor.transform(test_data)

    # p=1 selects Manhattan distance; neighbours are weighted by inverse distance.
    model = KNeighborsRegressor(n_neighbors=5, p=1, weights='distance')
    model.fit(X_processed, y)

    test_predictions = model.predict(test_processed)

    submission = pd.DataFrame({
        'index': np.arange(len(test_predictions)),
        'value': test_predictions,
    })

    submission.to_csv(output_path, index=False)

    return submission

# Script entry point: run the full train/predict pipeline only when this file
# is executed directly. The returned DataFrame is bound to the module-level
# name ``submission``.
if __name__ == "__main__":
    submission = main()
