Source code for matclustering.core.SimilarityClustering

# -*- coding: utf-8 -*-
"""
MAT-Tools: Python Framework for Multiple Aspect Trajectory Data Mining

The present application offers a tool to support the user in the clustering of multiple aspect trajectory data. It integrates into a unique framework for multiple aspect trajectories and, in general, for multidimensional sequence data mining methods.
Copyright (C) 2022, MIT license (this portion of code is subject to licensing from source project distribution)

Created on Apr, 2024
Copyright (C) 2024, License GPL Version 3 or superior (see LICENSE file)

Authors:
    - Tarlis Portela
"""
import pandas as pd
import numpy as np

from matmodel.util.parsers import df2trajectory

from matsimilarity.methods.mat.MUITAS import *
from matsimilarity.core.utils import similarity_matrix

from matclustering.core import HSTrajectoryClustering

class SimilarityClustering(HSTrajectoryClustering):
    """
    Similarity-based clustering for multiple-aspect trajectory data.

    This class extends the HSTrajectoryClustering class to provide clustering
    functionality based on similarity metrics for trajectory data. It includes
    methods to prepare input data and compute a pairwise matrix from a
    similarity metric.

    Attributes
    ----------
    name : str
        Name of the clustering model.
    metric : object
        Similarity metric used for clustering (set by ``prepare_input``).
    X : array-like
        Dissimilarity matrix of the input trajectories, computed as
        ``1 - similarity_matrix(...)`` (set by ``prepare_input``).
    labels : list
        List of labels associated with each trajectory in the input data
        (set by ``prepare_input``).

    Methods
    -------
    default_metric(dataset_descriptor):
        Initializes and returns the default similarity metric (MUITAS) for
        the dataset.
    prepare_input(X, metric=None, dataset_descriptor=None):
        Prepares the input data by converting it to trajectories and
        calculating the dissimilarity matrix.
    """

    def __init__(self, name, random_state=1, n_jobs=1, verbose=False):
        """
        Forward all configuration to the HSTrajectoryClustering constructor.

        Parameters
        ----------
        name : str
            Name of the clustering model.
        random_state : int, default 1
            Passed through to the parent class.
        n_jobs : int, default 1
            Passed through to the parent class.
        verbose : bool, default False
            Passed through to the parent class.
        """
        super().__init__(name=name, random_state=random_state, n_jobs=n_jobs, verbose=verbose)

    def default_metric(self, dataset_descriptor):
        """
        Build the default similarity metric (MUITAS) for a dataset.

        Every attribute listed in ``dataset_descriptor.attributes`` is
        registered as its own single-attribute feature, with ``1`` as the
        second argument of ``add_feature`` (presumably the feature weight --
        confirm against the MUITAS API).

        Parameters
        ----------
        dataset_descriptor : object
            Descriptor exposing an ``attributes`` iterable. Must not be None:
            this method reads ``dataset_descriptor.attributes``.

        Returns
        -------
        MUITAS
            The configured metric instance.
        """
        muitas = MUITAS(dataset_descriptor)

        # Default configuration: one feature per attribute.
        for feat in dataset_descriptor.attributes:
            muitas.add_feature([feat], 1)

        return muitas

    def prepare_input(self, X, metric=None, dataset_descriptor=None):
        """
        Convert the input to trajectories and compute the dissimilarity matrix.

        Parameters
        ----------
        X : pandas.DataFrame or sequence of trajectories
            A DataFrame is copied and converted with ``df2trajectory``; any
            other value is used as-is, as already-converted trajectories.
        metric : object, optional
            Similarity measure handed to ``similarity_matrix``. When None,
            the metric returned by ``default_metric`` is used.
        dataset_descriptor : object, optional
            Descriptor used to build the default metric. It is replaced by
            the descriptor returned from ``df2trajectory`` when ``X`` is a
            DataFrame, so it is only needed when ``X`` is not a DataFrame and
            ``metric`` is None.

        Returns
        -------
        tuple
            ``(self.X, self.labels)``: the ``1 - similarity`` matrix and the
            list with the ``label`` of each trajectory, in input order.
        """
        if isinstance(X, pd.DataFrame):
            # Work on a copy so the conversion cannot touch the caller's frame.
            T, dataset_descriptor = df2trajectory(X.copy())
        else:
            T = X  # Trajectories already converted

        # Identity check instead of truthiness: a user-supplied metric object
        # that happens to evaluate falsy must not be silently replaced.
        if metric is None:
            if self.isverbose:
                print(f'\n[{self.name}:] Default metric set to MUITAS.')
            self.metric = self.default_metric(dataset_descriptor)
        else:
            self.metric = metric

        # Turn similarities into dissimilarities for the clustering step.
        self.X = 1 - similarity_matrix(T, measure=self.metric, n_jobs=self.config['n_jobs'])

        self.labels = [t.label for t in T]
        return self.X, self.labels