Source code for matclustering.methods.hierarchical.mattree.algorithm.TreeNodeObject

# -*- coding: utf-8 -*-
"""
MAT-Tools: Python Framework for Multiple Aspect Trajectory Data Mining

The present application offers a tool, to support the user in the clustering of multiple aspect trajectory data.It integrates into a unique framework for multiple aspects trajectories and in general for multidimensional sequence data mining methods.
Copyright (C) 2022, MIT license (this portion of code is subject to licensing from source project distribution)

Created on Apr, 2024
Copyright (C) 2024, License GPL Version 3 or superior (see LICENSE file)

Authors:
    - Tarlis Portela
    - Yuri Santos
"""
import itertools
from collections import defaultdict
from copy import deepcopy

from matclustering.methods.hierarchical.mattree.algorithm.set_level import set_level


[docs] class TreeNodeObject: """ A class used to represent a node in a Tree. Attributes ---------- TODO Methods ------- set_level: Defines the cluster label. check_label: Method used to verify cluster aspect label in order to avoid duplicate names in Sankey Diagram. sankey: Creates a sankey diagram from class dataset. Sankey diagram is a type of flow diagram in which the width of the arrows is proportional to the flow rate. eda: Exploratory Data Analysis. It generates a plot bar of a given feature of a given dataset and a given user. eda_corr: Exploratory Data Analysis. It generates a plot of correlation matrix of all features of a given dataset and a given user. get_similarity_matrix: Creates the distance matrix of the trajectories of a given cluster using the given similarity metric. get_entropy: Calculates the entropy value of a given dataset. dashboard: Displays the result dashboard. show: Shows info about each cluster node. graphicTree: Shows info about each cluster node in the tree generated by Digraph plot. dashTree: Method that calculates the frequency matrix and the clusters generated from the division of data based on a split criteria defined in the Tree constructor. """ # Select option in dashboard SELECT = 'Select Node' ALL = 'ALL' HEATMAP = 'HEATMAP' # Sankey diagram params label = defaultdict(list) value = defaultdict(list) source = [] id_dict = defaultdict(list) id_list = [] # Params to label tree nodes nodeNum, targetNum = -1, -1 nodeLabel = defaultdict(list) # 1. Dictionary of all nodes dataframes # 2. Dictionary of all leaves nodes dataframes df_dict = {} df_leaves = {} ############################################################################ # Frequency matrix for the initial dataset absolute_frequency_matrix = None relative = True ############################################################################ temporario = 0 clusters = 0 dendrogram_dict = defaultdict(list) ############################################################################ var_dict = {} id_iter = itertools.count() temp = {} def __init__(self, df, par=None): """DOC - __init__""" self.parent = par self.parentName = '' self.data = df self.left = None self.leftChildName = '' self.right = None self.rightChildName = '' self.done = 'No' self.freqMatrix = None self.variance = None self.threshold = None self.left_group = {} self.right_group = {} self.division = '' self.thresholdVal = None self.maxTrajPerGroup = 50 self.maxDepth = 3 if par == None: # binary, minVariance, var_red, max_red self.split = 'binary' else: self.split = par.split self.trajList = self.data.tid.unique() self.useCol = ['day', 'weather', 'root_type'] if self.parent is not None: self.skipVal = deepcopy(self.parent.skipVal) else: self.skipVal = [] if self.parent is None: self.depth = 0 else: self.depth = par.depth + 1 if par is None: self.id = f'Lvl {self.depth}' else: self.id = set_level(self.id_dict, self.depth)