# -*- coding: utf-8 -*-
"""
MAT-Tools: Python Framework for Multiple Aspect Trajectory Data Mining
This application offers a tool to support the user in modeling multiple aspect trajectory data. It integrates, in a single framework, data mining methods for multiple aspect trajectories and, more generally, for multidimensional sequence data.
Copyright (C) 2022, MIT license (this portion of code is subject to licensing from source project distribution)
Created on Apr, 2024
Copyright (C) 2024, License GPL Version 3 or superior (see LICENSE file)
Authors:
- Tarlis Portela
- Vanessa Lago Machado
"""
[docs]
class Comparator:
    '''
    <Abstract> Base comparator: calculates the distance between two aspects.

    The base implementation is a plain equality test (0 when equal, 1 otherwise);
    subclasses override `distance` to provide other measures.

    Properties:
        max_value=None - Maximum possible value for distance. When it is None
                         (or 0), `normalize` returns distances unchanged and
                         `enhance` does not cap them.
        min_value=0    - Minimum possible value for distance, used by `normalize`.
    '''

    # Class-level constant; none of the methods below read it.
    MAX_VALUE = float('inf')

    def __init__(self, max_value=None):
        self.max_value = max_value
        self.min_value = 0

    def match(self, asp1, asp2, match_threshold=0):
        """
        Determine whether two aspects match in equality

        Override this method to change behavior, can use match_threshold to match based on a specified threshold.

        Parameters:
        -----------
        asp1 : Aspect
            The first aspect to be compared.
        asp2 : Aspect
            The second aspect to be compared.
        match_threshold : float, optional
            Largest distance still considered a match (default 0, i.e. only
            aspects at distance 0 match).

        Returns:
        --------
        bool
            True if the two aspects match, False otherwise.
        """
        return bool(self.distance(asp1, asp2) <= match_threshold)

    def distance(self, asp1, asp2):
        '''Calculates the distance.

        Arguments:
            asp1 (Aspect<?>) - value 1 to compare
            asp2 (Aspect<?>) - value 2 to compare
        Return:
            distance - distance equality value (0 for equal values, 1 for different).
        '''
        # Use the == operator rather than calling __eq__ directly: a direct call
        # may return NotImplemented (truthy), which would report unrelated
        # values as equal.
        return 0 if asp1 == asp2 else 1

    def normalize(self, distance):
        '''Rescales a distance to the [min_value, max_value] range.

        Arguments:
            distance - raw distance value
        Return:
            (distance - min_value) / (max_value - min_value), or the distance
            unchanged when no max_value is set.
        '''
        if not self.max_value:
            return distance
        return (distance - self.min_value) / (self.max_value - self.min_value)

    def enhance(self, distance):
        '''Squares a distance, capping the result at max_value when one is set.

        Arguments:
            distance - raw distance value
        Return:
            distance squared, limited to max_value.
        '''
        distance = distance * distance
        if self.max_value and distance > self.max_value:
            return self.max_value
        return distance

    @staticmethod
    def instantiate(json_obj):
        '''Builds a comparator from its JSON description.

        Arguments:
            json_obj (dict) - must hold a 'comparator' dict with a 'distance'
                name; optional 'maxValue' (-1 means no maximum), 'minValue' and
                'units'. json_obj['type'] is read when the distance is
                'difference'. Any other key of the 'comparator' dict is copied
                onto the instance as an attribute.
        Return:
            comparator - the configured Comparator instance.
        Raises:
            NameError - when no '<Name>Distance' comparator class exists in
                this module for the requested distance name.
        '''
        # TODO: new distances and specific cases:
        spec = json_obj['comparator']

        max_value = spec.get('maxValue')
        if max_value == -1:
            max_value = None

        kind = spec['distance']
        if kind == 'difference' and json_obj['type'] == 'time':
            c = TimeDistance(max_value, spec.get('units', 'm'))
        elif kind in ('diffnotneg', 'difference'):
            c = AbsoluteDistance(max_value)
        else:
            # The class name comes from configuration data, so resolve it with a
            # checked lookup in this module instead of eval(): eval would run
            # arbitrary expressions embedded in the JSON.
            cname = str(kind).capitalize() + 'Distance'
            comparator_cls = globals().get(cname)
            if not (isinstance(comparator_cls, type) and issubclass(comparator_cls, Comparator)):
                raise NameError("name '%s' is not defined" % cname)
            c = comparator_cls(max_value)

        # Other Params:
        # Previously a configured minValue was only applied when it was exactly -1;
        # every provided value is honoured now (the -1 case behaves as before).
        min_value = spec.get('minValue')
        if min_value is not None:
            c.min_value = min_value

        for k, v in spec.items():
            if k not in ('distance', 'maxValue', 'minValue'):
                setattr(c, k, v)

        return c
[docs]
class EqualsDistance(Comparator):
    '''Equality comparator for nominal aspects that ignores letter case.'''

    def __init__(self, max_value=None):
        super().__init__(max_value)

    def distance(self, asp1, asp2):
        '''Calculates the distance for equality ignoring case.

        Both values are upper-cased before being compared, so they must
        provide an `upper()` method (e.g. strings).

        Arguments:
            asp1 (Aspect<nominal>) - value 1 to compare
            asp2 (Aspect<nominal>) - value 2 to compare
        Return:
            distance - distance equality value (0 for equal values, 1 for different).
        '''
        left = asp1._value.upper()
        right = asp2._value.upper()
        if left == right:
            return 0
        return 1
[docs]
class CaselessDistance(Comparator):
    '''Comparator that reuses the base equality distance unchanged.

    NOTE(review): despite its name, this class defines no `distance` of its
    own, so comparisons go through `Comparator.distance`. Whether that is
    case-insensitive depends on the aspects' equality - confirm the intent.
    '''

    def __init__(self, max_value=None):
        super().__init__(max_value)
[docs]
class NumericDistance(Comparator):
    '''Signed numeric difference between two aspect values.'''

    def __init__(self, max_value=None):
        super().__init__(max_value)

    def distance(self, asp1, asp2):
        '''Calculates the numeric distance.

        The result keeps its sign: it is negative when asp1 is smaller than asp2.

        Arguments:
            asp1 (Aspect<numeric>) - value 1 to compare
            asp2 (Aspect<numeric>) - value 2 to compare
        Return:
            distance - distance difference value (asp1 - asp2).
        '''
        minuend = asp1._value
        subtrahend = asp2._value
        return minuend - subtrahend
[docs]
class AbsoluteDistance(NumericDistance):
    '''Non-negative numeric difference between two aspect values.'''

    def __init__(self, max_value=None):
        super().__init__(max_value)

    def distance(self, asp1, asp2):
        '''Calculates the absolute distance.

        Arguments:
            asp1 (Aspect<numeric>) - value 1 to compare
            asp2 (Aspect<numeric>) - value 2 to compare
        Return:
            distance - distance difference value, abs(asp1 - asp2).
        '''
        difference = asp1._value - asp2._value
        return abs(difference)
[docs]
class TimeDistance(Comparator):
    '''Calculates the closest time distance on a 24-hour clock.

    The distance wraps around midnight. Ex.: the difference between 22h and 2h is 4h.

    Properties:
        units='m' - Unit measure to get distance: h (hours), m (minutes), s (seconds), ms
                    (the 'ms' cap, 24*60*60*1000 - 1, is sized for milliseconds)
        max_value=None - Maximum possible value for distance (Ex.: hours = 23);
                         replaced by the per-unit maximum whenever units is one of the above.
    '''

    def __init__(self, max_value=None, units='m'):
        self.units = units
        # Largest value each supported unit can take within one day. A recognised
        # unit always overrides the max_value argument; any other unit keeps it.
        largest_per_unit = {
            'h': 23,
            'm': 24*60 - 1,
            's': 24*60*60 - 1,
            'ms': 24*60*60*1000 - 1,
        }
        super().__init__(largest_per_unit.get(units, max_value))

    def distance(self, asp1, asp2):
        '''Calculates the closest time distance.

        Arguments:
            asp1 (Aspect<numeric>, DateTime Aspect) - value 1 to compare
            asp2 (Aspect<numeric>, DateTime Aspect) - value 2 to compare
        Return:
            distance - distance difference in the informed units: the smaller of
                       the direct difference and the difference going around
                       the end of the cycle.
        '''
        first = asp1.get(self.units)
        second = asp2.get(self.units)
        later, earlier = max(first, second), min(first, second)
        # Going forward from the later value, past max_value, up to the earlier one.
        wrapped = (self.max_value - later) + earlier + 1
        direct = later - earlier
        return min(wrapped, direct)

    def abs_distance(self, asp1, asp2):
        '''Calculates the simple time distance (no wrap-around).

        Arguments:
            asp1 (Aspect<numeric>, DateTime Aspect) - value 1 to compare
            asp2 (Aspect<numeric>, DateTime Aspect) - value 2 to compare
        Return:
            distance - distance difference in the informed units.
        '''
        first = asp1.get(self.units)
        second = asp2.get(self.units)
        return abs(first - second)
[docs]
class DatetimeDistance(Comparator):
    '''Calculates the date distance in one of the following units:
        D - days
        M - months
        Y - years (difference between the calendar years)
        w - weeks
        h - hours
        m - minutes
        s - seconds (default), which includes microseconds fraction
        'weekday' - difference in weekdays (non-negative)
        '=weekday' - equal weekday (0 for equal values, 1 for different)
        'isweekday' - equal if both are weekdays or both are weekends (0 for equal values, 1 for different)

    Properties:
        units=None - Unit measure to get distance; None or any unrecognised value means seconds.
        max_value=None - Maximum possible value for distance.
    '''

    def __init__(self, max_value=None, units=None):
        super().__init__(max_value)
        self.units = units

    def distance(self, asp1, asp2):
        '''Calculates the distance between two date/time aspects.

        Arguments:
            asp1 (DateTime Aspect) - value 1 to compare
            asp2 (DateTime Aspect) - value 2 to compare
        Return:
            distance - difference between the later and the earlier value,
                       expressed in `self.units` (see the class description).
        '''
        # Order the values so that time differences are never negative.
        later = max(asp1._value, asp2._value)
        earlier = min(asp1._value, asp2._value)
        units = self.units

        if units is None or units == 's':
            return (later - earlier).total_seconds()
        if units == 'D':
            return (later - earlier).days
        if units == 'M':  # This is a workaround datetime.timedelta:
            from dateutil.relativedelta import relativedelta
            delta = relativedelta(later, earlier)
            return delta.years*12 + delta.months
        if units == 'Y':
            return later.year - earlier.year
        if units == 'w':
            return (later - earlier).days // 7
        if units == 'h':
            return (later - earlier).total_seconds() // 3600
        if units == 'm':
            return (later - earlier).total_seconds() // 60
        if units == 'weekday':
            # The later date may fall on an earlier weekday; keep the distance non-negative.
            return abs(later.weekday() - earlier.weekday())
        if units == '=weekday':
            # 0 means "same weekday" (returning the bare comparison gave 1 for equal days).
            return 0 if later.weekday() == earlier.weekday() else 1
        if units == 'isweekday':
            # datetime has no isweekday(); weekday() numbers Monday..Friday as 0..4.
            later_is_weekday = later.weekday() < 5
            earlier_is_weekday = earlier.weekday() < 5
            return 0 if later_is_weekday == earlier_is_weekday else 1
        # Unknown units fall back to seconds.
        return (later - earlier).total_seconds()
[docs]
class InintervalDistance(DatetimeDistance):  # DatetimeDistance.distance calculates the difference in minutes (if units == 'm')
    '''Date/time comparator that also understands intervals.

    Two plain dates are compared with the inherited date distance; as soon as
    one of the aspects is an Interval, the distance becomes a 0/1 match test.

    Properties:
        units='m' - Unit measure used for the plain date distance.
        max_value=None - Maximum possible value for distance.
    '''

    def __init__(self, max_value=None, units='m'):
        super().__init__(max_value, units)

    def distance(self, asp1, asp2):
        '''Calculates the distance between dates and/or intervals.

        Arguments:
            asp1 (DateTime or Interval) - value 1 to compare
            asp2 (DateTime or Interval) - value 2 to compare
        Return:
            distance - 0 if matching / 1 otherwise when an Interval is involved,
                       else the time difference in the informed units.
        '''
        from matmodel.base import Interval
        # in case one aspect is an Interval return 0 if match, else return 1
        if isinstance(asp1, Interval) or isinstance(asp2, Interval):
            return 0 if self.match(asp1, asp2) else 1
        return super().distance(asp1, asp2)  # Gets the time difference

    def match(self, asp1, asp2, match_threshold=0):
        '''Checks if the dates or intervals match.

        Can be used with 'DateTime' or 'Interval' instances, or mixed.

        Arguments:
            asp1 (DateTime or Interval) - value 1 to compare
            asp2 (DateTime or Interval) - value 2 to compare
            match_threshold=0 - largest distance accepted as a match; only used
                                when both aspects are points in time.
        Return:
            bool - True if the aspects match, False otherwise.
        Raises:
            TypeError - when an aspect is not a DateTime instance.
        '''
        from matmodel.base import DateTime, Interval
        # NOTE(review): the Interval branches below are only reachable if
        # Interval is a subclass of DateTime - confirm against matmodel.base.
        if not (isinstance(asp1, DateTime) and isinstance(asp2, DateTime)):
            raise TypeError("Aspects must be 'DateTime' or 'Interval' instances.")

        # For 2 intervals:
        if isinstance(asp1, Interval) and isinstance(asp2, Interval):
            return self.converge(asp1, asp2)
        # For 1 date and 1 interval:
        if isinstance(asp1, Interval) or isinstance(asp2, Interval):
            return self.match_date_interval(asp1, asp2)
        # If both asp1 and asp2 represent a point in time, the distance decides the match:
        return self.match_dates(asp1, asp2, match_threshold)

    def match_dates(self, asp1, asp2, match_threshold=0):
        '''Returns True when the (integer-truncated) distance does not exceed match_threshold.'''
        return not (int(self.distance(asp1, asp2)) > match_threshold)

    def match_date_interval(self, asp1, asp2):
        '''Returns True when the date's start lies inside the interval (bounds included).

        The arguments may come in either order: one date and one Interval.
        '''
        from matmodel.base import Interval
        if isinstance(asp2, Interval):  # asp1 is the DateTime, asp2 is the Interval
            date, interval = asp1, asp2
        else:
            date, interval = asp2, asp1
        return bool(date.start >= interval.start and date.start <= interval.end)

    def converge(self, asp1, asp2):
        '''Returns True when two intervals overlap (touching bounds count as overlap).

        Raises:
            TypeError - when an aspect is not an Interval instance.
        '''
        # Imported locally like in the sibling methods; without it the name
        # Interval is not bound inside this method.
        from matmodel.base import Interval
        if not (isinstance(asp1, Interval) and isinstance(asp2, Interval)):
            raise TypeError("Aspects must be 'Interval' instances.")
        return bool(max(asp1.start, asp2.start) <= min(asp1.end, asp2.end))
[docs]
class EuclideanDistance(Comparator):
    '''Straight-line distance between two points.'''

    def __init__(self, max_value=None):
        super().__init__(max_value)

    def distance(self, asp1, asp2):
        '''Calculates the Euclidean distance (works for points of 2D, 3D, and more).

        Coordinates are read pairwise from `asp1.value` and `asp2.value`.

        Arguments:
            asp1 (Space2D, Space3D) - value 1 to compare
            asp2 (Space2D, Space3D) - value 2 to compare
        Return:
            distance - distance value.
        '''
        import math
        squared_gaps = (abs(a - b) ** 2 for a, b in zip(asp1.value, asp2.value))
        return math.sqrt(sum(squared_gaps))
[docs]
class ManhattanDistance(Comparator):
    '''Sum of the per-coordinate absolute differences between two points.'''

    def __init__(self, max_value=None):
        super().__init__(max_value)

    def distance(self, asp1, asp2):
        '''Calculates the Manhattan distance (works for points of 2D, 3D, and more).

        Coordinates are read pairwise from `asp1.value` and `asp2.value`.

        Arguments:
            asp1 (Space2D, Space3D) - value 1 to compare
            asp2 (Space2D, Space3D) - value 2 to compare
        Return:
            distance - distance value.
        '''
        gaps = (abs(a - b) for a, b in zip(asp1.value, asp2.value))
        return sum(gaps)
[docs]
class LcsDistance(Comparator):
    '''Distance based on the Longest Common Subsequence (LCS) of two sequences.'''

    def __init__(self, max_value=None):
        super().__init__(max_value)

    def lcs(self, X, Y):
        '''Returns the length of the longest common subsequence of X and Y.

        Arguments:
            X - first sequence (must support len() and indexing)
            Y - second sequence (must support len() and indexing)
        Return:
            length - number of elements in the longest common subsequence.
        '''
        m, n = len(X), len(Y)
        # Dynamic programming over prefixes; only the previous row is ever
        # needed, so two rows are kept instead of the full table.
        previous = [0] * (n + 1)
        for i in range(1, m + 1):
            current = [0] * (n + 1)
            for j in range(1, n + 1):
                if X[i-1] == Y[j-1]:
                    current[j] = previous[j-1] + 1
                else:
                    current[j] = max(previous[j], current[j-1])
            previous = current
        return previous[n]

    def lcs_distance(self, X, Y):
        '''Returns the length of the longer sequence minus the LCS length.'''
        return max(len(X), len(Y)) - self.lcs(X, Y)

    def distance(self, asp1, asp2):
        '''Calculates the Longest Common Subsequence difference.

        Arguments:
            asp1 (Aspect<nominal>) - value 1 to compare
            asp2 (Aspect<nominal>) - value 2 to compare
        Return:
            distance - LCS distance value.
        '''
        return self.lcs_distance(asp1._value, asp2._value)
[docs]
class EditlcsDistance(LcsDistance):
    '''Edit-style distance derived from the Longest Common Subsequence.'''

    def __init__(self, max_value=None):
        super().__init__(max_value)

    def distance(self, asp1, asp2):
        '''Calculates the LCS-based edit distance.

        Counts the elements of both values that are left outside their
        longest common subsequence.

        Arguments:
            asp1 (Aspect<nominal>) - value 1 to compare
            asp2 (Aspect<nominal>) - value 2 to compare
        Return:
            distance - (len(asp1) - lcs) + (len(asp2) - lcs).
        '''
        first, second = asp1._value, asp2._value
        common = self.lcs(first, second)
        unmatched_first = len(first) - common
        unmatched_second = len(second) - common
        return unmatched_first + unmatched_second