Distance Measures in data science with algorithms
1. Euclidean Distance:
import numpy as np
def euclidean_distance(p1, p2):
return np.sqrt(np.sum((p1 - p2) ** 2))
# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Euclidean distance:", euclidean_distance(point1, point2))
#clcoding.com
Euclidean distance: 2.8284271247461903
2. Manhattan Distance:
import numpy as np
def manhattan_distance(p1, p2):
return np.sum(np.abs(p1 - p2))
# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Manhattan distance:", manhattan_distance(point1, point2))
#clcoding.com
Manhattan distance: 4
3. Cosine Similarity:
from scipy.spatial import distance
def cosine_similarity(p1, p2):
return 1 - distance.cosine(p1, p2)
# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Cosine similarity:", cosine_similarity(point1, point2))
#clcoding.com
Cosine similarity: 0.9838699100999074
4. Minkowski Distance:
import numpy as np
def minkowski_distance(p1, p2, r):
return np.power(np.sum(np.power(np.abs(p1 - p2), r)), 1/r)
# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Minkowski distance:", minkowski_distance(point1, point2, 3))
#clcoding.com
Minkowski distance: 2.5198420997897464
5. Chebyshev Distance:
import numpy as np
def chebyshev_distance(p1, p2):
return np.max(np.abs(p1 - p2))
# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Chebyshev distance:", chebyshev_distance(point1, point2))
#clcoding.com
Chebyshev distance: 2
6. Hamming Distance:
import jellyfish
def hamming_distance(s1, s2):
return jellyfish.hamming_distance(s1, s2)
# Example usage
string1 = "hello"
string2 = "hallo"
print("Hamming distance:", hamming_distance(string1, string2))
#clcoding.com
Hamming distance: 1
7. Jaccard Similarity:
def jaccard_similarity(s1, s2):
set1 = set(s1)
set2 = set(s2)
intersection = set1.intersection(set2)
union = set1.union(set2)
return len(intersection) / len(union)
# Example usage
string1 = "hello"
string2 = "hallo"
print("Jaccard similarity:", jaccard_similarity(string1, string2))
#clcoding.com
Jaccard similarity: 0.6
8. Sørensen-Dice Index:
def sorensen_dice_index(s1, s2):
set1 = set(s1)
set2 = set(s2)
intersection = set1.intersection(set2)
return (2 * len(intersection)) / (len(set1) + len(set2))
# Example usage
string1 = "hello"
string2 = "hallo"
print("Sørensen-Dice index:", sorensen_dice_index(string1, string2))
#clcoding.com
Sørensen-Dice index: 0.75
9. Haversine Distance:
def haversine_distance(lat1, lon1, lat2, lon2):
R = 6371.0 # Radius of the earth in km
dLat = np.deg2rad(lat2 - lat1)
dLon = np.deg2rad(lon2 - lon1)
a = np.sin(dLat / 2)**2 + np.cos(np.deg2rad(lat1)) * np.cos(np.deg2rad(lat2)) * np.sin(dLon / 2)**2
c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
return R * c
# Example usage
print("Haversine distance:", haversine_distance(51.5074, 0.1278, 40.7128, -74.0060))
#clcoding.com
Input In [14]
a = np.sin(dLat / 2)**2 + np.cos(np.deg2rad(lat1)) *
^
SyntaxError: invalid syntax
10. Mahalanobis Distance:
from scipy.spatial.distance import cdist
def mahalanobis_distance(X, Y):
return cdist(X.reshape(1,-1), Y.reshape(1,-1), 'mahalanobis', VI=np.cov(X))
# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Mahalanobis distance:", mahalanobis_distance(point1, point2))
#clcoding.com
Mahalanobis distance: [[1.41421356]]
11. Pearson Correlation:
from scipy.stats import pearsonr
def pearson_correlation(X, Y):
return pearsonr(X, Y)[0]
# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Pearson correlation:", pearson_correlation(point1, point2))
#clcoding.com
Pearson correlation: 1.0
12. Squared Euclidean Distance:
def squared_euclidean_distance(X, Y):
return euclidean_distance(X, Y)**2
# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Squared Euclidean distance:", squared_euclidean_distance(point1, point2))
#clcoding.com
Squared Euclidean distance: 8.000000000000002
13. Jensen-Shannon Divergence:
def jensen_shannon_divergence(X, Y):
M = 0.5 * (X + Y)
return np.sqrt(0.5 * (rel_entr(X, M).sum() + rel_entr(Y, M).sum()))
# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Jensen-Shannon divergence:", jensen_shannon_divergence(point1, point2))
#clcoding.com
Jensen-Shannon divergence: 0.6569041853099059
14. Chi-Square Distance:
def chi_square_distance(X, Y):
X = X / np.sum(X)
Y = Y / np.sum(Y)
return np.sum((X - Y) ** 2 / (X + Y))
# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Chi-Square distance:", chi_square_distance(point1, point2))
#clcoding.com
Chi-Square distance: 0.01923076923076923
15. Spearman Correlation:
from scipy.stats import spearmanr
def spearman_correlation(X, Y):
return spearmanr(X, Y)[0]
# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Spearman correlation:", spearman_correlation(point1, point2))
#clcoding.com
Spearman correlation: 0.9999999999999999
16. Canberra Distance:
from scipy.spatial.distance import canberra
def canberra_distance(X, Y):
return canberra(X, Y)
# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Canberra distance:", canberra_distance(point1, point2))
#clcoding.com
Canberra distance: 0.8333333333333333
0 Comments:
Post a Comment