583 lines
16 KiB
C++
583 lines
16 KiB
C++
/**
|
|
* (c) Daniel Lemire, 2008
|
|
*This C++ library implements fast nearest-neighbor retrieval under the dynamic
|
|
*time warping (DTW). This library includes the dynamic programming solution, a
|
|
*fast implementation of the LB_Keogh lower bound, as well as a improved lower
|
|
*bound called LB_Improved. This library was used to show that LB_Improved can
|
|
*be used to retrieve nearest neighbors three times on several data sets
|
|
* including random walks time series or shape time series.
|
|
*
|
|
* See the classes LB_Keogh and LB_Improved for usage.
|
|
*
|
|
* Time series are represented using STL vectors.
|
|
*/
|
|
|
|
#ifndef DTW
|
|
#define DTW
|
|
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <cmath>
|
|
#include <deque>
|
|
#include <iostream>
|
|
#include <sstream>
|
|
#include <vector>
|
|
|
|
typedef double floattype;
|
|
typedef unsigned int uint;
|
|
using namespace std;
|
|
|
|
class MathUtil {
|
|
public:
|
|
static inline int min(int x, int y) { return x < y ? x : y; }
|
|
static inline int max(int x, int y) { return x > y ? x : y; }
|
|
};
|
|
|
|
/**
|
|
* You do not need this function, used by LB_Keogh and LB_Improved later.
|
|
*/
|
|
void computeEnvelope(const vector<floattype> &array, uint constraint,
|
|
vector<floattype> &maxvalues,
|
|
vector<floattype> &minvalues);
|
|
|
|
/**
|
|
* You do not need this class, used by LB_Keogh and LB_Improved later.
|
|
* It computes the DTW using dynamic programmming.
|
|
*/
|
|
class dtw {
|
|
public:
|
|
enum { verbose = false, vverbose = false, INF = 100000000, fast = true };
|
|
static inline double max(double x, double y) { return x > y ? x : y; }
|
|
static inline double min(double x, double y) { return x < y ? x : y; }
|
|
|
|
dtw(uint n, uint constraint);
|
|
/*
|
|
* hardcoded l1 norm (for speed)
|
|
*/
|
|
double fastdynamic(const vector<double> &v, const vector<double> &w);
|
|
vector<vector<double>> mGamma;
|
|
|
|
int mN, mConstraint;
|
|
|
|
static double dynamic(const vector<double> &v, const vector<double> &w,
|
|
int constraint = INF, int p = 2);
|
|
};
|
|
|
|
class NearestNeighbor {
|
|
public:
|
|
NearestNeighbor(const vector<double> &v, int constraint);
|
|
virtual double test(const vector<double> &candidate) { return 0; } //= 0;
|
|
virtual double getLowestCost() { return 0; }
|
|
virtual ~NearestNeighbor(){};
|
|
virtual int getNumberOfDTW() { return 0; }
|
|
virtual int getNumberOfCandidates() { return 0; }
|
|
dtw mDTW;
|
|
};
|
|
|
|
class NaiveNearestNeighbor : public NearestNeighbor {
|
|
public:
|
|
NaiveNearestNeighbor(const vector<double> &v, int constraint);
|
|
|
|
double test(const vector<double> &candidate);
|
|
|
|
void resetStatistics();
|
|
|
|
double getLowestCost();
|
|
|
|
int getNumberOfDTW();
|
|
|
|
int getNumberOfCandidates();
|
|
|
|
private:
|
|
int lb_keogh;
|
|
int full_dtw;
|
|
|
|
const vector<double> V;
|
|
double bestsofar;
|
|
};
|
|
|
|
/**
|
|
* Usage: create the object using the time series you want to match again the
|
|
* database and some DTW time constraint parameter. Then repeatedly apply
|
|
* the test function on the various candidates. The function returns the
|
|
* matching cost with the best candidate so far. By keeping track of when
|
|
* the cost was lowered, you can find the nearest neighbor in a database.
|
|
*
|
|
* Only the l1 norm is used.
|
|
*/
|
|
class LB_Keogh : public NearestNeighbor {
|
|
public:
|
|
LB_Keogh(const vector<double> &v, int constraint)
|
|
: NearestNeighbor(v, constraint),
|
|
lb_keogh(0),
|
|
full_dtw(0),
|
|
V(v),
|
|
mConstraint(constraint),
|
|
bestsofar(dtw::INF),
|
|
U(V.size(), 0),
|
|
L(V.size(), 0) {
|
|
assert(mConstraint >= 0);
|
|
assert(mConstraint < static_cast<int>(V.size()));
|
|
computeEnvelope(V, mConstraint, U, L);
|
|
}
|
|
|
|
double justlb(const vector<double> &candidate) {
|
|
double error(0.0);
|
|
for (uint i = 0; i < V.size(); ++i) {
|
|
if (candidate[i] > U[i])
|
|
error += candidate[i] - U[i];
|
|
else if (candidate[i] < L[i])
|
|
error += L[i] - candidate[i];
|
|
}
|
|
return error;
|
|
}
|
|
double test(const vector<double> &candidate) {
|
|
++lb_keogh;
|
|
double error(0.0);
|
|
for (uint i = 0; i < V.size(); ++i) {
|
|
if (candidate[i] > U[i])
|
|
error += candidate[i] - U[i];
|
|
else if (candidate[i] < L[i])
|
|
error += L[i] - candidate[i];
|
|
}
|
|
if (error < bestsofar) {
|
|
++full_dtw;
|
|
const double trueerror = mDTW.fastdynamic(V, candidate);
|
|
if (trueerror < bestsofar) bestsofar = trueerror;
|
|
}
|
|
return bestsofar;
|
|
}
|
|
|
|
int getNumberOfDTW() { return full_dtw; }
|
|
|
|
int getNumberOfCandidates() { return lb_keogh; }
|
|
|
|
double getLowestCost() { return bestsofar; }
|
|
|
|
void resetStatistics() {
|
|
lb_keogh = 0;
|
|
full_dtw = 0;
|
|
}
|
|
|
|
private:
|
|
int lb_keogh;
|
|
int full_dtw;
|
|
const vector<double> V;
|
|
int mConstraint;
|
|
double bestsofar;
|
|
vector<double> U;
|
|
vector<double> L;
|
|
};
|
|
|
|
class LB_KeoghEarly : public NearestNeighbor {
|
|
public:
|
|
LB_KeoghEarly(const vector<double> &v, int constraint)
|
|
: NearestNeighbor(v, constraint),
|
|
lb_keogh(0),
|
|
full_dtw(0),
|
|
V(v),
|
|
mConstraint(constraint),
|
|
bestsofar(dtw::INF),
|
|
U(V.size(), 0),
|
|
L(V.size(), 0) {
|
|
assert(mConstraint >= 0);
|
|
assert(mConstraint < static_cast<int>(V.size()));
|
|
computeEnvelope(V, mConstraint, U, L);
|
|
}
|
|
|
|
double test(const vector<double> &candidate) {
|
|
++lb_keogh;
|
|
double error(0.0);
|
|
for (uint i = 0; i < V.size(); ++i) {
|
|
if (candidate[i] > U[i])
|
|
error += candidate[i] - U[i];
|
|
else if (candidate[i] < L[i])
|
|
error += L[i] - candidate[i];
|
|
if (error > bestsofar) return bestsofar;
|
|
}
|
|
++full_dtw;
|
|
const double trueerror = mDTW.fastdynamic(V, candidate); //,mConstraint,1);
|
|
if (trueerror < bestsofar) bestsofar = trueerror;
|
|
return bestsofar;
|
|
}
|
|
|
|
int getNumberOfDTW() { return full_dtw; }
|
|
|
|
int getNumberOfCandidates() { return lb_keogh; }
|
|
|
|
double getLowestCost() { return bestsofar; }
|
|
|
|
void resetStatistics() {
|
|
lb_keogh = 0;
|
|
full_dtw = 0;
|
|
}
|
|
|
|
private:
|
|
int lb_keogh;
|
|
int full_dtw;
|
|
const vector<double> V;
|
|
int mConstraint;
|
|
double bestsofar;
|
|
vector<double> U;
|
|
vector<double> L;
|
|
};
|
|
|
|
/**
|
|
* Usage (same as LB_Keogh): create the object using the time series you want to
|
|
* match again the database and some DTW time constraint parameter. Then
|
|
* repeatedly apply the test function on the various candidates. The function
|
|
* returns the matching cost with the best candidate so far. By keeping track of
|
|
* when the cost was lowered, you can find the nearest neighbor in a database.
|
|
*
|
|
* Only the l1 norm is used.
|
|
*/
|
|
class LB_Improved : public NearestNeighbor {
|
|
public:
|
|
LB_Improved(const vector<double> &v, int constraint)
|
|
: NearestNeighbor(v, constraint),
|
|
lb_keogh(0),
|
|
full_dtw(0),
|
|
V(v),
|
|
buffer(v),
|
|
mConstraint(constraint),
|
|
bestsofar(dtw::INF),
|
|
U(V.size(), 0),
|
|
L(V.size(), 0),
|
|
U2(V.size(), 0),
|
|
L2(V.size(), 0) {
|
|
assert(mConstraint >= 0);
|
|
assert(mConstraint < static_cast<int>(V.size()));
|
|
computeEnvelope(V, mConstraint, U, L);
|
|
}
|
|
|
|
void resetStatistics() {
|
|
lb_keogh = 0;
|
|
full_dtw = 0;
|
|
}
|
|
|
|
double justlb(const vector<double> &candidate) {
|
|
double error(0.0);
|
|
buffer = candidate;
|
|
for (uint i = 0; i < V.size(); ++i) {
|
|
if (candidate[i] > U[i])
|
|
error += candidate[i] - U[i];
|
|
else if (candidate[i] < L[i])
|
|
error += L[i] - candidate[i];
|
|
}
|
|
computeEnvelope(buffer, mConstraint, U2, L2);
|
|
for (uint i = 0; i < V.size(); ++i) {
|
|
if (V[i] > U2[i]) {
|
|
error += V[i] - U2[i];
|
|
} else if (V[i] < L2[i]) {
|
|
error += L2[i] - V[i];
|
|
}
|
|
}
|
|
return error;
|
|
}
|
|
|
|
double test(const vector<double> &candidate) {
|
|
++lb_keogh;
|
|
double error(0.0);
|
|
for (uint i = 0; i < V.size(); ++i) {
|
|
const double &cdi(candidate[i]);
|
|
if (cdi > U[i]) {
|
|
error += cdi - (buffer[i] = U[i]);
|
|
} else if (cdi < L[i]) {
|
|
error += (buffer[i] = L[i]) - cdi;
|
|
} else
|
|
buffer[i] = cdi;
|
|
}
|
|
if (error < bestsofar) {
|
|
computeEnvelope(buffer, mConstraint, U2, L2);
|
|
for (uint i = 0; i < V.size(); ++i) {
|
|
if (V[i] > U2[i]) {
|
|
error += V[i] - U2[i];
|
|
} else if (V[i] < L2[i]) {
|
|
error += L2[i] - V[i];
|
|
}
|
|
}
|
|
if (error < bestsofar) {
|
|
++full_dtw;
|
|
const double trueerror =
|
|
mDTW.fastdynamic(V, candidate); //,mConstraint,1);
|
|
if (trueerror < bestsofar) bestsofar = trueerror;
|
|
}
|
|
}
|
|
return bestsofar;
|
|
}
|
|
|
|
/**
|
|
* for plotting purposes
|
|
*/
|
|
string dumpTextDescriptor(const vector<double> &candidate) {
|
|
buffer = candidate;
|
|
++lb_keogh;
|
|
double error(0.0);
|
|
for (uint i = 0; i < V.size(); ++i) {
|
|
if (candidate[i] > U[i]) {
|
|
error += candidate[i] - U[i];
|
|
buffer[i] = U[i];
|
|
} else if (candidate[i] < L[i]) {
|
|
error += L[i] - candidate[i];
|
|
buffer[i] = L[i];
|
|
}
|
|
assert((buffer[i] == L[i]) or (buffer[i] == U[i]) or
|
|
(buffer[i] == candidate[i]));
|
|
}
|
|
vector<double> lbimprovedarray;
|
|
computeEnvelope(buffer, mConstraint, U2, L2);
|
|
for (uint i = 0; i < V.size(); ++i) {
|
|
if (V[i] > U2[i]) {
|
|
error += V[i] - U2[i];
|
|
lbimprovedarray.push_back(U2[i]);
|
|
} else if (V[i] < L2[i]) {
|
|
error += L2[i] - V[i];
|
|
lbimprovedarray.push_back(L2[i]);
|
|
} else
|
|
lbimprovedarray.push_back(V[i]);
|
|
}
|
|
stringstream ss;
|
|
for (uint k = 0; k < V.size(); ++k) {
|
|
assert((lbimprovedarray[k] == L2[k]) or (lbimprovedarray[k] == U2[k]) or
|
|
(lbimprovedarray[k] == V[k]));
|
|
assert((buffer[k] == L[k]) or (buffer[k] == U[k]) or
|
|
(buffer[k] == candidate[k]));
|
|
ss << k << " " << V[k] << " " << candidate[k] << " " << buffer[k] << " "
|
|
<< lbimprovedarray[k] << " " << L[k] << " " << U[k] << " " << L2[k]
|
|
<< " " << U2[k] << endl;
|
|
}
|
|
return ss.str();
|
|
}
|
|
|
|
int getNumberOfDTW() { return full_dtw; }
|
|
|
|
int getNumberOfCandidates() { return lb_keogh; }
|
|
|
|
double getLowestCost() { return bestsofar; }
|
|
|
|
private:
|
|
int lb_keogh;
|
|
int full_dtw;
|
|
const vector<double> V;
|
|
vector<double> buffer;
|
|
int mConstraint;
|
|
double bestsofar;
|
|
vector<double> U;
|
|
vector<double> L;
|
|
vector<double> U2;
|
|
vector<double> L2;
|
|
};
|
|
|
|
class LB_ImprovedEarly : public NearestNeighbor {
|
|
public:
|
|
LB_ImprovedEarly(const vector<double> &v, int constraint)
|
|
: NearestNeighbor(v, constraint),
|
|
lb_keogh(0),
|
|
full_dtw(0),
|
|
V(v),
|
|
buffer(v),
|
|
mConstraint(constraint),
|
|
bestsofar(dtw::INF),
|
|
U(V.size(), 0),
|
|
L(V.size(), 0),
|
|
U2(V.size(), 0),
|
|
L2(V.size(), 0) {
|
|
assert(mConstraint >= 0);
|
|
assert(mConstraint < static_cast<int>(V.size()));
|
|
computeEnvelope(V, mConstraint, U, L);
|
|
}
|
|
|
|
void resetStatistics() {
|
|
lb_keogh = 0;
|
|
full_dtw = 0;
|
|
}
|
|
|
|
double test(const vector<double> &candidate) {
|
|
++lb_keogh;
|
|
double error(0.0);
|
|
for (uint i = 0; i < V.size(); ++i) {
|
|
const double &cdi(candidate[i]);
|
|
if (cdi > U[i]) {
|
|
error += cdi - (buffer[i] = U[i]);
|
|
} else if (cdi < L[i]) {
|
|
error += (buffer[i] = L[i]) - cdi;
|
|
} else
|
|
buffer[i] = cdi;
|
|
if (error > bestsofar) return bestsofar;
|
|
}
|
|
if (error < bestsofar) {
|
|
computeEnvelope(buffer, mConstraint, U2, L2);
|
|
for (uint i = 0; i < V.size(); ++i) {
|
|
if (V[i] > U2[i]) {
|
|
error += V[i] - U2[i];
|
|
} else if (V[i] < L2[i]) {
|
|
error += L2[i] - V[i];
|
|
}
|
|
if (error > bestsofar) return bestsofar;
|
|
}
|
|
if (error < bestsofar) {
|
|
++full_dtw;
|
|
const double trueerror =
|
|
mDTW.fastdynamic(V, candidate); //,mConstraint,1);
|
|
if (trueerror < bestsofar) bestsofar = trueerror;
|
|
}
|
|
}
|
|
return bestsofar;
|
|
}
|
|
|
|
int getNumberOfDTW() { return full_dtw; }
|
|
|
|
int getNumberOfCandidates() { return lb_keogh; }
|
|
|
|
double getLowestCost() { return bestsofar; }
|
|
|
|
private:
|
|
int lb_keogh;
|
|
int full_dtw;
|
|
const vector<double> V;
|
|
vector<double> buffer;
|
|
int mConstraint;
|
|
double bestsofar;
|
|
vector<double> U;
|
|
vector<double> L;
|
|
vector<double> U2;
|
|
vector<double> L2;
|
|
};
|
|
|
|
void piecewiseSumReduction(const vector<floattype> &array,
|
|
vector<floattype> &out);
|
|
|
|
/**
|
|
* for debugging purposes
|
|
*/
|
|
class DimReducedLB_Keogh : public NearestNeighbor {
|
|
public:
|
|
DimReducedLB_Keogh(const vector<double> &v, int constraint, int reduced)
|
|
: NearestNeighbor(v, constraint),
|
|
lb_keogh(0),
|
|
full_dtw(0),
|
|
V(v),
|
|
mConstraint(constraint),
|
|
bestsofar(dtw::INF),
|
|
U(V.size(), 0),
|
|
L(V.size(), 0),
|
|
Ured(reduced, 0),
|
|
Lred(reduced) {
|
|
assert(mConstraint >= 0);
|
|
assert(mConstraint < static_cast<int>(V.size()));
|
|
computeEnvelope(V, mConstraint, U, L);
|
|
piecewiseSumReduction(U, Ured);
|
|
piecewiseSumReduction(L, Lred);
|
|
}
|
|
|
|
double test(const vector<double> &candidate) {
|
|
vector<double> reducedCandidate(Ured.size());
|
|
piecewiseSumReduction(candidate, reducedCandidate);
|
|
double smallerror(0.0);
|
|
for (uint i = 0; i < Ured.size(); ++i) {
|
|
if (reducedCandidate[i] > Ured[i])
|
|
smallerror += reducedCandidate[i] - Ured[i];
|
|
else if (reducedCandidate[i] < Lred[i])
|
|
smallerror += Lred[i] - reducedCandidate[i];
|
|
}
|
|
if (smallerror > bestsofar) return bestsofar;
|
|
++lb_keogh;
|
|
double error(0.0);
|
|
for (uint i = 0; i < V.size(); ++i) {
|
|
if (candidate[i] > U[i])
|
|
error += candidate[i] - U[i];
|
|
else if (candidate[i] < L[i])
|
|
error += L[i] - candidate[i];
|
|
}
|
|
if (error < bestsofar) {
|
|
++full_dtw;
|
|
const double trueerror =
|
|
mDTW.fastdynamic(V, candidate); //,mConstraint,1);
|
|
if (trueerror < bestsofar) bestsofar = trueerror;
|
|
}
|
|
return bestsofar;
|
|
}
|
|
|
|
int getNumberOfDTW() { return full_dtw; }
|
|
|
|
int getNumberOfCandidates() { return lb_keogh; }
|
|
|
|
double getLowestCost() { return bestsofar; }
|
|
|
|
private:
|
|
int lb_keogh;
|
|
int full_dtw;
|
|
const vector<double> V;
|
|
int mConstraint;
|
|
double bestsofar;
|
|
vector<double> U, L;
|
|
vector<double> Ured, Lred;
|
|
};
|
|
|
|
/**
|
|
* this class is not used?
|
|
*/
|
|
class Envelope {
|
|
public:
|
|
Envelope() : maxfifo(), minfifo() {}
|
|
virtual ~Envelope() {}
|
|
void compute(const vector<floattype> &array, uint constraint,
|
|
vector<floattype> &maxvalues, vector<floattype> &minvalues) {
|
|
const uint width = 1 + 2 * constraint;
|
|
maxfifo.clear();
|
|
minfifo.clear();
|
|
for (uint i = 1; i < array.size(); ++i) {
|
|
if (i >= constraint + 1) {
|
|
maxvalues[i - constraint - 1] =
|
|
array[maxfifo.size() > 0 ? maxfifo.front() : i - 1];
|
|
minvalues[i - constraint - 1] =
|
|
array[minfifo.size() > 0 ? minfifo.front() : i - 1];
|
|
}
|
|
if (array[i] > array[i - 1]) { // overshoot
|
|
minfifo.push_back(i - 1);
|
|
if (i == width + minfifo.front()) minfifo.pop_front();
|
|
while (maxfifo.size() > 0) {
|
|
if (array[i] <= array[maxfifo.back()]) {
|
|
if (i == width + maxfifo.front()) maxfifo.pop_front();
|
|
break;
|
|
}
|
|
maxfifo.pop_back();
|
|
}
|
|
} else {
|
|
maxfifo.push_back(i - 1);
|
|
if (i == width + maxfifo.front()) maxfifo.pop_front();
|
|
while (minfifo.size() > 0) {
|
|
if (array[i] >= array[minfifo.back()]) {
|
|
if (i == width + minfifo.front()) minfifo.pop_front();
|
|
break;
|
|
}
|
|
minfifo.pop_back();
|
|
}
|
|
}
|
|
}
|
|
for (uint i = array.size(); i <= array.size() + constraint; ++i) {
|
|
if (maxfifo.size() > 0) {
|
|
maxvalues[i - constraint - 1] = array[maxfifo.front()];
|
|
if (i - maxfifo.front() >= width) {
|
|
maxfifo.pop_front();
|
|
}
|
|
} else {
|
|
maxvalues[i - constraint - 1] = array[array.size() - 1];
|
|
}
|
|
if (minfifo.size() > 0) {
|
|
minvalues[i - constraint - 1] = array[minfifo.front()];
|
|
if (i - minfifo.front() >= width) {
|
|
minfifo.pop_front();
|
|
}
|
|
} else {
|
|
minvalues[i - constraint - 1] = array[array.size() - 1];
|
|
}
|
|
}
|
|
}
|
|
deque<int> maxfifo, minfifo;
|
|
};
|
|
|
|
// only used for unit testing
|
|
double l1diff(const vector<double> &v, const vector<double> &w);
|
|
#endif
|