eis/eqpalg/.do_not_use/utility-no-use/dtw/dtw.h

583 lines
16 KiB
C
Raw Normal View History

/**
* (c) Daniel Lemire, 2008
*This C++ library implements fast nearest-neighbor retrieval under the dynamic
*time warping (DTW). This library includes the dynamic programming solution, a
*fast implementation of the LB_Keogh lower bound, as well as a improved lower
*bound called LB_Improved. This library was used to show that LB_Improved can
*be used to retrieve nearest neighbors three times on several data sets
* including random walks time series or shape time series.
*
* See the classes LB_Keogh and LB_Improved for usage.
*
* Time series are represented using STL vectors.
*/
#ifndef DTW
#define DTW
#include <algorithm>
#include <cassert>
#include <cmath>
#include <deque>
#include <iostream>
#include <sstream>
#include <vector>
typedef double floattype;
typedef unsigned int uint;
using namespace std;
class MathUtil {
public:
static inline int min(int x, int y) { return x < y ? x : y; }
static inline int max(int x, int y) { return x > y ? x : y; }
};
/**
* You do not need this function, used by LB_Keogh and LB_Improved later.
*/
void computeEnvelope(const vector<floattype> &array, uint constraint,
vector<floattype> &maxvalues,
vector<floattype> &minvalues);
/**
* You do not need this class, used by LB_Keogh and LB_Improved later.
* It computes the DTW using dynamic programmming.
*/
class dtw {
public:
enum { verbose = false, vverbose = false, INF = 100000000, fast = true };
static inline double max(double x, double y) { return x > y ? x : y; }
static inline double min(double x, double y) { return x < y ? x : y; }
dtw(uint n, uint constraint);
/*
* hardcoded l1 norm (for speed)
*/
double fastdynamic(const vector<double> &v, const vector<double> &w);
vector<vector<double>> mGamma;
int mN, mConstraint;
static double dynamic(const vector<double> &v, const vector<double> &w,
int constraint = INF, int p = 2);
};
class NearestNeighbor {
public:
NearestNeighbor(const vector<double> &v, int constraint);
virtual double test(const vector<double> &candidate) { return 0; } //= 0;
virtual double getLowestCost() { return 0; }
virtual ~NearestNeighbor(){};
virtual int getNumberOfDTW() { return 0; }
virtual int getNumberOfCandidates() { return 0; }
dtw mDTW;
};
class NaiveNearestNeighbor : public NearestNeighbor {
public:
NaiveNearestNeighbor(const vector<double> &v, int constraint);
double test(const vector<double> &candidate);
void resetStatistics();
double getLowestCost();
int getNumberOfDTW();
int getNumberOfCandidates();
private:
int lb_keogh;
int full_dtw;
const vector<double> V;
double bestsofar;
};
/**
* Usage: create the object using the time series you want to match again the
* database and some DTW time constraint parameter. Then repeatedly apply
* the test function on the various candidates. The function returns the
* matching cost with the best candidate so far. By keeping track of when
* the cost was lowered, you can find the nearest neighbor in a database.
*
* Only the l1 norm is used.
*/
class LB_Keogh : public NearestNeighbor {
public:
LB_Keogh(const vector<double> &v, int constraint)
: NearestNeighbor(v, constraint),
lb_keogh(0),
full_dtw(0),
V(v),
mConstraint(constraint),
bestsofar(dtw::INF),
U(V.size(), 0),
L(V.size(), 0) {
assert(mConstraint >= 0);
assert(mConstraint < static_cast<int>(V.size()));
computeEnvelope(V, mConstraint, U, L);
}
double justlb(const vector<double> &candidate) {
double error(0.0);
for (uint i = 0; i < V.size(); ++i) {
if (candidate[i] > U[i])
error += candidate[i] - U[i];
else if (candidate[i] < L[i])
error += L[i] - candidate[i];
}
return error;
}
double test(const vector<double> &candidate) {
++lb_keogh;
double error(0.0);
for (uint i = 0; i < V.size(); ++i) {
if (candidate[i] > U[i])
error += candidate[i] - U[i];
else if (candidate[i] < L[i])
error += L[i] - candidate[i];
}
if (error < bestsofar) {
++full_dtw;
const double trueerror = mDTW.fastdynamic(V, candidate);
if (trueerror < bestsofar) bestsofar = trueerror;
}
return bestsofar;
}
int getNumberOfDTW() { return full_dtw; }
int getNumberOfCandidates() { return lb_keogh; }
double getLowestCost() { return bestsofar; }
void resetStatistics() {
lb_keogh = 0;
full_dtw = 0;
}
private:
int lb_keogh;
int full_dtw;
const vector<double> V;
int mConstraint;
double bestsofar;
vector<double> U;
vector<double> L;
};
class LB_KeoghEarly : public NearestNeighbor {
public:
LB_KeoghEarly(const vector<double> &v, int constraint)
: NearestNeighbor(v, constraint),
lb_keogh(0),
full_dtw(0),
V(v),
mConstraint(constraint),
bestsofar(dtw::INF),
U(V.size(), 0),
L(V.size(), 0) {
assert(mConstraint >= 0);
assert(mConstraint < static_cast<int>(V.size()));
computeEnvelope(V, mConstraint, U, L);
}
double test(const vector<double> &candidate) {
++lb_keogh;
double error(0.0);
for (uint i = 0; i < V.size(); ++i) {
if (candidate[i] > U[i])
error += candidate[i] - U[i];
else if (candidate[i] < L[i])
error += L[i] - candidate[i];
if (error > bestsofar) return bestsofar;
}
++full_dtw;
const double trueerror = mDTW.fastdynamic(V, candidate); //,mConstraint,1);
if (trueerror < bestsofar) bestsofar = trueerror;
return bestsofar;
}
int getNumberOfDTW() { return full_dtw; }
int getNumberOfCandidates() { return lb_keogh; }
double getLowestCost() { return bestsofar; }
void resetStatistics() {
lb_keogh = 0;
full_dtw = 0;
}
private:
int lb_keogh;
int full_dtw;
const vector<double> V;
int mConstraint;
double bestsofar;
vector<double> U;
vector<double> L;
};
/**
* Usage (same as LB_Keogh): create the object using the time series you want to
* match again the database and some DTW time constraint parameter. Then
* repeatedly apply the test function on the various candidates. The function
* returns the matching cost with the best candidate so far. By keeping track of
* when the cost was lowered, you can find the nearest neighbor in a database.
*
* Only the l1 norm is used.
*/
class LB_Improved : public NearestNeighbor {
public:
LB_Improved(const vector<double> &v, int constraint)
: NearestNeighbor(v, constraint),
lb_keogh(0),
full_dtw(0),
V(v),
buffer(v),
mConstraint(constraint),
bestsofar(dtw::INF),
U(V.size(), 0),
L(V.size(), 0),
U2(V.size(), 0),
L2(V.size(), 0) {
assert(mConstraint >= 0);
assert(mConstraint < static_cast<int>(V.size()));
computeEnvelope(V, mConstraint, U, L);
}
void resetStatistics() {
lb_keogh = 0;
full_dtw = 0;
}
double justlb(const vector<double> &candidate) {
double error(0.0);
buffer = candidate;
for (uint i = 0; i < V.size(); ++i) {
if (candidate[i] > U[i])
error += candidate[i] - U[i];
else if (candidate[i] < L[i])
error += L[i] - candidate[i];
}
computeEnvelope(buffer, mConstraint, U2, L2);
for (uint i = 0; i < V.size(); ++i) {
if (V[i] > U2[i]) {
error += V[i] - U2[i];
} else if (V[i] < L2[i]) {
error += L2[i] - V[i];
}
}
return error;
}
double test(const vector<double> &candidate) {
++lb_keogh;
double error(0.0);
for (uint i = 0; i < V.size(); ++i) {
const double &cdi(candidate[i]);
if (cdi > U[i]) {
error += cdi - (buffer[i] = U[i]);
} else if (cdi < L[i]) {
error += (buffer[i] = L[i]) - cdi;
} else
buffer[i] = cdi;
}
if (error < bestsofar) {
computeEnvelope(buffer, mConstraint, U2, L2);
for (uint i = 0; i < V.size(); ++i) {
if (V[i] > U2[i]) {
error += V[i] - U2[i];
} else if (V[i] < L2[i]) {
error += L2[i] - V[i];
}
}
if (error < bestsofar) {
++full_dtw;
const double trueerror =
mDTW.fastdynamic(V, candidate); //,mConstraint,1);
if (trueerror < bestsofar) bestsofar = trueerror;
}
}
return bestsofar;
}
/**
* for plotting purposes
*/
string dumpTextDescriptor(const vector<double> &candidate) {
buffer = candidate;
++lb_keogh;
double error(0.0);
for (uint i = 0; i < V.size(); ++i) {
if (candidate[i] > U[i]) {
error += candidate[i] - U[i];
buffer[i] = U[i];
} else if (candidate[i] < L[i]) {
error += L[i] - candidate[i];
buffer[i] = L[i];
}
assert((buffer[i] == L[i]) or (buffer[i] == U[i]) or
(buffer[i] == candidate[i]));
}
vector<double> lbimprovedarray;
computeEnvelope(buffer, mConstraint, U2, L2);
for (uint i = 0; i < V.size(); ++i) {
if (V[i] > U2[i]) {
error += V[i] - U2[i];
lbimprovedarray.push_back(U2[i]);
} else if (V[i] < L2[i]) {
error += L2[i] - V[i];
lbimprovedarray.push_back(L2[i]);
} else
lbimprovedarray.push_back(V[i]);
}
stringstream ss;
for (uint k = 0; k < V.size(); ++k) {
assert((lbimprovedarray[k] == L2[k]) or (lbimprovedarray[k] == U2[k]) or
(lbimprovedarray[k] == V[k]));
assert((buffer[k] == L[k]) or (buffer[k] == U[k]) or
(buffer[k] == candidate[k]));
ss << k << " " << V[k] << " " << candidate[k] << " " << buffer[k] << " "
<< lbimprovedarray[k] << " " << L[k] << " " << U[k] << " " << L2[k]
<< " " << U2[k] << endl;
}
return ss.str();
}
int getNumberOfDTW() { return full_dtw; }
int getNumberOfCandidates() { return lb_keogh; }
double getLowestCost() { return bestsofar; }
private:
int lb_keogh;
int full_dtw;
const vector<double> V;
vector<double> buffer;
int mConstraint;
double bestsofar;
vector<double> U;
vector<double> L;
vector<double> U2;
vector<double> L2;
};
class LB_ImprovedEarly : public NearestNeighbor {
public:
LB_ImprovedEarly(const vector<double> &v, int constraint)
: NearestNeighbor(v, constraint),
lb_keogh(0),
full_dtw(0),
V(v),
buffer(v),
mConstraint(constraint),
bestsofar(dtw::INF),
U(V.size(), 0),
L(V.size(), 0),
U2(V.size(), 0),
L2(V.size(), 0) {
assert(mConstraint >= 0);
assert(mConstraint < static_cast<int>(V.size()));
computeEnvelope(V, mConstraint, U, L);
}
void resetStatistics() {
lb_keogh = 0;
full_dtw = 0;
}
double test(const vector<double> &candidate) {
++lb_keogh;
double error(0.0);
for (uint i = 0; i < V.size(); ++i) {
const double &cdi(candidate[i]);
if (cdi > U[i]) {
error += cdi - (buffer[i] = U[i]);
} else if (cdi < L[i]) {
error += (buffer[i] = L[i]) - cdi;
} else
buffer[i] = cdi;
if (error > bestsofar) return bestsofar;
}
if (error < bestsofar) {
computeEnvelope(buffer, mConstraint, U2, L2);
for (uint i = 0; i < V.size(); ++i) {
if (V[i] > U2[i]) {
error += V[i] - U2[i];
} else if (V[i] < L2[i]) {
error += L2[i] - V[i];
}
if (error > bestsofar) return bestsofar;
}
if (error < bestsofar) {
++full_dtw;
const double trueerror =
mDTW.fastdynamic(V, candidate); //,mConstraint,1);
if (trueerror < bestsofar) bestsofar = trueerror;
}
}
return bestsofar;
}
int getNumberOfDTW() { return full_dtw; }
int getNumberOfCandidates() { return lb_keogh; }
double getLowestCost() { return bestsofar; }
private:
int lb_keogh;
int full_dtw;
const vector<double> V;
vector<double> buffer;
int mConstraint;
double bestsofar;
vector<double> U;
vector<double> L;
vector<double> U2;
vector<double> L2;
};
void piecewiseSumReduction(const vector<floattype> &array,
vector<floattype> &out);
/**
* for debugging purposes
*/
class DimReducedLB_Keogh : public NearestNeighbor {
public:
DimReducedLB_Keogh(const vector<double> &v, int constraint, int reduced)
: NearestNeighbor(v, constraint),
lb_keogh(0),
full_dtw(0),
V(v),
mConstraint(constraint),
bestsofar(dtw::INF),
U(V.size(), 0),
L(V.size(), 0),
Ured(reduced, 0),
Lred(reduced) {
assert(mConstraint >= 0);
assert(mConstraint < static_cast<int>(V.size()));
computeEnvelope(V, mConstraint, U, L);
piecewiseSumReduction(U, Ured);
piecewiseSumReduction(L, Lred);
}
double test(const vector<double> &candidate) {
vector<double> reducedCandidate(Ured.size());
piecewiseSumReduction(candidate, reducedCandidate);
double smallerror(0.0);
for (uint i = 0; i < Ured.size(); ++i) {
if (reducedCandidate[i] > Ured[i])
smallerror += reducedCandidate[i] - Ured[i];
else if (reducedCandidate[i] < Lred[i])
smallerror += Lred[i] - reducedCandidate[i];
}
if (smallerror > bestsofar) return bestsofar;
++lb_keogh;
double error(0.0);
for (uint i = 0; i < V.size(); ++i) {
if (candidate[i] > U[i])
error += candidate[i] - U[i];
else if (candidate[i] < L[i])
error += L[i] - candidate[i];
}
if (error < bestsofar) {
++full_dtw;
const double trueerror =
mDTW.fastdynamic(V, candidate); //,mConstraint,1);
if (trueerror < bestsofar) bestsofar = trueerror;
}
return bestsofar;
}
int getNumberOfDTW() { return full_dtw; }
int getNumberOfCandidates() { return lb_keogh; }
double getLowestCost() { return bestsofar; }
private:
int lb_keogh;
int full_dtw;
const vector<double> V;
int mConstraint;
double bestsofar;
vector<double> U, L;
vector<double> Ured, Lred;
};
/**
* this class is not used?
*/
class Envelope {
public:
Envelope() : maxfifo(), minfifo() {}
virtual ~Envelope() {}
void compute(const vector<floattype> &array, uint constraint,
vector<floattype> &maxvalues, vector<floattype> &minvalues) {
const uint width = 1 + 2 * constraint;
maxfifo.clear();
minfifo.clear();
for (uint i = 1; i < array.size(); ++i) {
if (i >= constraint + 1) {
maxvalues[i - constraint - 1] =
array[maxfifo.size() > 0 ? maxfifo.front() : i - 1];
minvalues[i - constraint - 1] =
array[minfifo.size() > 0 ? minfifo.front() : i - 1];
}
if (array[i] > array[i - 1]) { // overshoot
minfifo.push_back(i - 1);
if (i == width + minfifo.front()) minfifo.pop_front();
while (maxfifo.size() > 0) {
if (array[i] <= array[maxfifo.back()]) {
if (i == width + maxfifo.front()) maxfifo.pop_front();
break;
}
maxfifo.pop_back();
}
} else {
maxfifo.push_back(i - 1);
if (i == width + maxfifo.front()) maxfifo.pop_front();
while (minfifo.size() > 0) {
if (array[i] >= array[minfifo.back()]) {
if (i == width + minfifo.front()) minfifo.pop_front();
break;
}
minfifo.pop_back();
}
}
}
for (uint i = array.size(); i <= array.size() + constraint; ++i) {
if (maxfifo.size() > 0) {
maxvalues[i - constraint - 1] = array[maxfifo.front()];
if (i - maxfifo.front() >= width) {
maxfifo.pop_front();
}
} else {
maxvalues[i - constraint - 1] = array[array.size() - 1];
}
if (minfifo.size() > 0) {
minvalues[i - constraint - 1] = array[minfifo.front()];
if (i - minfifo.front() >= width) {
minfifo.pop_front();
}
} else {
minvalues[i - constraint - 1] = array[array.size() - 1];
}
}
}
deque<int> maxfifo, minfifo;
};
// only used for unit testing
double l1diff(const vector<double> &v, const vector<double> &w);
#endif