PMF.cpp
Go to the documentation of this file.
00001 /************************************************************************
00002  *  Copyright (C) 2012 Eindhoven University of Technology (TU/e).       *
00003  *  All rights reserved.                                                *
00004  ************************************************************************
00005  *  Redistribution and use in source and binary forms, with or without  *
00006  *  modification, are permitted provided that the following conditions  *
00007  *  are met:                                                            *
00008  *                                                                      *
00009  *      1.  Redistributions of source code must retain the above        *
00010  *          copyright notice, this list of conditions and the following *
00011  *          disclaimer.                                                 *
00012  *                                                                      *
00013  *      2.  Redistributions in binary form must reproduce the above     *
00014  *          copyright notice, this list of conditions and the following *
00015  *          disclaimer in the documentation and/or other materials      *
00016  *          provided with the distribution.                             *
00017  *                                                                      *
00018  *  THIS SOFTWARE IS PROVIDED BY TU/e "AS IS" AND ANY EXPRESS OR        *
00019  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED      *
00020  *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE  *
00021  *  ARE DISCLAIMED. IN NO EVENT SHALL TU/e OR CONTRIBUTORS BE LIABLE    *
00022  *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR        *
00023  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT   *
00024  *  OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;     *
00025  *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF       *
00026  *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT           *
00027  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE   *
00028  *  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH    *
00029  *  DAMAGE.                                                             *
00030  *                                                                      *
00031  *  The views and conclusions contained in the software and             *
00032  *  documentation are those of the authors and should not be            *
00033  *  interpreted as representing official policies, either expressed or  *
00034  *  implied, of TU/e.                                                   *
00035  ************************************************************************/
00036 
00037 #include "problib/pdfs/PMF.h"
00038 
00039 using namespace pbl;
00040 
00041 PMF::PMF(int domain_size) : PDF(1, PDF::DISCRETE), ptr_(new PMFStruct(domain_size)) {
00042 }
00043 
00044 PMF::PMF(const PMF& pmf) : PDF(1, PDF::DISCRETE), ptr_(pmf.ptr_) {
00045     if (ptr_) {
00046         ++ptr_->n_ptrs_;
00047     }
00048 }
00049 
00050 PMF::~PMF() {
00051         --ptr_->n_ptrs_;
00052 
00053         if (ptr_->n_ptrs_ == 0) {
00054                 delete ptr_;
00055         }
00056 }
00057 
00058 PMF& PMF::operator=(const PMF& other)  {
00059     if (this != &other)  {
00060         if (ptr_) {
00061                         --ptr_->n_ptrs_;
00062                         if (ptr_->n_ptrs_ == 0) {
00063                                 delete ptr_;
00064                         }
00065         }
00066 
00067         ptr_ = other.ptr_;
00068         ++ptr_->n_ptrs_;
00069 
00070         dimensions_ = other.dimensions_;
00071     }
00072     return *this;
00073 }
00074 
00075 PMF* PMF::clone() const {
00076         return new PMF(*this);
00077 }
00078 
00079 void PMF::cloneStruct() {
00080         if (ptr_->n_ptrs_ > 1) {
00081                 --ptr_->n_ptrs_;
00082                 ptr_ = new PMFStruct(*ptr_);
00083         }
00084 }
00085 
00086 double PMF::getProbability(const std::string& value) const {
00087         return getProbability(value, ptr_->domain_size_);
00088 }
00089 
00090 double PMF::getProbability(const std::string& value, int domain_size) const {
00091         std::map<std::string, double>::const_iterator it = ptr_->pmf_.find(value);
00092         if (it != ptr_->pmf_.end()) {
00093                 return (*it).second;
00094         }
00095         // if now probability is known for this value, calculate its probability
00096         // based on a uniform distribution over all unknown values.
00097         return getProbabilityUnknown(domain_size);
00098 }
00099 
00100 void PMF::setProbability(const std::string& value, double p) {
00101         cloneStruct();
00102 
00103         std::map<std::string, double>::iterator it = ptr_->pmf_.find(value);
00104         if (it != ptr_->pmf_.end()) {
00105                 ptr_->total_prob_ -= (*it).second;
00106                 (*it).second = p;
00107         } else {
00108                 ptr_->pmf_[value] = p;
00109         }
00110         ptr_->total_prob_ += p;
00111 }
00112 
00113 void PMF::setExact(const std::string& value) {
00114         if (ptr_->n_ptrs_ > 1) {
00115                 --ptr_->n_ptrs_;
00116                 ptr_ = new PMFStruct(ptr_->domain_size_);
00117         } else {
00118                 ptr_->pmf_.clear();
00119         }
00120         ptr_->pmf_[value] = 1.0;
00121         ptr_->total_prob_ = 1.0;
00122 }
00123 
00124 void PMF::getValues(std::vector<std::string>& values) const {
00125         for(std::map<std::string, double>::const_iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) {
00126                 values.push_back((*it).first);
00127         }
00128 }
00129 
00130 void PMF::getProbabilities(std::vector<double>& probabilities) const {
00131         for(std::map<std::string, double>::const_iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) {
00132                 probabilities.push_back((*it).second);
00133         }
00134 }
00135 
00136 bool PMF::getExpectedValue(std::string& v) const {
00137         double p_max = 0;
00138         for(std::map<std::string, double>::const_iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) {
00139                 if ((*it).second > p_max) {
00140                         v = (*it).first;
00141                         p_max = (*it).second;
00142                 }
00143         }
00144 
00145         if (ptr_->domain_size_ > 0 && p_max < getProbabilityUnknown(ptr_->domain_size_)) {
00146         v = "";
00147                 return false;
00148         }
00149 
00150         return true;
00151 }
00152 
00153 double PMF::getLikelihood(const PDF& pdf) const {
00154         assert_msg(pdf.type() == PDF::DISCRETE, "PMF: Likelihood can only be calculated with another PMF.");
00155 
00156         const PMF* pmf = static_cast<const pbl::PMF*>(&pdf);
00157         return getLikelihood(*pmf);
00158 }
00159 
00160 double PMF::getLikelihood(const PMF& other) const {
00161         int my_domain_size = ptr_->domain_size_;
00162         int other_domain_size = other.ptr_->domain_size_;
00163 
00164         assert(my_domain_size == -1 || other_domain_size == -1 || my_domain_size == other_domain_size);
00165         int domain_size = std::max(my_domain_size, other_domain_size);
00166 
00167         // determine which pmf has the most determined values, and which one less
00168         const PMF* small_pmf = this;
00169         const PMF* big_pmf = &other;
00170         if (this->ptr_->pmf_.size() > other.ptr_->pmf_.size()) {
00171                 small_pmf = &other;
00172                 big_pmf = this;
00173         }
00174 
00175         // determine the likelihood based on all determined values in small_pmf
00176         double likelihood = 0;
00177         double big_p_total_match = 0;  // keeps track of the total probability of all values in big_pmf
00178                                                                    // that are matched to small_pmf
00179         for(std::map<std::string, double>::const_iterator it = small_pmf->ptr_->pmf_.begin(); it != small_pmf->ptr_->pmf_.end(); ++it) {
00180                 double big_p_v = big_pmf->getProbability((*it).first, domain_size);
00181                 likelihood += big_p_v * (*it).second;
00182                 big_p_total_match += big_p_v;
00183         }
00184 
00185         // if the determined values in small_pdf do NOT add up to probability 1, AND
00186         // we have not yet matched all values in big_pmf with a total probability of 1, it means
00187         // we still need to match the undetermined values in small_pmf and big_pmf. ASSUME a
00188         // UNIFORM DISTRIBUTION over the undetermined values in both small_pmf and big_pmf
00189         if (small_pmf->ptr_->total_prob_ < 1 && big_p_total_match < 1) {
00190                 // determine the number of unmatched values. Since we used small_pmf as a basis for
00191                 // matching above (we matched all its determined values), the number of unmatched values
00192                 // equals the domain size MINUS the number of determined values
00193                 int num_unmatched_values = (domain_size - small_pmf->ptr_->pmf_.size());
00194                 assert(num_unmatched_values > 0);
00195 
00196                 // determine the average probability of an unknown value in big_pmf, assuming
00197                 // a uniform distribution over all unknown values.
00198                 double big_p_unknown   = (1 - big_p_total_match)      / num_unmatched_values;
00199                 // determine the probability of an unknown value in small_pmf, assuming
00200                 // a uniform distribution over all unknown values
00201                 double small_p_unknown = (1 - small_pmf->ptr_->total_prob_) / num_unmatched_values;
00202 
00203                 // add to the likelihood the SUM of big_p_unknown * small_p_unknown over all
00204                 // unmatches values, which equals num_unmatched_values * big_p_unknown * small_p_unknown
00205                 likelihood += num_unmatched_values * big_p_unknown * small_p_unknown;
00206         }
00207 
00208         return likelihood;
00209 }
00210 
00214 void PMF::update(const pbl::PMF& other) {
00215     assert(this->ptr_->domain_size_ == -1 || other.ptr_->domain_size_ == -1
00216                 || this->ptr_->domain_size_ == other.ptr_->domain_size_);
00217 
00218     cloneStruct();
00219 
00220     this->ptr_->domain_size_ = std::max(this->ptr_->domain_size_, other.ptr_->domain_size_);
00221 
00222         // calculate likelihood
00223         double likelihood = getLikelihood(other);
00224 
00225         // calculate the current probability of unknown values
00226         double p_unknown = getProbabilityUnknown(this->ptr_->domain_size_);
00227 
00228         std::set<std::string> updated_values;
00229 
00230         ptr_->total_prob_ = 0;
00231         for(std::map<std::string, double>::iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) {
00232                 double new_prob = (*it).second * other.getProbability((*it).first, this->ptr_->domain_size_) / likelihood;
00233                 (*it).second = new_prob;
00234                 ptr_->total_prob_ += new_prob;
00235                 updated_values.insert((*it).first);
00236         }
00237 
00238         for(std::map<std::string, double>::const_iterator it = other.ptr_->pmf_.begin(); it != other.ptr_->pmf_.end(); ++it) {
00239                 if (updated_values.find((*it).first) == updated_values.end()) {
00240                         double new_prob = p_unknown * (*it).second / likelihood;
00241                         ptr_->pmf_[(*it).first] = new_prob;
00242                         ptr_->total_prob_ += new_prob;
00243                 }
00244         }
00245 }
00246 
00247 void PMF::setDomainSize(int domain_size) {
00248         cloneStruct();
00249         ptr_->domain_size_ = domain_size;
00250 }
00251 
00252 int PMF::getDomainSize() const {
00253         return ptr_->domain_size_;
00254 }
00255 
00256 double PMF::getProbabilityUnknown() const {
00257         return getProbabilityUnknown(ptr_->domain_size_);
00258 }
00259 
00260 double PMF::getProbabilityUnknown(int domain_size) const {
00261         if (ptr_->total_prob_ == 1) return 0;
00262         assert(domain_size > 0);
00263         if (domain_size == (int)ptr_->pmf_.size()) return 0;
00264         return (1 - ptr_->total_prob_) / (domain_size - ptr_->pmf_.size());
00265 }
00266 
00267 void PMF::normalize() {
00268         if (ptr_->total_prob_ == 1) return;
00269 
00270         assert(ptr_->total_prob_ > 0);
00271 
00272         cloneStruct();
00273 
00274         for(std::map<std::string, double>::iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) {
00275                 (*it).second /= ptr_->total_prob_;
00276         }
00277 
00278         ptr_->domain_size_ = ptr_->pmf_.size();
00279 
00280         ptr_->total_prob_ = 1;
00281 }
00282 
00283 double PMF::getDensity(const arma::vec& v) const {
00284         assert_msg(false, "Cannot get density of a PMF");
00285         return 0;
00286 }
00287 
00288 double PMF::getMaxDensity() const {
00289         assert_msg(false, "Cannot get max density of a PMF");
00290         return 0;
00291 }
00292 
00293 std::string PMF::toString(const std::string& indent) const {
00294         std::stringstream ss;
00295         ss << indent << "PMF(" << ptr_->domain_size_ << ")[";
00296 
00297         std::map<std::string, double>::const_iterator it = ptr_->pmf_.begin();
00298         if (it != ptr_->pmf_.end()) {
00299                 ss << " " << (*it).first << " : " << (*it).second;
00300                 ++it;
00301                 for(; it != ptr_->pmf_.end(); ++it) {
00302                         ss << ", " << (*it).first << " : " << (*it).second;
00303                 }
00304         }
00305         ss << " ]";
00306         return ss.str();
00307 }
00308 
00309 /* * * * * * * * OBSOLETE * * * * * * * * */
00310 
00311 std::string PMF::getMostProbableValue() const {
00312         std::string v;
00313         getExpectedValue(v);
00314         return v;
00315 }


problib
Author(s): Sjoerd van den Dries
autogenerated on Tue Jan 7 2014 11:42:42