$search
00001 /************************************************************************ 00002 * Copyright (C) 2012 Eindhoven University of Technology (TU/e). * 00003 * All rights reserved. * 00004 ************************************************************************ 00005 * Redistribution and use in source and binary forms, with or without * 00006 * modification, are permitted provided that the following conditions * 00007 * are met: * 00008 * * 00009 * 1. Redistributions of source code must retain the above * 00010 * copyright notice, this list of conditions and the following * 00011 * disclaimer. * 00012 * * 00013 * 2. Redistributions in binary form must reproduce the above * 00014 * copyright notice, this list of conditions and the following * 00015 * disclaimer in the documentation and/or other materials * 00016 * provided with the distribution. * 00017 * * 00018 * THIS SOFTWARE IS PROVIDED BY TU/e "AS IS" AND ANY EXPRESS OR * 00019 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 00020 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * 00021 * ARE DISCLAIMED. IN NO EVENT SHALL TU/e OR CONTRIBUTORS BE LIABLE * 00022 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * 00023 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * 00024 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * 00025 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * 00026 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 00027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * 00028 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * 00029 * DAMAGE. * 00030 * * 00031 * The views and conclusions contained in the software and * 00032 * documentation are those of the authors and should not be * 00033 * interpreted as representing official policies, either expressed or * 00034 * implied, of TU/e. * 00035 ************************************************************************/ 00036 00037 #include "problib/pdfs/PMF.h" 00038 00039 using namespace pbl; 00040 00041 PMF::PMF(int domain_size) : PDF(1, PDF::DISCRETE), ptr_(new PMFStruct(domain_size)) { 00042 } 00043 00044 PMF::PMF(const PMF& pmf) : PDF(1, PDF::DISCRETE), ptr_(pmf.ptr_) { 00045 if (ptr_) { 00046 ++ptr_->n_ptrs_; 00047 } 00048 } 00049 00050 PMF::~PMF() { 00051 --ptr_->n_ptrs_; 00052 00053 if (ptr_->n_ptrs_ == 0) { 00054 delete ptr_; 00055 } 00056 } 00057 00058 PMF& PMF::operator=(const PMF& other) { 00059 if (this != &other) { 00060 if (ptr_) { 00061 --ptr_->n_ptrs_; 00062 if (ptr_->n_ptrs_ == 0) { 00063 delete ptr_; 00064 } 00065 } 00066 00067 ptr_ = other.ptr_; 00068 ++ptr_->n_ptrs_; 00069 00070 dimensions_ = other.dimensions_; 00071 } 00072 return *this; 00073 } 00074 00075 PMF* PMF::clone() const { 00076 return new PMF(*this); 00077 } 00078 00079 void PMF::cloneStruct() { 00080 if (ptr_->n_ptrs_ > 1) { 00081 --ptr_->n_ptrs_; 00082 ptr_ = new PMFStruct(*ptr_); 00083 } 00084 } 00085 00086 double PMF::getProbability(const std::string& value) const { 00087 return getProbability(value, ptr_->domain_size_); 00088 } 00089 00090 double PMF::getProbability(const std::string& value, int domain_size) const { 00091 std::map<std::string, double>::const_iterator it = ptr_->pmf_.find(value); 00092 if (it != ptr_->pmf_.end()) { 00093 return (*it).second; 00094 } 00095 // if now probability is known for this value, calculate its probability 00096 // based on a uniform distribution over all unknown values. 00097 return getProbabilityUnknown(domain_size); 00098 } 00099 00100 void PMF::setProbability(const std::string& value, double p) { 00101 cloneStruct(); 00102 00103 std::map<std::string, double>::iterator it = ptr_->pmf_.find(value); 00104 if (it != ptr_->pmf_.end()) { 00105 ptr_->total_prob_ -= (*it).second; 00106 (*it).second = p; 00107 } else { 00108 ptr_->pmf_[value] = p; 00109 } 00110 ptr_->total_prob_ += p; 00111 } 00112 00113 void PMF::setExact(const std::string& value) { 00114 if (ptr_->n_ptrs_ > 1) { 00115 --ptr_->n_ptrs_; 00116 ptr_ = new PMFStruct(ptr_->domain_size_); 00117 } else { 00118 ptr_->pmf_.clear(); 00119 } 00120 ptr_->pmf_[value] = 1.0; 00121 ptr_->total_prob_ = 1.0; 00122 } 00123 00124 void PMF::getValues(std::vector<std::string>& values) const { 00125 for(std::map<std::string, double>::const_iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) { 00126 values.push_back((*it).first); 00127 } 00128 } 00129 00130 void PMF::getProbabilities(std::vector<double>& probabilities) const { 00131 for(std::map<std::string, double>::const_iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) { 00132 probabilities.push_back((*it).second); 00133 } 00134 } 00135 00136 bool PMF::getExpectedValue(std::string& v) const { 00137 double p_max = 0; 00138 for(std::map<std::string, double>::const_iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) { 00139 if ((*it).second > p_max) { 00140 v = (*it).first; 00141 p_max = (*it).second; 00142 } 00143 } 00144 00145 if (ptr_->domain_size_ > 0 && p_max < getProbabilityUnknown(ptr_->domain_size_)) { 00146 v = ""; 00147 return false; 00148 } 00149 00150 return true; 00151 } 00152 00153 double PMF::getLikelihood(const PDF& pdf) const { 00154 assert_msg(pdf.type() == PDF::DISCRETE, "PMF: Likelihood can only be calculated with another PMF."); 00155 00156 const PMF* pmf = static_cast<const pbl::PMF*>(&pdf); 00157 return getLikelihood(*pmf); 00158 } 00159 00160 double PMF::getLikelihood(const PMF& other) const { 00161 int my_domain_size = ptr_->domain_size_; 00162 int other_domain_size = other.ptr_->domain_size_; 00163 00164 assert(my_domain_size == -1 || other_domain_size == -1 || my_domain_size == other_domain_size); 00165 int domain_size = std::max(my_domain_size, other_domain_size); 00166 00167 // determine which pmf has the most determined values, and which one less 00168 const PMF* small_pmf = this; 00169 const PMF* big_pmf = &other; 00170 if (this->ptr_->pmf_.size() > other.ptr_->pmf_.size()) { 00171 small_pmf = &other; 00172 big_pmf = this; 00173 } 00174 00175 // determine the likelihood based on all determined values in small_pmf 00176 double likelihood = 0; 00177 double big_p_total_match = 0; // keeps track of the total probability of all values in big_pmf 00178 // that are matched to small_pmf 00179 for(std::map<std::string, double>::const_iterator it = small_pmf->ptr_->pmf_.begin(); it != small_pmf->ptr_->pmf_.end(); ++it) { 00180 double big_p_v = big_pmf->getProbability((*it).first, domain_size); 00181 likelihood += big_p_v * (*it).second; 00182 big_p_total_match += big_p_v; 00183 } 00184 00185 // if the determined values in small_pdf do NOT add up to probability 1, AND 00186 // we have not yet matched all values in big_pmf with a total probability of 1, it means 00187 // we still need to match the undetermined values in small_pmf and big_pmf. ASSUME a 00188 // UNIFORM DISTRIBUTION over the undetermined values in both small_pmf and big_pmf 00189 if (small_pmf->ptr_->total_prob_ < 1 && big_p_total_match < 1) { 00190 // determine the number of unmatched values. Since we used small_pmf as a basis for 00191 // matching above (we matched all its determined values), the number of unmatched values 00192 // equals the domain size MINUS the number of determined values 00193 int num_unmatched_values = (domain_size - small_pmf->ptr_->pmf_.size()); 00194 assert(num_unmatched_values > 0); 00195 00196 // determine the average probability of an unknown value in big_pmf, assuming 00197 // a uniform distribution over all unknown values. 00198 double big_p_unknown = (1 - big_p_total_match) / num_unmatched_values; 00199 // determine the probability of an unknown value in small_pmf, assuming 00200 // a uniform distribution over all unknown values 00201 double small_p_unknown = (1 - small_pmf->ptr_->total_prob_) / num_unmatched_values; 00202 00203 // add to the likelihood the SUM of big_p_unknown * small_p_unknown over all 00204 // unmatches values, which equals num_unmatched_values * big_p_unknown * small_p_unknown 00205 likelihood += num_unmatched_values * big_p_unknown * small_p_unknown; 00206 } 00207 00208 return likelihood; 00209 } 00210 00214 void PMF::update(const pbl::PMF& other) { 00215 assert(this->ptr_->domain_size_ == -1 || other.ptr_->domain_size_ == -1 00216 || this->ptr_->domain_size_ == other.ptr_->domain_size_); 00217 00218 cloneStruct(); 00219 00220 this->ptr_->domain_size_ = std::max(this->ptr_->domain_size_, other.ptr_->domain_size_); 00221 00222 // calculate likelihood 00223 double likelihood = getLikelihood(other); 00224 00225 // calculate the current probability of unknown values 00226 double p_unknown = getProbabilityUnknown(this->ptr_->domain_size_); 00227 00228 std::set<std::string> updated_values; 00229 00230 ptr_->total_prob_ = 0; 00231 for(std::map<std::string, double>::iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) { 00232 double new_prob = (*it).second * other.getProbability((*it).first, this->ptr_->domain_size_) / likelihood; 00233 (*it).second = new_prob; 00234 ptr_->total_prob_ += new_prob; 00235 updated_values.insert((*it).first); 00236 } 00237 00238 for(std::map<std::string, double>::const_iterator it = other.ptr_->pmf_.begin(); it != other.ptr_->pmf_.end(); ++it) { 00239 if (updated_values.find((*it).first) == updated_values.end()) { 00240 double new_prob = p_unknown * (*it).second / likelihood; 00241 ptr_->pmf_[(*it).first] = new_prob; 00242 ptr_->total_prob_ += new_prob; 00243 } 00244 } 00245 } 00246 00247 void PMF::setDomainSize(int domain_size) { 00248 cloneStruct(); 00249 ptr_->domain_size_ = domain_size; 00250 } 00251 00252 int PMF::getDomainSize() const { 00253 return ptr_->domain_size_; 00254 } 00255 00256 double PMF::getProbabilityUnknown() const { 00257 return getProbabilityUnknown(ptr_->domain_size_); 00258 } 00259 00260 double PMF::getProbabilityUnknown(int domain_size) const { 00261 if (ptr_->total_prob_ == 1) return 0; 00262 assert(domain_size > 0); 00263 if (domain_size == (int)ptr_->pmf_.size()) return 0; 00264 return (1 - ptr_->total_prob_) / (domain_size - ptr_->pmf_.size()); 00265 } 00266 00267 void PMF::normalize() { 00268 if (ptr_->total_prob_ == 1) return; 00269 00270 assert(ptr_->total_prob_ > 0); 00271 00272 cloneStruct(); 00273 00274 for(std::map<std::string, double>::iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) { 00275 (*it).second /= ptr_->total_prob_; 00276 } 00277 00278 ptr_->domain_size_ = ptr_->pmf_.size(); 00279 00280 ptr_->total_prob_ = 1; 00281 } 00282 00283 double PMF::getDensity(const arma::vec& v) const { 00284 assert_msg(false, "Cannot get density of a PMF"); 00285 return 0; 00286 } 00287 00288 double PMF::getMaxDensity() const { 00289 assert_msg(false, "Cannot get max density of a PMF"); 00290 return 0; 00291 } 00292 00293 std::string PMF::toString(const std::string& indent) const { 00294 std::stringstream ss; 00295 ss << indent << "PMF(" << ptr_->domain_size_ << ")["; 00296 00297 std::map<std::string, double>::const_iterator it = ptr_->pmf_.begin(); 00298 if (it != ptr_->pmf_.end()) { 00299 ss << " " << (*it).first << " : " << (*it).second; 00300 ++it; 00301 for(; it != ptr_->pmf_.end(); ++it) { 00302 ss << ", " << (*it).first << " : " << (*it).second; 00303 } 00304 } 00305 ss << " ]"; 00306 return ss.str(); 00307 } 00308 00309 /* * * * * * * * OBSOLETE * * * * * * * * */ 00310 00311 std::string PMF::getMostProbableValue() const { 00312 std::string v; 00313 getExpectedValue(v); 00314 return v; 00315 }