Go to the documentation of this file.00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 #include "problib/pdfs/PMF.h"
00038 
00039 using namespace pbl;
00040 
00041 PMF::PMF(int domain_size) : PDF(1, PDF::DISCRETE), ptr_(new PMFStruct(domain_size)) {
00042 }
00043 
00044 PMF::PMF(const PMF& pmf) : PDF(1, PDF::DISCRETE), ptr_(pmf.ptr_) {
00045     if (ptr_) {
00046         ++ptr_->n_ptrs_;
00047     }
00048 }
00049 
00050 PMF::~PMF() {
00051         --ptr_->n_ptrs_;
00052 
00053         if (ptr_->n_ptrs_ == 0) {
00054                 delete ptr_;
00055         }
00056 }
00057 
00058 PMF& PMF::operator=(const PMF& other)  {
00059     if (this != &other)  {
00060         if (ptr_) {
00061                         --ptr_->n_ptrs_;
00062                         if (ptr_->n_ptrs_ == 0) {
00063                                 delete ptr_;
00064                         }
00065         }
00066 
00067         ptr_ = other.ptr_;
00068         ++ptr_->n_ptrs_;
00069 
00070         dimensions_ = other.dimensions_;
00071     }
00072     return *this;
00073 }
00074 
00075 PMF* PMF::clone() const {
00076         return new PMF(*this);
00077 }
00078 
00079 void PMF::cloneStruct() {
00080         if (ptr_->n_ptrs_ > 1) {
00081                 --ptr_->n_ptrs_;
00082                 ptr_ = new PMFStruct(*ptr_);
00083         }
00084 }
00085 
00086 double PMF::getProbability(const std::string& value) const {
00087         return getProbability(value, ptr_->domain_size_);
00088 }
00089 
00090 double PMF::getProbability(const std::string& value, int domain_size) const {
00091         std::map<std::string, double>::const_iterator it = ptr_->pmf_.find(value);
00092         if (it != ptr_->pmf_.end()) {
00093                 return (*it).second;
00094         }
00095         
00096         
00097         return getProbabilityUnknown(domain_size);
00098 }
00099 
00100 void PMF::setProbability(const std::string& value, double p) {
00101         cloneStruct();
00102 
00103         std::map<std::string, double>::iterator it = ptr_->pmf_.find(value);
00104         if (it != ptr_->pmf_.end()) {
00105                 ptr_->total_prob_ -= (*it).second;
00106                 (*it).second = p;
00107         } else {
00108                 ptr_->pmf_[value] = p;
00109         }
00110         ptr_->total_prob_ += p;
00111 }
00112 
00113 void PMF::setExact(const std::string& value) {
00114         if (ptr_->n_ptrs_ > 1) {
00115                 --ptr_->n_ptrs_;
00116                 ptr_ = new PMFStruct(ptr_->domain_size_);
00117         } else {
00118                 ptr_->pmf_.clear();
00119         }
00120         ptr_->pmf_[value] = 1.0;
00121         ptr_->total_prob_ = 1.0;
00122 }
00123 
00124 void PMF::getValues(std::vector<std::string>& values) const {
00125         for(std::map<std::string, double>::const_iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) {
00126                 values.push_back((*it).first);
00127         }
00128 }
00129 
00130 void PMF::getProbabilities(std::vector<double>& probabilities) const {
00131         for(std::map<std::string, double>::const_iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) {
00132                 probabilities.push_back((*it).second);
00133         }
00134 }
00135 
00136 bool PMF::getExpectedValue(std::string& v) const {
00137         double p_max = 0;
00138         for(std::map<std::string, double>::const_iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) {
00139                 if ((*it).second > p_max) {
00140                         v = (*it).first;
00141                         p_max = (*it).second;
00142                 }
00143         }
00144 
00145         if (ptr_->domain_size_ > 0 && p_max < getProbabilityUnknown(ptr_->domain_size_)) {
00146         v = "";
00147                 return false;
00148         }
00149 
00150         return true;
00151 }
00152 
00153 double PMF::getLikelihood(const PDF& pdf) const {
00154         assert_msg(pdf.type() == PDF::DISCRETE, "PMF: Likelihood can only be calculated with another PMF.");
00155 
00156         const PMF* pmf = static_cast<const pbl::PMF*>(&pdf);
00157         return getLikelihood(*pmf);
00158 }
00159 
00160 double PMF::getLikelihood(const PMF& other) const {
00161         int my_domain_size = ptr_->domain_size_;
00162         int other_domain_size = other.ptr_->domain_size_;
00163 
00164         assert(my_domain_size == -1 || other_domain_size == -1 || my_domain_size == other_domain_size);
00165         int domain_size = std::max(my_domain_size, other_domain_size);
00166 
00167         
00168         const PMF* small_pmf = this;
00169         const PMF* big_pmf = &other;
00170         if (this->ptr_->pmf_.size() > other.ptr_->pmf_.size()) {
00171                 small_pmf = &other;
00172                 big_pmf = this;
00173         }
00174 
00175         
00176         double likelihood = 0;
00177         double big_p_total_match = 0;  
00178                                                                    
00179         for(std::map<std::string, double>::const_iterator it = small_pmf->ptr_->pmf_.begin(); it != small_pmf->ptr_->pmf_.end(); ++it) {
00180                 double big_p_v = big_pmf->getProbability((*it).first, domain_size);
00181                 likelihood += big_p_v * (*it).second;
00182                 big_p_total_match += big_p_v;
00183         }
00184 
00185         
00186         
00187         
00188         
00189         if (small_pmf->ptr_->total_prob_ < 1 && big_p_total_match < 1) {
00190                 
00191                 
00192                 
00193                 int num_unmatched_values = (domain_size - small_pmf->ptr_->pmf_.size());
00194                 assert(num_unmatched_values > 0);
00195 
00196                 
00197                 
00198                 double big_p_unknown   = (1 - big_p_total_match)      / num_unmatched_values;
00199                 
00200                 
00201                 double small_p_unknown = (1 - small_pmf->ptr_->total_prob_) / num_unmatched_values;
00202 
00203                 
00204                 
00205                 likelihood += num_unmatched_values * big_p_unknown * small_p_unknown;
00206         }
00207 
00208         return likelihood;
00209 }
00210 
00214 void PMF::update(const pbl::PMF& other) {
00215     assert(this->ptr_->domain_size_ == -1 || other.ptr_->domain_size_ == -1
00216                 || this->ptr_->domain_size_ == other.ptr_->domain_size_);
00217 
00218     cloneStruct();
00219 
00220     this->ptr_->domain_size_ = std::max(this->ptr_->domain_size_, other.ptr_->domain_size_);
00221 
00222         
00223         double likelihood = getLikelihood(other);
00224 
00225         
00226         double p_unknown = getProbabilityUnknown(this->ptr_->domain_size_);
00227 
00228         std::set<std::string> updated_values;
00229 
00230         ptr_->total_prob_ = 0;
00231         for(std::map<std::string, double>::iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) {
00232                 double new_prob = (*it).second * other.getProbability((*it).first, this->ptr_->domain_size_) / likelihood;
00233                 (*it).second = new_prob;
00234                 ptr_->total_prob_ += new_prob;
00235                 updated_values.insert((*it).first);
00236         }
00237 
00238         for(std::map<std::string, double>::const_iterator it = other.ptr_->pmf_.begin(); it != other.ptr_->pmf_.end(); ++it) {
00239                 if (updated_values.find((*it).first) == updated_values.end()) {
00240                         double new_prob = p_unknown * (*it).second / likelihood;
00241                         ptr_->pmf_[(*it).first] = new_prob;
00242                         ptr_->total_prob_ += new_prob;
00243                 }
00244         }
00245 }
00246 
00247 void PMF::setDomainSize(int domain_size) {
00248         cloneStruct();
00249         ptr_->domain_size_ = domain_size;
00250 }
00251 
00252 int PMF::getDomainSize() const {
00253         return ptr_->domain_size_;
00254 }
00255 
00256 double PMF::getProbabilityUnknown() const {
00257         return getProbabilityUnknown(ptr_->domain_size_);
00258 }
00259 
00260 double PMF::getProbabilityUnknown(int domain_size) const {
00261         if (ptr_->total_prob_ == 1) return 0;
00262         assert(domain_size > 0);
00263         if (domain_size == (int)ptr_->pmf_.size()) return 0;
00264         return (1 - ptr_->total_prob_) / (domain_size - ptr_->pmf_.size());
00265 }
00266 
00267 void PMF::normalize() {
00268         if (ptr_->total_prob_ == 1) return;
00269 
00270         assert(ptr_->total_prob_ > 0);
00271 
00272         cloneStruct();
00273 
00274         for(std::map<std::string, double>::iterator it = ptr_->pmf_.begin(); it != ptr_->pmf_.end(); ++it) {
00275                 (*it).second /= ptr_->total_prob_;
00276         }
00277 
00278         ptr_->domain_size_ = ptr_->pmf_.size();
00279 
00280         ptr_->total_prob_ = 1;
00281 }
00282 
00283 double PMF::getDensity(const arma::vec& v) const {
00284         assert_msg(false, "Cannot get density of a PMF");
00285         return 0;
00286 }
00287 
00288 double PMF::getMaxDensity() const {
00289         assert_msg(false, "Cannot get max density of a PMF");
00290         return 0;
00291 }
00292 
00293 std::string PMF::toString(const std::string& indent) const {
00294         std::stringstream ss;
00295         ss << indent << "PMF(" << ptr_->domain_size_ << ")[";
00296 
00297         std::map<std::string, double>::const_iterator it = ptr_->pmf_.begin();
00298         if (it != ptr_->pmf_.end()) {
00299                 ss << " " << (*it).first << " : " << (*it).second;
00300                 ++it;
00301                 for(; it != ptr_->pmf_.end(); ++it) {
00302                         ss << ", " << (*it).first << " : " << (*it).second;
00303                 }
00304         }
00305         ss << " ]";
00306         return ss.str();
00307 }
00308 
00309 
00310 
00311 std::string PMF::getMostProbableValue() const {
00312         std::string v;
00313         getExpectedValue(v);
00314         return v;
00315 }