diff --git a/Config.cpp b/Config.cpp
index 82d12fc..b5c4d81 100644
--- a/Config.cpp
+++ b/Config.cpp
@@ -44,13 +44,17 @@ bool Config::readConfig(const std::string& fileName) {
   cmpIdx_ = (it != cfg.items().end())
     ? columnIdx[it->second.asString()] : -1;
 
+  it = cfg.find("weight_column");
+  weightIdx_ = (it != cfg.items().end())
+    ? columnIdx[it->second.asString()] : -1;
+
   it = cfg.find("loss_function");
   if (it != cfg.items().end() && it->second.asString() == "logistic") {
     lossFunction_ = L2Logistic;
   } else {
     lossFunction_ = L2Regression;
   }
-
+
   const dynamic& trainColumns = cfg["train_columns"];
   for (auto it = trainColumns.begin(); it != trainColumns.end(); ++it) {
     trainIdx_.push_back(columnIdx.at(it->asString()));
diff --git a/Config.h b/Config.h
index de1229e..54a4d41 100644
--- a/Config.h
+++ b/Config.h
@@ -10,7 +10,7 @@
 enum LossFunction {
   L2Regression = 0,
   L2Logistic = 1
 };
-
+
 // Specifying the training parameters and data format
 struct Config {
@@ -46,6 +46,10 @@ struct Config {
     return targetIdx_;
   }
 
+  int getWeightIdx() const {
+    return weightIdx_;
+  }
+
   int getCompareIdx() const {
     return cmpIdx_;
   }
@@ -93,8 +97,10 @@ struct Config {
   int targetIdx_;
   int cmpIdx_;
 
+  int weightIdx_;
+
   LossFunction lossFunction_;
-
+
   std::vector<int> trainIdx_;
   std::vector<int> weakIdx_;
   std::vector<int> evalIdx_;
diff --git a/DataSet.cpp b/DataSet.cpp
index 149b7ac..bbf7dcf 100644
--- a/DataSet.cpp
+++ b/DataSet.cpp
@@ -25,6 +25,10 @@ DataSet::DataSet(const Config& cfg, int bucketingThresh, int examplesThresh)
     features_[i].fvec.reset(new vector<double>());
     features_[i].encoding = DOUBLE;
   }
+
+  if (cfg_.getWeightIdx() != -1) {
+    weights_.reset(new vector<double>());
+  }
 }
 
 bool DataSet::getEvalColumns(const std::string& line,
@@ -41,6 +45,7 @@
 bool DataSet::getRow(const string& line, double* target,
                      boost::scoped_array<double>& fvec,
+                     double* weight,
                      double* cmpValue) const {
   try {
     vector<folly::StringPiece> sv;
 
@@ -64,7 +69,11 @@ bool DataSet::getRow(const string& line, double* target,
     if (cfg_.getCompareIdx() != -1 && cmpValue != NULL) {
       *cmpValue = atof(sv[cfg_.getCompareIdx()].toString().c_str());
     }
-
+    if (cfg_.getWeightIdx() != -1 && weight != NULL) {
+      *weight = atof(sv[cfg_.getWeightIdx()].toString().c_str());
+    } else if (weight != NULL) {
+      *weight = 1.0;
+    }
   } catch (...) {
     LOG(ERROR) << "fail to process line: " << line;
     return false;
@@ -100,7 +109,7 @@ double DataSet::getPrediction(TreeNode<double>* rt, int eid) const {
 }
 
 bool DataSet::addVector(const boost::scoped_array<double>& fvec,
-                        double target) {
+                        double target, double weight) {
   if (examplesThresh_ != -1 && numExamples_ > examplesThresh_) {
     return false;
   }
@@ -128,6 +137,10 @@ bool DataSet::addVector(const boost::scoped_array<double>& fvec,
       }
     }
   }
+  if (weights_) {
+    weights_->push_back(weight);
+  }
+
   targets_.push_back(target);
   numExamples_++;
 
diff --git a/DataSet.h b/DataSet.h
index d4334bb..6aa032e 100644
--- a/DataSet.h
+++ b/DataSet.h
@@ -49,12 +49,14 @@ class DataSet {
  public:
   DataSet(const Config& cfg, int bucketingThresh, int examplesThresh=-1);
 
-  bool addVector(const boost::scoped_array<double>& fvec, double target);
+  bool addVector(const boost::scoped_array<double>& fvec,
+                 double target, double weight);
 
   bool getRow(const std::string& line, double* target,
               boost::scoped_array<double>& fvec,
-              double* cmpValue = NULL) const;
+              double* weight,
+              double* cmpValue) const;
 
   bool getEvalColumns(const std::string& line,
                       boost::scoped_array<std::string>& feval) const;
@@ -63,6 +65,10 @@ class DataSet {
     return numExamples_;
   }
 
+  const std::unique_ptr<std::vector<double>>& getWeights() const {
+    return weights_;
+  }
+
   void getFeatureVec(const int eid, boost::scoped_array<double>& fvec) const {
     for (int i = 0; i < numFeatures_; i++) {
       if (features_[i].encoding == EMPTY) {
@@ -103,6 +109,7 @@ class DataSet {
   boost::scoped_array<Feature> features_;
   std::vector<double> targets_;
+  std::unique_ptr<std::vector<double>> weights_;
 
   friend class TreeRegressor;
   friend class Gbm;
@@ -126,4 +133,3 @@ template <class T> void split(const std::vector<int>& subset,
   }
 }
 
-
diff --git a/Gbm.cpp b/Gbm.cpp
index 86ecc6a..5be65da 100644
--- a/Gbm.cpp
+++ b/Gbm.cpp
@@ -49,7 +49,8 @@ class ParallelEval : public apache::thrift::concurrency::Runnable {
       //double score = weakModel_->eval(fvec);
       double score = ds_.getPrediction(weakModel_.get(), i);
       F_[i] += score;
-      subLoss_[workIdx_] += fun_.getExampleLoss(targets_[i], F_[i]);
+      const double wt = ds_.getWeights() ? (*(ds_.getWeights()))[i] : 1.0;
+      subLoss_[workIdx_] += fun_.getExampleLoss(targets_[i], F_[i], wt);
     }
   }
   monitor_.decrement();
@@ -78,14 +79,14 @@ void Gbm::getModel(
 
   boost::scoped_array<double> F(new double[numExamples]);
   boost::scoped_array<double> y(new double[numExamples]);
 
-  double f0 = fun_.getF0(ds_.targets_);
+  double f0 = fun_.getF0(ds_.targets_, ds_.getWeights().get());
 
   for (int i = 0; i < numExamples; i++) {
     F[i] = f0;
   }
   model->push_back(new LeafNode<double>(f0));
 
-  double initLoss = fun_.getInitLoss(ds_.targets_);
+  double initLoss = fun_.getInitLoss(ds_.targets_, ds_.getWeights().get());
 
   LOG(INFO) << "init avg loss " << initLoss / numExamples;
@@ -93,7 +94,7 @@ void Gbm::getModel(
 
     LOG(INFO) << "------- iteration " << it << " -------";
 
-    fun_.getGradient(ds_.targets_, F, y);
+    fun_.getGradient(ds_.targets_, F, y, ds_.getWeights().get());
 
     TreeRegressor regressor(ds_, y, fun_);
     std::unique_ptr<TreeNode<double>> weakModel(
@@ -131,7 +132,8 @@ void Gbm::getModel(
         // double score = weakModel->eval(fvec);
         double score = ds_.getPrediction(weakModel.get(), i);
         F[i] += score;
-        newLoss += fun_.getExampleLoss(ds_.targets_[i], F[i]);
+        const double wt = ds_.getWeights() ? (*(ds_.getWeights()))[i] : 1.0;
+        newLoss += fun_.getExampleLoss(ds_.targets_[i], F[i], wt);
       }
     }
 
diff --git a/GbmFun.h b/GbmFun.h
index a02fd2d..0d79baa 100644
--- a/GbmFun.h
+++ b/GbmFun.h
@@ -2,6 +2,7 @@
 
 #include <vector>
 #include <boost/scoped_array.hpp>
+#include "glog/logging.h"
 
 namespace boosting {
 
@@ -12,19 +13,23 @@ namespace boosting {
 class GbmFun {
  public:
   virtual double getLeafVal(const std::vector<int>& subset,
-                            const boost::scoped_array<double>& y) const = 0;
+                            const boost::scoped_array<double>& y,
+                            const std::vector<double>* wts = NULL) const = 0;
 
-  virtual double getF0(const std::vector<double>& y) const = 0;
+  virtual double getF0(const std::vector<double>& y,
+                       const std::vector<double>* wts = NULL) const = 0;
 
   virtual void getGradient(const std::vector<double>& y,
                            const boost::scoped_array<double>& F,
-                           boost::scoped_array<double>& grad) const = 0;
+                           boost::scoped_array<double>& grad,
+                           const std::vector<double>* wts = NULL) const = 0;
 
-  virtual double getInitLoss(const std::vector<double>& y) const = 0;
+  virtual double getInitLoss(const std::vector<double>& y,
+                             const std::vector<double>* wts = NULL) const = 0;
 
-  virtual double getExampleLoss(const double y, const double f) const = 0;
+  virtual double getExampleLoss(const double y, const double f, const double w) const = 0;
 
-  virtual void accumulateExampleLoss(const double y, const double f) = 0;
+  virtual void accumulateExampleLoss(const double y, const double f, const double w) = 0;
 
   virtual double getReduction() const = 0;
 
@@ -36,30 +41,36 @@ class GbmFun {
 class LeastSquareFun : public GbmFun {
  public:
-  LeastSquareFun() : numExamples_(0), sumy_(0.0), sumy2_(0.0), l2_(0.0) {
+  LeastSquareFun() : numExamples_(0), sumy_(0.0), sumy2_(0.0), l2_(0.0), sumw_(0.0) {
   }
 
   double getLeafVal(const std::vector<int>& subset,
-                    const boost::scoped_array<double>& y) const {
-
-    double sum = 0;
+                    const boost::scoped_array<double>& y, const std::vector<double>* wts = NULL) const {
+    double sumwy = 0;
+    double sumw = 0;
     for (const auto& id : subset) {
-      sum += y[id];
+      double w = ((wts != NULL) ? (*wts)[id] : 1.0);
+      sumw += w;
+      sumwy += w * y[id];
     }
-    return sum/subset.size();
+    return sumwy/sumw;
   }
 
-  double getF0(const std::vector<double>& yvec) const {
-    double sum = 0.0;
-    for (const auto& y : yvec) {
-      sum += y;
+  double getF0(const std::vector<double>& yvec, const std::vector<double>* wts = NULL) const {
+    double sumwy = 0;
+    double sumw = 0;
+    for (int i = 0; i < yvec.size(); i++) {
+      double w = ((wts != NULL) ? (*wts)[i] : 1.0);
+      sumw += w;
+      sumwy += w * yvec[i];
    }
-    return sum/yvec.size();
+    return sumwy/sumw;
   }
 
   void getGradient(const std::vector<double>& y,
                    const boost::scoped_array<double>& F,
-                   boost::scoped_array<double>& grad) const {
+                   boost::scoped_array<double>& grad,
+                   const std::vector<double>* wts = NULL) const {
 
     int size = y.size();
 
@@ -68,31 +79,40 @@ class LeastSquareFun : public GbmFun {
     }
   }
 
-  double getInitLoss(const std::vector<double>& yvec) const {
+  double getInitLoss(const std::vector<double>& yvec,
+                     const std::vector<double>* wts = NULL) const {
+
     double sumy = 0.0;
     double sumy2 = 0.0;
+    double sumw = 0.0;
 
-    for (const auto& y : yvec) {
-      sumy += y;
-      sumy2 += y*y;
+    for (int i = 0; i < yvec.size(); i++) {
+      double w = ((wts != NULL) ? (*wts)[i] : 1.0);
+      double y = yvec[i];
+
+      sumw += w;
+      sumy += w*y;
+      sumy2 += w*y*y;
     }
-    return sumy2 - sumy * sumy/yvec.size();
+    return sumy2 - sumy * sumy/sumw;
   }
 
-  double getExampleLoss(const double y, const double f) const {
-    return (y - f) * (y - f);
+  double getExampleLoss(const double y, const double f, const double w) const {
+    return w * (y - f) * (y - f);
   }
 
-  void accumulateExampleLoss(const double y, const double f) {
-    sumy_ += y;
+  void accumulateExampleLoss(const double y, const double f, const double w) {
+    sumy_ += w * y;
     numExamples_ += 1;
-    sumy2_ += y * y;
-    l2_ += getExampleLoss(y, f);
+    sumw_ += w;
+    sumy2_ += w * y * y;
+
+    l2_ += getExampleLoss(y, f, w);
   }
 
   double getReduction() const {
-    return 1.0 - l2_/(sumy2_ - sumy_ * sumy_/numExamples_);
+    return 1.0 - l2_/(sumy2_ - sumy_ * sumy_/sumw_);
   }
 
   int getNumExamples() const {
@@ -108,6 +128,7 @@ class LeastSquareFun : public GbmFun {
   double sumy_;
   double sumy2_;
   double l2_;
+  double sumw_;
 };
 
 }
diff --git a/LogisticFun.h b/LogisticFun.h
index 94b4d00..3420833 100644
--- a/LogisticFun.h
+++ b/LogisticFun.h
@@ -7,7 +7,8 @@ namespace boosting {
 class LogisticFun : public GbmFun {
  public:
   double getLeafVal(const std::vector<int>& subset,
-                    const boost::scoped_array<double>& y) const {
+                    const boost::scoped_array<double>& y,
+                    const std::vector<double>* wts = NULL) const {
     double wx = 0.0, wy = 0.0;
     for (const auto& id : subset) {
       double yi = y[id];
@@ -17,7 +18,8 @@ class LogisticFun : public GbmFun {
     return wy / wx;
   }
 
-  double getF0(const std::vector<double>& y) const {
+  double getF0(const std::vector<double>& y,
+               const std::vector<double>* wts = NULL) const {
     double sumy = 0.0;
     for (const auto yi : y) {
       sumy += yi;
@@ -28,14 +30,15 @@ class LogisticFun : public GbmFun {
 
   void getGradient(const std::vector<double>& y,
                    const boost::scoped_array<double>& F,
-                   boost::scoped_array<double>& grad) const {
+                   boost::scoped_array<double>& grad,
+                   const std::vector<double>* wts = NULL) const {
     int size = y.size();
     for (int i = 0; i < size; i++) {
       grad[i] = 2.0 * y[i]/(1.0 + exp(2.0 * y[i] * F[i]));
     }
   }
 
-  double getInitLoss(const std::vector<double>& y) const {
+  double getInitLoss(const std::vector<double>& y, const std::vector<double>* wts = NULL) const {
     int posCount = 0;
     for (const auto yi : y) {
       if (yi > 0) {
@@ -45,16 +48,16 @@ class LogisticFun : public GbmFun {
     return getEntropy(posCount, y.size()) * y.size();
   }
 
-  double getExampleLoss(const double y, const double f) const {
+  double getExampleLoss(const double y, const double f, const double w) const {
     return log(1.0 + exp(-2.0 * y * f));
   }
 
-  void accumulateExampleLoss(const double y, const double f) {
+  void accumulateExampleLoss(const double y, const double f, const double w) {
     numExamples_ += 1;
     if (y > 0) {
       posCount_ += 1;
     }
-    logloss_ += getExampleLoss(y, f);
+    logloss_ += getExampleLoss(y, f, 1.0);
   }
 
   double getReduction() const {
@@ -75,7 +78,7 @@ class LogisticFun : public GbmFun {
     double posProb = double(posCount)/numExamples;
     return -(posProb * log(posProb) + (1 - posProb) * log(1.0 - posProb));
   }
-
+
   int numExamples_;
   int posCount_;
   double logloss_;
diff --git a/Train.cpp b/Train.cpp
index 0ae31b1..7ab20b3 100644
--- a/Train.cpp
+++ b/Train.cpp
@@ -67,7 +67,11 @@ class DataChunk : public apache::thrift::concurrency::Runnable {
   DataChunk(const Config& cfg,
             const DataSet& dataSet,
             CounterMonitor* monitorPtr = NULL) :
-    cfg_(cfg), dataSet_(dataSet), monitorPtr_(monitorPtr) {}
+    cfg_(cfg), dataSet_(dataSet), monitorPtr_(monitorPtr) {
+    if (cfg_.getWeightIdx() != -1) {
+      weights_.reset(new vector<double>());
+    }
+  }
 
   bool addLine(const string& s) {
     if (s.empty()) {
@@ -81,10 +85,14 @@ class DataChunk : public apache::thrift::concurrency::Runnable {
     featureVectors_.reserve(lines_.size());
     targets_.reserve(lines_.size());
     boost::scoped_array<double> farr(new double[cfg_.getNumFeatures()]);
-    double target;
+    double target, weight, cmpValue;
+
     for (const string& line : lines_) {
-      if (dataSet_.getRow(line, &target, farr)) {
+      if (dataSet_.getRow(line, &target, farr, &weight, &cmpValue)) {
         targets_.push_back(target);
+        if (weights_) {
+          weights_->push_back(weight);
+        }
         featureVectors_.emplace_back(farr.get(),
                                      farr.get() + cfg_.getNumFeatures());
       }
@@ -102,10 +110,6 @@ class DataChunk : public apache::thrift::concurrency::Runnable {
     return featureVectors_;
   }
 
-  const vector<double>& getTargets() const {
-    return targets_;
-  }
-
   size_t getLineBufferSize() const {
     return lines_.size();
   }
@@ -121,10 +125,12 @@ class DataChunk : public apache::thrift::concurrency::Runnable {
         << "featureVectors_ and targets_ vectors must be the same size";
     boost::scoped_array<double> farr(new double[cfg_.getNumFeatures()]);
     size_t size = featureVectors_.size();
+
     for (size_t i = 0; i < size; ++i) {
       const auto fvec = featureVectors_[i];
       copy(fvec.begin(), fvec.end(), farr.get());
-      if (!dataSet->addVector(farr, targets_[i])) {
+      double weight = weights_ ? (*weights_)[i] : 1.0;
+      if (!dataSet->addVector(farr, targets_[i], weight)) {
        return i;
       }
     }
@@ -139,7 +145,7 @@ class DataChunk : public apache::thrift::concurrency::Runnable {
   vector<string> lines_;
   vector<vector<double>> featureVectors_;
   vector<double> targets_;
-
+  unique_ptr<vector<double>> weights_;
 };
 
 // Divide training data file's lines into chunks,
@@ -237,6 +243,8 @@ int main(int argc, char **argv) {
   LOG(INFO) << "loading config";
   CHECK(cfg.readConfig(FLAGS_config_file));
 
+  LOG(INFO) << "Examples have weights: " << cfg.getWeightIdx();
+
   unique_ptr<GbmFun> pfun = getGbmFun(cfg.getLossFunction());
   GbmFun& fun = *pfun;
 
@@ -272,6 +280,9 @@ int main(int argc, char **argv) {
   ds.close();
 
+  LOG(INFO) << "weights: " << (*ds.getWeights())[0];
+  LOG(INFO) << "weights 1024: " << (*ds.getWeights())[1024];
+
   // Second, train the models
   Gbm engine(fun, ds, cfg);
   double* fimps = new double[cfg.getNumFeatures()];
@@ -313,7 +324,7 @@ int main(int argc, char **argv) {
   }
 
   // See how well the model performs on testing data
-  double target, score;
+  double target, score, wt, cmpValue;
   boost::scoped_array<double> fvec(new double[cfg.getNumFeatures()]);
   int numEvalColumns = cfg.getEvalIdx().size();
   boost::scoped_array<string> feval(new string[numEvalColumns]);
@@ -338,13 +349,14 @@ int main(int argc, char **argv) {
   }
   string line;
   vector<double> scores;
+
   while(getline(*is, line)) {
-    ds.getRow(line, &target, fvec, &score);
+    ds.getRow(line, &target, fvec, &wt, &cmpValue);
     double f;
     if (FLAGS_find_optimal_num_trees) {
       f = predict_vec(model, fvec, &scores);
       for (int i = 0; i < model.size(); i++) {
-        funs[i]->accumulateExampleLoss(target, scores[i]);
+        funs[i]->accumulateExampleLoss(target, scores[i], wt);
       }
       scores.clear();
     } else {
@@ -359,7 +371,7 @@ int main(int argc, char **argv) {
       (*os) << f << endl;
     }
 
-    fun.accumulateExampleLoss(target, f);
+    fun.accumulateExampleLoss(target, f, wt);
 
     if (fun.getNumExamples() % 1000 == 0) {
       LOG(INFO) << "test loss reduction: " << fun.getReduction()
@@ -388,4 +400,3 @@ int main(int argc, char **argv) {
              << " on num examples: " << fun.getNumExamples();
   }
 }
-
diff --git a/TreeRegressor.cpp b/TreeRegressor.cpp
index 4f2fd59..0d56246 100644
--- a/TreeRegressor.cpp
+++ b/TreeRegressor.cpp
@@ -77,12 +77,13 @@ void TreeRegressor::getBestSplitFromHistogram(
   // Since the first term (sum of squares of all y-values) is independent
   // of our choice of where to split, it makes no difference, so we ignore it
   // in calculating loss.
+  // with weights, loss = - sum(w*y) * sum(w*y) / sum(w)
 
   // loss function if we don't split at all
-  double lossBefore = -1.0 * hist.totalSum * hist.totalSum / hist.totalCnt;
+  double lossBefore = -1.0 * hist.totalWeightedSum * hist.totalWeightedSum / hist.totalWeight;
 
-  int cntLeft = 0;       // number of observations on or to left of idx
-  double sumLeft = 0.0;  // number of observations strictly to right of idx
+  double weightLeft = 0;         // sum of weights for observations on or to left of idx
+  double weightedSumLeft = 0.0;  // sum of (w*y) for observations on or to left of idx
 
   double bestGain = 0.0;
   int bestIdx = -1;  // everything strictly to right of idx
@@ -91,22 +92,22 @@ void TreeRegressor::getBestSplitFromHistogram(
 
   for (int i = 0; i < hist.num - 1; i++) {
-    cntLeft += hist.cnt[i];
-    sumLeft += hist.sumy[i];
+    weightLeft += hist.weight[i];
+    weightedSumLeft += hist.sumwy[i];
 
-    double sumRight = hist.totalSum - sumLeft;
-    int cntRight = hist.totalCnt - cntLeft;
+    double weightedSumRight = hist.totalWeightedSum - weightedSumLeft;
+    double weightRight = hist.totalWeight - weightLeft;
 
-    if (cntLeft < FLAGS_min_leaf_examples) {
+    if (weightLeft < FLAGS_min_leaf_examples) {
      continue;
     }
-    if (cntRight < FLAGS_min_leaf_examples) {
+    if (weightRight < FLAGS_min_leaf_examples) {
      break;
     }
 
     double lossAfter =
-      -1.0 * sumLeft * sumLeft / cntLeft
-      - 1.0 * sumRight * sumRight / cntRight;
+      -1.0 * weightedSumLeft * weightedSumLeft / weightLeft
+      - 1.0 * weightedSumRight * weightedSumRight / weightRight;
 
     double gain = lossBefore - lossAfter;
     if (gain > bestGain) {
@@ -139,10 +140,19 @@ TreeRegressor::getBestSplit(const vector<int>* subset,
   // return a valid but degenerate split
   double bestGain = 0.0;
 
-  double totalSum = 0.0;   // sum of all target values
-
-  for (auto& id : *subset) {
-    totalSum += y_[id];
+  double totalWeightedSum = 0.0;  // weighted sum of all target values
+  double totalWeights = 0.0;
+  const std::unique_ptr<std::vector<double>>& wv = ds_.getWeights();
+  if (wv) {
+    for (auto& id : *subset) {
+      totalWeights += (*wv)[id];
+      totalWeightedSum += (*wv)[id] * y_[id];
+    }
+  } else {
+    totalWeights = subset->size();
+    for (auto& id : *subset) {
+      totalWeightedSum += y_[id];
+    }
   }
 
   // For each of a random sampling of features, see if splitting on that
@@ -155,13 +165,13 @@ TreeRegressor::getBestSplit(const vector<int>* subset,
      continue;
     }
 
-    Histogram hist(f.transitions.size() + 1, subset->size(), totalSum);
+    Histogram hist(f.transitions.size() + 1, totalWeights, totalWeightedSum);
 
     if (f.encoding == BYTE) {
-      buildHistogram(*subset, *(f.bvec), hist);
+      buildHistogram(*subset, *(f.bvec), ds_.getWeights().get(), hist);
     } else {
       CHECK(f.encoding == SHORT);
-      buildHistogram(*subset, *(f.svec), hist);
+      buildHistogram(*subset, *(f.svec), ds_.getWeights().get(), hist);
     }
 
     int fv;
@@ -214,11 +224,9 @@ TreeNode<double>* TreeRegressor::getTreeHelper(
     return NULL;
   } else if (!split->selected) {
     // leaf of decision tree
-    double fvote = fun_.getLeafVal(*(split->subset), y_);
+    double fvote = fun_.getLeafVal(*(split->subset), y_, ds_.getWeights().get());
     LOG(INFO) << "leaf: " << fvote << ", #examples:" << split->subset->size();
 
-    CHECK(split->subset->size() >= FLAGS_min_leaf_examples);
-
     return new LeafNode<double>(fvote);
   } else {
     // internal node of decision tree
@@ -228,7 +236,7 @@ TreeNode<double>* TreeRegressor::getTreeHelper(
               << std::min(split->left->subset->size(), split->right->subset->size());
     fimps[split->fid] += split->gain;
-    double fvote = fun_.getLeafVal(*(split->subset), y_);
+    double fvote = fun_.getLeafVal(*(split->subset), y_, ds_.getWeights().get());
 
     PartitionNode<double>* node = new PartitionNode<double>(split->fid, split->fv);
     node->setLeft(getTreeHelper(split->left, fimps));
     node->setRight(getTreeHelper(split->right, fimps));
@@ -289,4 +297,3 @@ TreeRegressor::SplitNode* TreeRegressor::getBestSplits(
   }
 }
 
-
diff --git a/TreeRegressor.h b/TreeRegressor.h
index 34e9c11..a280cd3 100644
--- a/TreeRegressor.h
+++ b/TreeRegressor.h
@@ -57,24 +57,25 @@ class TreeRegressor {
   // observations (as in a basic histogram), but also the sum of y-values
   // of those observations.
   struct Histogram {
-    const int num;             // number of buckets
-    std::vector<int> cnt;      // number of observations in each bucket
-    std::vector<double> sumy;  // sum of y-values of those observations
-    const int totalCnt;
-    const double totalSum;
+    const int num;               // number of buckets
+    std::vector<double> weight;  // sum of weights of observations in each bucket
+    std::vector<double> sumwy;   // sum of w*y over those observations
+    const double totalWeight;
+    const double totalWeightedSum;
 
-    Histogram(int n, int cnt, double sum)
+    Histogram(int n, double sumw, double sumwy)
       : num(n),
-        cnt(num, 0),
-        sumy(num, 0.0),
-        totalCnt(cnt),
-        totalSum(sum) {
+        weight(num, 0.0),
+        sumwy(num, 0.0),
+        totalWeight(sumw),
+        totalWeightedSum(sumwy) {
     }
   };
 
   template <class T> void buildHistogram(const std::vector<int>& subset,
                                          const std::vector<T>& fvec,
+                                         const std::vector<double>* weights,
                                          Histogram& hist) const;
 
   // Choose the x-value such that, by splitting the data at that value, we
@@ -128,14 +129,24 @@ class TreeRegressor {
 template <class T>
 void TreeRegressor::buildHistogram(const std::vector<int>& subset,
                                    const std::vector<T>& fvec,
+                                   const std::vector<double>* weights,
                                    Histogram& hist) const {
-  for(auto id : subset) {
-    const T& v = fvec[id];
+  if (weights == NULL) {
+    for(auto id : subset) {
+      const T& v = fvec[id];
 
-    hist.cnt[v] += 1;
-    hist.sumy[v] += y_[id];
+      hist.weight[v] += 1.0;
+      hist.sumwy[v] += y_[id];
+    }
+  } else {
+    for(auto id : subset) {
+      const T& v = fvec[id];
+      double w = (*weights)[id];
+
+      hist.weight[v] += w;
+      hist.sumwy[v] += w * y_[id];
+    }
   }
 }
 
-
 }
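
The arithmetic behind the weighted split criterion can be checked in isolation. With per-example weights w, the least-squares leaf value becomes the weighted mean sum(w*y)/sum(w), and dropping the split-independent term sum(w*y*y) leaves the loss -(sum(w*y))^2/sum(w) that getBestSplitFromHistogram() compares before and after each candidate split; the weights themselves come from the column named by the new "weight_column" entry that Config::readConfig() now looks up. The standalone C++ sketch below is not part of the patch and shares no code with it (all names are illustrative); it only walks through that arithmetic for one toy split.

// Standalone sketch: weighted least-squares leaf value and split gain.
#include <cstdio>
#include <vector>

struct Stats {
  double sumW = 0.0;   // sum of weights
  double sumWY = 0.0;  // sum of weight * target
};

// Accumulate sum(w) and sum(w*y) over a subset of example ids.
Stats accumulate(const std::vector<double>& y,
                 const std::vector<double>& w,
                 const std::vector<int>& subset) {
  Stats s;
  for (int id : subset) {
    s.sumW += w[id];
    s.sumWY += w[id] * y[id];
  }
  return s;
}

// Loss of predicting the weighted mean, up to the constant sum(w*y*y).
double shiftedLoss(const Stats& s) {
  return -s.sumWY * s.sumWY / s.sumW;
}

int main() {
  std::vector<double> y = {1.0, 2.0, 10.0, 11.0};
  std::vector<double> w = {1.0, 1.0, 3.0, 3.0};  // heavier weight on large targets

  Stats all = accumulate(y, w, {0, 1, 2, 3});
  Stats left = accumulate(y, w, {0, 1});          // candidate split: {0,1} vs {2,3}
  Stats right = accumulate(y, w, {2, 3});

  double gain = shiftedLoss(all) - (shiftedLoss(left) + shiftedLoss(right));
  std::printf("leaf value (all examples): %f\n", all.sumWY / all.sumW);
  std::printf("split gain: %f\n", gain);
  return 0;
}

With these weights the heavier examples pull the overall leaf value toward the 10-11 range, and the candidate split separating them yields a strictly positive gain; this is the same quantity the patched Histogram accumulates per feature bucket, just computed directly.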