From 642af39395ff75b9772eda69cb176cb733a1b196 Mon Sep 17 00:00:00 2001
From: Ramon Novoa <ramonnovoa@gmail.com>
Date: Fri, 3 Dec 2021 11:47:58 +0100
Subject: [PATCH 1/2] Add support for trend modules.

Ref. pandora_enterprise#7456.
---
 pandora_server/lib/PandoraFMS/PredictionServer.pm | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pandora_server/lib/PandoraFMS/PredictionServer.pm b/pandora_server/lib/PandoraFMS/PredictionServer.pm
index f7c557128d..6800561317 100644
--- a/pandora_server/lib/PandoraFMS/PredictionServer.pm
+++ b/pandora_server/lib/PandoraFMS/PredictionServer.pm
@@ -224,6 +224,13 @@ sub exec_prediction_module ($$$$) {
 		return;
 	}
 
+	# Trend module.
+	if ($agent_module->{'prediction_module'} == 8) {
+		logger ($pa_config, "Executing trend module " . $agent_module->{'nombre'}, 10);
+		enterprise_hook ('exec_trend_module', [$pa_config, $agent_module, $server_id, $dbh]);
+		return;
+	}
+
 	# Get a full hash for target agent_module record reference ($target_module)
 	my $target_module = get_db_single_row ($dbh, 'SELECT * FROM tagente_modulo WHERE id_agente_modulo = ?', $agent_module->{'custom_integer_1'});
 	return unless defined $target_module;

From b8565d59dcb9963244d0cbe54b15e84151f025f9 Mon Sep 17 00:00:00 2001
From: Ramon Novoa <ramonnovoa@gmail.com>
Date: Mon, 13 Dec 2021 12:28:58 +0100
Subject: [PATCH 2/2] Add support for capacity planning modules.

Capacity planning modules replace traditional prediction modules. Ref.
pandora_enterprise#3220.
---
 .../lib/PandoraFMS/PredictionServer.pm        | 234 +++---
 .../lib/PandoraFMS/Statistics/Regression.pm   | 776 ++++++++++++++++++
 2 files changed, 891 insertions(+), 119 deletions(-)
 create mode 100644 pandora_server/lib/PandoraFMS/Statistics/Regression.pm

diff --git a/pandora_server/lib/PandoraFMS/PredictionServer.pm b/pandora_server/lib/PandoraFMS/PredictionServer.pm
index 6800561317..45d9a8fb2f 100644
--- a/pandora_server/lib/PandoraFMS/PredictionServer.pm
+++ b/pandora_server/lib/PandoraFMS/PredictionServer.pm
@@ -26,7 +26,7 @@ use Thread::Semaphore;
 
 use IO::Socket::INET;
 use Net::Ping;
-use POSIX qw(strftime);
+use POSIX qw(floor strftime);
 
 # Default lib dir for RPM and DEB packages
 use lib '/usr/lib/perl5';
@@ -35,6 +35,7 @@ use PandoraFMS::Tools;
 use PandoraFMS::DB;
 use PandoraFMS::Core;
 use PandoraFMS::ProducerConsumerServer;
+use PandoraFMS::Statistics::Regression;
 
 #For debug
 #use Data::Dumper;
@@ -231,134 +232,129 @@ sub exec_prediction_module ($$$$) {
 		return;
 	}
 
-	# Get a full hash for target agent_module record reference ($target_module)
-	my $target_module = get_db_single_row ($dbh, 'SELECT * FROM tagente_modulo WHERE id_agente_modulo = ?', $agent_module->{'custom_integer_1'});
-	return unless defined $target_module;
-	
-	# Prediction mode explanation
-	#
-	# 0 is for target type of generic_proc. It compares latest data with current data. Needs to get
-	# data on a "middle" interval, so if interval is 300, get data to compare with 150  before 
-	# and 150 in the future. If current data is ABOVE or BELOW average +- typical_deviation 
-	# this is a BAD value (0), if not is ok (1) and written in target module as is.
-	# more interval configured for this module, more "margin" has to compare data.
-	#
-	# 1 is for target type of generic_data. It get's data in the future, using the interval given in
-	# module. It gets average from current timestamp to INTERVAL in the future and gets average
-	# value. Typical deviation is not used here. 
-	
-	# 0 proc, 1 data
-	my $prediction_mode = ($agent_module->{'id_tipo_modulo'} == 2) ? 0 : 1;
-	
-	# Initialize another global sub variables.
-	my $module_data = 0;    # 0 data for default
-	
-	# Get current timestamp
-	my $utimestamp = time ();
-	my $timestamp = strftime ("%Y-%m-%d %H:%M:%S", localtime($utimestamp));
-	
-	# Get different data from each week one month ago (4 values)
-	# $agent_module->{'module_interval'} uses a margin of interval to get average data from the past
-	my @week_data;
-	my @week_utimestamp;
-	
-	for (my $i=0; $i<4; $i++) {
-		$week_utimestamp[$i] = $utimestamp - (84600*7*($i+1));
-		# Adjust for proc prediction
-		if ($prediction_mode == 0) {
-			$week_utimestamp[$i] = $week_utimestamp[$i] - ($agent_module->{'module_interval'} / 2);
-		}
+	# Capacity planning module.
+	exec_capacity_planning_module($pa_config, $agent_module, $server_id, $dbh);
+}
+
+########################################################################
+# Execute a capacity planning module.
+########################################################################
+sub exec_capacity_planning_module($$$$) {
+	my ($pa_config, $module, $server_id, $dbh) = @_;
+	my $pred;
+
+	# Retrieve the target module.
+	my $target_module = get_db_single_row($dbh, 'SELECT * FROM tagente_modulo WHERE id_agente_modulo = ?', $module->{'custom_integer_1'});
+	if (!defined($target_module)) {
+		pandora_update_module_on_error ($pa_config, $module, $dbh);
+		return;
 	}
-	
-	# Let's calculate statistical average using past data
-	# n = total of real data values
-	my ($n, $average, $temp1) = (0, 0, 0);
-	for (my $i=0; $i < 4; $i++) {
-		my ($first_data, $last_data, $average_interval);
-		my $sum_data = 0;
-		
-		$temp1 = $week_utimestamp[$i] + $agent_module->{'module_interval'};
-		# Get data for week $i in the past
-		
-		$average_interval = get_db_value ($dbh, 'SELECT AVG(datos)
-			FROM tagente_datos
-			WHERE id_agente_modulo = ?
-				AND utimestamp > ?
-				AND utimestamp < ?', $target_module->{'id_agente_modulo'}, $week_utimestamp[$i], $temp1);
-		
-		# Need to get data outside interval because no data.
-		if (!(defined($average_interval)) || ($average_interval == 0)) {
-			$last_data = get_db_value ($dbh, 'SELECT datos
-				FROM tagente_datos
-				WHERE id_agente_modulo = ?
-					AND utimestamp > ?
-				LIMIT 1', $target_module->{'id_agente_modulo'}, $week_utimestamp[$i]);
-			next unless defined ($last_data);
-			$first_data = get_db_value ($dbh, 'SELECT datos
-				FROM tagente_datos
-				WHERE id_agente_modulo = ?
-					AND utimestamp < ?
-				LIMIT 1', $target_module->{'id_agente_modulo'}, $temp1);
-			next unless defined ($first_data);
-			$sum_data++ if ($last_data != 0);
-			$sum_data++ if ($first_data != 0);
-			$week_data[$i] = ($sum_data > 0) ? (($last_data + $first_data) / $sum_data) : 0;
-		}
-		else {
-			$week_data[$i] = $average_interval;
-		}
-		
-		# It's possible that one of the week_data[i] values was not valid (NULL)
-		# so recheck it and relay on n=0 for "no data" values set to 0 in result
-		# Calculate total ammount of valida data for each data sample
-		if ((is_numeric($week_data[$i])) && ($week_data[$i] > 0)) {
-			$n++;
-			# Average SUM
-			$average = $average + $week_data[$i];
-		}
+
+	# Set the period.
+	my $period;
+
+	# Weekly.
+	if ($module->{'custom_integer_2'} == 0) {
+		$period = 604800;
 	}
-	
-	# Real average value
-	$average = ($n > 0) ? ($average / $n) : 0;
-	
-	# (PROC) Compare with current data
-	if ($prediction_mode == 0) {
-		# Calculate typical deviation
-		my $typical_deviation = 0;
-		for (my $i=0; $i< $n; $i++) {
-			if ((is_numeric($week_data[$i])) && ($week_data[$i] > 0)) {
-				$typical_deviation = $typical_deviation + (($week_data[$i] - $average)**2);
+	# Monthly.
+	elsif ($module->{'custom_integer_2'} == 1) {
+		$period = 2678400;
+	}
+	# Daily.
+	else {
+		$period = 86400;
+	}
+
+	# Set other parameters.
+	my $now = time();
+	my $from = $now - $period;
+	my $type = $module->{'custom_string_2'};
+	my $target_value = $module->{'custom_string_1'};
+
+	# Fit a line of the form: y = theta_0 + x * theta_1
+	my ($theta_0, $theta_1);
+	eval {
+		($theta_0, $theta_1) = linear_regression($target_module, $from, $now, $dbh);
+	};
+	if (!defined($theta_0) || !defined($theta_1)) {
+		pandora_update_module_on_error ($pa_config, $module, $dbh);
+		return;
+	}
+
+	# Predict the value.
+	if ($type eq 'estimation_absolute') {
+		# y = theta_0 + x * theta_1
+		$pred = $theta_0 + ($now + $target_value) * $theta_1;
+	}
+	# Predict the date.
+	else {
+		# Infinity.
+		if ($theta_1 == 0) {
+			$pred = -1;
+		} else {
+			# x = (y - theta_0) / theta_1
+			$pred = ($target_value - $theta_0) / $theta_1;
+
+			# Convert the prediction from a unix timestamp to days from now.
+			$pred = ($pred - $now) / 86400;
+
+			# We are not interested in past dates.
+			if ($pred < 0) {
+				$pred = -1;
 			}
 		}
-		$typical_deviation = ($n > 1) ? sqrt ($typical_deviation / ($n-1)) : 0;
-		
-		my $current_value = get_db_value ($dbh, 'SELECT datos
-			FROM tagente_estado
-			WHERE id_agente_modulo = ?', $target_module->{'id_agente_modulo'});
-		if ( ($current_value > ($average - $typical_deviation)) && ($current_value < ($average + $typical_deviation)) ){
-			$module_data = 1; # OK !!
-		}
-		else {
-			$module_data = 0; # Out of predictions
-		}
-	}
-	else {
-		# Prediction based on data
-		$module_data = $average;
 	}
 	
-	my %data = ("data" => $module_data);
-	pandora_process_module ($pa_config, \%data, '', $agent_module, '', $timestamp, $utimestamp, $server_id, $dbh);
-	
-	my $agent_os_version = get_db_value ($dbh, 'SELECT os_version
-		FROM tagente
-		WHERE id_agente = ?', $agent_module->{'id_agente'});
+	# Update the module.
+	my %data = ("data" => $pred);
+	my $utimestamp = time ();
+	my $timestamp = strftime ("%Y-%m-%d %H:%M:%S", localtime($utimestamp));
+	pandora_process_module ($pa_config, \%data, '', $module, '', $timestamp, $utimestamp, $server_id, $dbh);
 	
+	# Update the agent.
+	my $agent_os_version = get_db_value ($dbh, 'SELECT os_version FROM tagente WHERE id_agente = ?', $module->{'id_agente'});
 	if ($agent_os_version eq ''){
 		$agent_os_version = $pa_config->{'servername'}.'_Prediction';
 	}
-	
-	pandora_update_agent ($pa_config, $timestamp, $agent_module->{'id_agente'}, undef, undef, -1, $dbh);
+	pandora_update_agent ($pa_config, $timestamp, $module->{'id_agente'}, undef, undef, -1, $dbh);
+}
+
+########################################################################
+# Perform linear regression on the given module.
+########################################################################
+sub linear_regression($$$$) {
+	my ($module, $from, $to, $dbh) = @_;
+
+	# Should not happen.
+	return if ($module->{'module_interval'} < 1);
+
+	# Retrieve the data.
+	my @rows = get_db_rows($dbh, 'SELECT datos, utimestamp FROM tagente_datos WHERE id_agente_modulo = ? AND utimestamp > ? AND utimestamp < ? ORDER BY utimestamp ASC', $module->{'id_agente_modulo'}, $from, $to);
+	return if scalar(@rows) <= 0;
+
+	# Perform linear regression on the data.
+	my $reg = PandoraFMS::Statistics::Regression->new( "linear regression", ["const", "x"] );
+	my $prev_utimestamp = $from;
+	foreach my $row (@rows) {
+		my ($utimestamp, $data) = ($row->{'utimestamp'}, $row->{'datos'});
+
+		# Elapsed time.
+		my $elapsed = $utimestamp - $prev_utimestamp;
+		$elapsed = 1 unless $elapsed > 0;
+		$prev_utimestamp = $utimestamp;
+
+		# Number of points (Pandora compresses data!)
+		my $local_count = floor($elapsed / $module->{'module_interval'});
+		$local_count = 1 if $local_count <= 0;
+
+		# Add the points.
+		for (my $i = 0; $i < $local_count; $i++) {
+			$reg->include($data, [1.0, $utimestamp]);
+		}
+	}
+
+	return $reg->theta();
 }
 
 1;
diff --git a/pandora_server/lib/PandoraFMS/Statistics/Regression.pm b/pandora_server/lib/PandoraFMS/Statistics/Regression.pm
new file mode 100644
index 0000000000..832df4d648
--- /dev/null
+++ b/pandora_server/lib/PandoraFMS/Statistics/Regression.pm
@@ -0,0 +1,776 @@
+################################################################
+# Statistics::Regression package included in Pandora FMS.
+# See: https://metacpan.org/pod/Statistics::Regression
+################################################################
+package PandoraFMS::Statistics::Regression;
+
+$VERSION = '0.53';
+my $DATE = "2007/07/07";
+my $MNAME= "$0::Statistics::Regression";
+
+use strict;
+use warnings FATAL => qw{ uninitialized };
+
+use Carp;
+
+################################################################
+=pod
+
+=head1 NAME
+
+  Regression.pm - weighted linear regression package (line+plane fitting)
+
+
+=head1 SYNOPSIS
+
+  use Statistics::Regression;
+
+  # Create regression object
+  my $reg = Statistics::Regression->new( "sample regression", [ "const", "someX", "someY" ] );
+
+  # Add data points
+  $reg->include( 2.0, [ 1.0, 3.0, -1.0 ] );
+  $reg->include( 1.0, [ 1.0, 5.0, 2.0 ] );
+  $reg->include( 20.0, [ 1.0, 31.0, 0.0 ] );
+  $reg->include( 15.0, [ 1.0, 11.0, 2.0 ] );
+
+or
+
+  my %d;
+  $d{const} = 1.0; $d{someX}= 5.0; $d{someY}= 2.0; $d{ignored}="anything else";
+  $reg->include( 3.0, \%d );  # names are picked off the Regression specification
+
+Please note that *you* must provide the constant if you want one.
+
+  # Finally, print the result
+  $reg->print();
+
+This prints the following:
+
+  ****************************************************************
+  Regression 'sample regression'
+  ****************************************************************
+  Name           	       Theta	      StdErr	 T-stat
+  [0='const']    	      0.2950	      6.0512	   0.05
+  [1='someX']    	      0.6723	      0.3278	   2.05
+  [2='someY']    	      1.0688	      2.7954	   0.38
+
+  R^2= 0.808, N= 4
+  ****************************************************************
+
+
+
+The hash input method has the advantage that you can now just
+fill the observation hashes with all your variables, and use the
+same code to run regression, changing the regression
+specification at one and only one spot (the new() invocation).
+You do not need to change the inputs in the include() statement.
+For example,
+
+  my @obs;  ## a global variable.  observations are like: %oneobs= %{$obs[1]};
+
+  sub run_regression {
+    my $reg = Statistics::Regression->new( $_[0], $_[2] );
+    foreach my $obshashptr (@obs) { $reg->include( $_[1], $_[3] ); }
+    $reg->print();
+  }
+
+  run_regression("bivariate regression",  $obshashptr->{someY}, [ "const", "someX" ] );
+  run_regression("trivariate regression",  $obshashptr->{someY}, [ "const", "someX", "someZ" ] );
+
+
+
+Of course, you can use the subroutines to do the printing work yourself:
+
+  my @theta  = $reg->theta();
+  my @se     = $reg->standarderrors();
+  my $rsq    = $reg->rsq();
+  my $adjrsq = $reg->adjrsq();
+  my $ybar   = $reg->ybar();  ## the average of the y vector
+  my $sst    = $reg->sst();  ## the sum-squares-total
+  my $sigmasq= $reg->sigmasq();  ## the variance of the residual
+  my $k      = $reg->k();   ## the number of variables
+  my $n      = $reg->n();   ## the number of observations
+
+In addition, there are some other helper routines, and a
+subroutine linearcombination_variance().  If you don't know what
+this is, don't use it.
+
+
+=head1 BACKGROUND WARNING
+
+You should have an understanding of OLS regressions if you want
+to use this package.  You can get this from an introductory
+college econometrics class and/or from most intermediate college
+statistics classes.  If you do not have this background
+knowledge, then this package will remain a mystery to you.
+There is no support for this package--please don't expect any.
+
+
+=head1 DESCRIPTION
+
+Regression.pm is a multivariate linear regression package.  That
+is, it estimates the c coefficients for a line-fit of the type
+
+  y= c(0)*x(0) + c(1)*x1 + c(2)*x2 + ... + c(k)*xk
+
+given a data set of N observations, each with k independent x
+variables and one y variable.  Naturally, N must be greater than
+k---and preferably considerably greater.  Any reasonable
+undergraduate statistics book will explain what a regression is.
+Most of the time, the user will provide a constant ('1') as x(0)
+for each observation in order to allow the regression package to
+fit an intercept.
+
+
+=head1 ALGORITHM
+
+=head2 Original Algorithm (ALGOL-60):
+
+	W.  M.  Gentleman, University of Waterloo, "Basic
+	Description For Large, Sparse Or Weighted Linear Least
+	Squares Problems (Algorithm AS 75)," Applied Statistics
+	(1974) Vol 23; No. 3
+
+Gentleman's algorithm is I<the> statistical standard. Insertion
+of a new observation can be done one observation at a time
+(WITH A WEIGHT!), and still only takes a low quadratic time.
+The storage space requirement is of quadratic order (in the
+indep variables). A practically infinite number of observations
+can easily be processed!
+
+=head2 Internal Data Structures
+
+R=Rbar is an upperright triangular matrix, kept in normalized
+form with implicit 1's on the diagonal.  D is a diagonal scaling
+matrix.  These correspond to "standard Regression usage" as
+
+                X' X  = R' D R
+
+A backsubstitution routine (in thetacov) allows inverting the R
+matrix (the inverse is upper-right triangular, too!). Call this
+matrix H, that is H=R^(-1).
+
+	  (X' X)^(-1) = [(R' D^(1/2)') (D^(1/2) R)]^(-1)
+	  = [ R^-1 D^(-1/2) ] [ R^-1 D^(-1/2) ]'
+
+
+=head1 BUGS/PROBLEMS
+
+None known.
+
+=over 4
+
+=item Perl Problem
+
+Unfortunately, perl is unaware of IEEE number representations.
+This makes it a pain to test whether an observation contains any
+missing variables (coded as 'NaN' in Regression.pm).
+
+=back
+
+=for comment
+pod2html -noindex -title "perl weighted least squares regression package" Regression.pm > Regression.html
+
+
+=head1 VERSION and RECENT CHANGES
+
+2007/04/04:  Added Coefficient Standard Errors
+
+2007/07/01:  Added self-test use (if invoked as perl Regression.pm)
+	     at the end.  cleaned up some print sprintf.
+             changed syntax on new() to eliminate passing K.
+
+2007/07/07:  allowed passing hash with names to include().
+
+
+=head1 AUTHOR
+
+Naturally, Gentleman invented this algorithm.  It was adapted
+by Ivo Welch.  Alan Miller (alan\@dmsmelb.mel.dms.CSIRO.AU)
+pointed out nicer ways to compute the R^2. Ivan Tubert-Brohman
+helped wrap the module as a standard CPAN distribution.
+
+=head1 LICENSE
+
+This module is released for free public use under a GPL license.
+
+(C) Ivo Welch, 2001,2004, 2007.
+
+=cut
+
+
+################################################################
+#### let's start with handling of missing data ("nan" or "NaN")
+################################################################
+use constant TINY => 1e-8;
+my $nan= "NaN";
+
+sub isNaN { 
+  if ($_[0] !~ /[0-9nan]/) { confess "$MNAME:isNaN: definitely not a number in NaN: '$_[0]'"; }
+  return ($_[0]=~ /NaN/i) || ($_[0] != $_[0]);
+}
+
+
+################################################################
+### my $reg = Statistics::Regression->new($regname, \@var_names)
+###
+### Receives the number of variables on each observations (i.e.,
+### an integer) and returns the blessed data structure as a
+### Statistics::Regression object. Also takes an optional name
+### for this regression to remember, as well as a reference to a
+### k*1 array of names for the X coefficients.
+###
+### I have now made it mandatory to give some names.
+###
+################################################################
+sub new {
+  my $classname= shift;  (!ref($classname)) or confess "$MNAME:new: bad class call to new ($classname).\n";
+  my $regname= shift || "no-name";
+  my $xnameptr= shift;
+
+  (defined($regname)) or confess "$MNAME:new: bad name in for regression.  no undef allowed.\n";
+  (!ref($regname)) or confess "$MNAME:new: bad name in for regression.\n";
+  (defined($xnameptr)) or confess "$MNAME:new: You must provide variable names, because this tells me the number of columns.  no undef allowed.\n";
+  (ref($xnameptr) eq "ARRAY") or confess "$MNAME:new: bad xnames for regression. Must be pointer.\n";
+
+  my $K= (@{$xnameptr});
+
+  if (!defined($K)) { confess "$MNAME:new: cannot determine the number of variables"; }
+  if ($K<=1) { confess "$MNAME:new: Cannot run a regression without at least two variables."; }
+
+  sub zerovec {
+    my @rv;
+    for (my $i=0; $i<=$_[0]; ++$i) { $rv[$i]=0; } 
+    return \@rv;
+  }
+
+  bless {
+	 k => $K,
+	 regname => $regname,
+	 xnames => $xnameptr,
+
+	 # constantly updated
+	 n => 0,
+	 sse => 0,
+	 syy => 0,
+	 sy => 0,
+	 wghtn => 0,
+	 d => zerovec($K),
+	 thetabar => zerovec($K),
+	 rbarsize => ($K+1)*$K/2+1,
+	 rbar => zerovec(($K+1)*$K/2+1),
+
+	 # other constants
+	 neverabort => 0,
+
+	 # computed on demand
+	 theta => undef,
+	 sigmasq => undef,
+	 rsq => undef,
+	 adjrsq => undef
+	}, $classname;
+}
+
+
+################################################################
+### $reg->include( $y, [ $x1, $x2, $x3 ... $xk ], $weight );
+###
+### Add one new observation. The weight is optional. Note that
+### inclusion with a weight of -1 can be used to delete an
+### observation.
+###
+### The error checking and transfer of arguments is clutzy, but
+### works.  if I had POSIX assured, I could do better number
+### checking.  right now, I don't do any.
+###
+### Returns the number of observations so far included.
+################################################################
+sub include {
+  my $this = shift;
+  my $yelement= shift;
+  my $xin= shift;
+  my $weight= shift || 1.0;
+
+  # modest input checking;
+  (ref($this)) or confess "$MNAME:include: bad class call to include.\n";
+  (defined($yelement)) or confess "$MNAME:include: bad call for y to include.  no undef allowed.\n";
+  (!ref($yelement)) or confess "$MNAME:include: bad call for y to include.  need scalar.\n";
+  (defined($xin)) or confess "$MNAME:include: bad call for x to include.  no undef allowed.\n";
+  (ref($xin)) or confess "$MNAME:include: bad call for x to include. need reference.\n";
+  (!ref($weight)) or confess "$MNAME:include: bad call for weight to include. need scalar.\n";
+
+
+  # omit observations with missing observations;
+  (defined($yelement)) or confess "$MNAME:include: you must give a y value (predictor).";
+  (isNaN($yelement)) and return $this->{n};  # ignore this observation;
+  ## should check for number, not string
+
+
+  # check and transfer the X vector
+  my @xrow;
+  if (ref($xin) eq "ARRAY") { @xrow= @{$xin}; }
+  else {
+    my $xctr=0;
+    foreach my $nm (@{$this->{xnames}}) {
+      (defined($xin->{$nm})) or confess "$MNAME:include: Variable '$nm' needs to be set in hash.\n";
+      $xrow[$xctr]= $xin->{$nm};
+      ++$xctr;
+    }
+  }
+
+  my @xcopy;
+  for (my $i=1; $i<=$this->{k}; ++$i) { 
+    (defined($xrow[$i-1]))
+      or confess "$MNAME:include: Internal Error: at N=".($this->{n}).", the x[".($i-1)."] is undef.  use NaN for missing.";
+    (isNaN($xrow[$i-1])) and return $this->{n};
+    $xcopy[$i]= $xrow[$i-1];
+    ## should check for number, not string
+  }
+
+  ################ now comes the real routine
+
+  $this->{syy}+= ($weight*($yelement*$yelement));
+  $this->{sy}+= ($weight*($yelement));
+  if ($weight>=0.0) { ++$this->{n}; } else { --$this->{n}; }
+
+  $this->{wghtn}+= $weight;
+
+  for (my $i=1; $i<=$this->{k};++$i) {
+    if ($weight==0.0) { return $this->{n}; }
+    if (abs($xcopy[$i])>(TINY)) {
+      my $xi=$xcopy[$i];
+
+      my $di=$this->{d}->[$i];
+      my $dprimei=$di+$weight*($xi*$xi);
+      my $cbar= $di/$dprimei;
+      my $sbar= $weight*$xi/$dprimei;
+      $weight*=($cbar);
+      $this->{d}->[$i]=$dprimei;
+      my $nextr=int( (($i-1)*( (2.0*$this->{k}-$i))/2.0+1) );
+      if (!($nextr<=$this->{rbarsize}) ) { confess "$MNAME:include: Internal Error 2"; }
+      my $xk;
+      for (my $kc=$i+1;$kc<=$this->{k};++$kc) {
+	$xk=$xcopy[$kc]; $xcopy[$kc]=$xk-$xi*$this->{rbar}->[$nextr];
+	$this->{rbar}->[$nextr]= $cbar * $this->{rbar}->[$nextr]+$sbar*$xk;
+	++$nextr;
+      }
+      $xk=$yelement; $yelement-= $xi*$this->{thetabar}->[$i];
+      $this->{thetabar}->[$i]= $cbar*$this->{thetabar}->[$i]+$sbar*$xk;
+    }
+  }
+  $this->{sse}+=$weight*($yelement*$yelement);
+
+  # indicate that Theta is garbage now
+  $this->{theta}= undef;
+  $this->{sigmasq}= undef; $this->{rsq}= undef; $this->{adjrsq}= undef;
+
+  return $this->{n};
+}
+
+
+################################################################
+###
+### $reg->rsq(), $reg->adjrsq(), $reg->sigmasq(), $reg->ybar(),
+### $reg->sst(), $reg->k(), $reg->n()
+###
+### These methods provide common auxiliary information.  rsq,
+### adjrsq, sigmasq, sst, and ybar have not been checked but are
+### likely correct.  The results are stored for later usage,
+### although this is somewhat unnecessary because the
+### computation is so simple anyway.
+################################################################
+
+sub rsq {
+  my $this= shift;
+  return $this->{rsq}= 1.0- $this->{sse} / $this->sst();
+}
+
+sub adjrsq {
+  my $this= shift;
+  return $this->{adjrsq}= 1.0- (1.0- $this->rsq())*($this->{n}-1)/($this->{n} - $this->{k});
+}
+
+sub sigmasq {
+  my $this= shift;
+  return $this->{sigmasq}= ($this->{n}<=$this->{k}) ? "Inf" : ($this->{sse}/($this->{n} - $this->{k}));
+}
+
+sub ybar {
+  my $this= shift;
+  return $this->{ybar}= $this->{sy}/$this->{wghtn};
+}
+
+sub sst {
+  my $this= shift;
+  return $this->{sst}= ($this->{syy} - $this->{wghtn}*($this->ybar())**2);
+}
+
+sub k {
+  my $this= shift;
+  return $this->{k};
+}
+sub n {
+  my $this= shift;
+  return $this->{n};
+}
+
+
+
+################################################################
+###  $reg->print()  [no arguments!]
+###
+### prints the estimated coefficients, and R^2 and N. For an
+### example see the Synopsis.
+################################################################
+sub print {
+  my $this= shift;
+
+  print "****************************************************************\n";
+  print "Regression '$this->{regname}'\n";
+  print "****************************************************************\n";
+
+  my $theta= $this->theta();
+  my @standarderrors= $this->standarderrors();
+
+  printf "%-15s\t%12s\t%12s\t%7s\n", "Name", "Theta", "StdErr", "T-stat";
+  for (my $i=0; $i< $this->k(); ++$i) {
+    my $name= "[$i".(defined($this->{xnames}->[$i]) ? "='$this->{xnames}->[$i]'":"")."]";
+    printf "%-15s\t", $name;
+    printf "%12.4f\t", $theta->[$i];
+    printf "%12.4f\t", $standarderrors[$i];
+    printf "%7.2f", ($theta->[$i]/$standarderrors[$i]);
+    printf "\n";
+  }
+
+  print "\nR^2= ".sprintf("%.3f", $this->rsq()).", N= ".$this->n().", K= ".$this->k()."\n";
+  print "****************************************************************\n";
+}
+
+
+
+################################################################
+### $theta = $reg->theta or @theta = $reg->theta
+###
+### This is the work horse.  It estimates and returns the vector
+### of coefficients. In scalar context returns an array
+### reference; in list context it returns the list of
+### coefficients.
+################################################################
+sub theta {
+  my $this= shift;
+
+  if (defined($this->{theta})) { 
+    return wantarray ? @{$this->{theta}} : $this->{theta}; 
+  }
+
+  if ($this->{n} < $this->{k}) { return; }
+  for (my $i=($this->{k}); $i>=1; --$i) {
+    $this->{theta}->[$i]= $this->{thetabar}->[$i];
+    my $nextr= int (($i-1)*((2.0*$this->{k}-$i))/2.0+1);
+    if (!($nextr<=$this->{rbarsize})) { confess "$MNAME:theta: Internal Error 3"; }
+    for (my $kc=$i+1;$kc<=$this->{k};++$kc) {
+      $this->{theta}->[$i]-=($this->{rbar}->[$nextr]*$this->{theta}->[$kc]);
+      ++$nextr;
+    }
+  }
+
+
+  my $ref = $this->{theta}; shift(@$ref); # we are counting from 0
+
+  # if in a scalar context, otherwise please return the array directly
+  wantarray ? @{$this->{theta}} : $this->{theta};
+}
+
+################################################################
+### @se= $reg->standarderrors()
+###
+### This is the most difficult routine.  Take it on faith.
+###
+###  R=Rbar is an upperright triangular matrix, kept in normalized
+###  form with implicit 1's on the diagonal.  D is a diagonal scaling
+###  matrix.  These correspond to "standard Regression usage" as
+###
+###                X' X  = R' D R
+###
+###  A backsubstitution routine (in thetacov) allows inverting the R
+###  matrix (the inverse is upper-right triangular, too!). Call this
+###  matrix H, that is H=R^(-1).
+###
+###	  (X' X)^(-1) = [(R' D^(1/2)') (D^(1/2) R)]^(-1)
+###	  = [ R^-1 D^(-1/2) ] [ R^-1 D^(-1/2) ]'
+###
+###  Let's work this for our example, where
+###
+###  $reg->include( 2.0, [ 1.0, 3.0, -1.0 ] );
+###  $reg->include( 1.0, [ 1.0, 5.0, 2.0 ] );
+###  $reg->include( 20.0, [ 1.0, 31.0, 0.0 ] );
+###  $reg->include( 15.0, [ 1.0, 11.0, 2.0 ] );
+###
+###  For debugging, the X'X matrix for our example is
+###	4, 50, 3
+###	50 1116 29
+###	3 29 9
+###
+###  Its inverse is
+###	 0.70967 -0.027992 -0.146360
+###	-0.02799  0.002082  0.002622
+###	-0.14636  0.002622  0.151450
+###
+###  Internally, this is kept as follows
+###
+###  R is 1, 0, 0
+###       12.5 1 0
+###       0.75 -0.0173 1
+###
+###  d is the diagonal(4,491,6.603) matrix, which as 1/sqrt becomes dhi= 0.5, 0.04513, 0.3892
+###
+###  R * d * R' is indeed the X' X matrix.
+###
+###  The inverse of R is
+###
+###  1, 0, 0
+###  -12.5 1 0
+###  -0.9664 0.01731 1
+###
+###  in R, t(solve(R) %*% dhi) %*% t( t(solve(R) %*% dhi) ) is the correct inverse.
+###
+### The routine has a debug switch which makes it come out very verbose.
+################################################################
+my $debug=0;
+
+sub standarderrors {
+  my $this= shift;
+  our $K= $this->{k};  # convenience
+
+  our @u;
+  sub ui {
+    if ($debug) {
+      ($_[0]<1)||($_[0]>$K) and confess "$MNAME:standarderrors: bad index 0 $_[0]\n";
+      ($_[1]<1)||($_[1]>$K) and confess "$MNAME:standarderrors: bad index 1 $_[0]\n";
+    }
+    return (($K*($_[0]-1))+($_[1]-1));
+  }
+  sub giveuclear { 
+    for (my $i=0; $i<($K**2); ++$i) { $u[$i]=0.0; }
+    return (wantarray) ? @u : \@u;
+  }
+
+  sub u { return $u[ui($_[0], $_[1])]; }
+  sub setu { return $u[ui($_[0], $_[1])]= $_[2]; }
+  sub add2u { return $u[ui($_[0], $_[1])]+= $_[2]; }
+  sub mult2u { return $u[ui($_[0], $_[1])]*= $_[2]; }
+
+  (defined($K)) or confess "$MNAME:standarderrors: Internal Error: I forgot the number of variables.\n";
+  if ($debug) {
+    print "The Start Matrix is:\n";
+    for (my $i=1; $i<=$K; ++$i) {
+      print "[$i]:\t";
+      for (my $j=1; $j<=$K; ++$j) {
+	print $this->rbr($i, $j)."\t";
+      }
+      print "\n";
+    }
+    print "The Start d vector is:\n";
+    for (my $i=1; $i<=$K; ++$i) {
+      print "".$this->{d}[$i]."\t";
+    }
+    print "\n";
+  }
+
+  sub rbrindex {
+    return ($_[0] == $_[1]) ? -9 :
+      ($_[0]>$_[1]) ? -8 :
+	((($_[0]-1.0)* (2.0*$K-$_[0])/2.0+1.0) + $_[1] - 1 - $_[0] ); }
+
+  # now a real member routine;
+  sub rbr {
+    my $this= shift;
+    return ($_[0] == $_[1]) ? 1 : ( ($_[0]>$_[1]) ? 0 : ($this->{rbar}[rbrindex($_[0],$_[1])]));
+  }
+
+  my $u= giveuclear();
+
+  for (my $j=$K; $j>=1; --$j) {
+    setu($j,$j, 1.0/($this->rbr($j,$j)));
+    for (my $k=$j-1; $k>=1; --$k) {
+      setu($k,$j,0);
+      for (my $i=$k+1; $i<=$j; ++$i) { add2u($k,$j, $this->rbr($k,$i)*u($i,$j)); }
+      mult2u($k,$j, (-1.0)/$this->rbr($k,$k));
+    }
+  }
+
+  if ($debug) {
+    print "The Inverse Matrix of R is:\n";
+    for (my $i=1; $i<=$K; ++$i) {
+      print "[$i]:\t";
+      for (my $j=1; $j<=$K; ++$j) {
+	print $u[ui($i,$j)]."\t";
+      }
+      print "\n";
+    }
+  }
+
+  for (my $i=1;$i<=$K;++$i) {
+    for (my $j=1;$j<=$K;++$j) {
+      if (abs($this->{d}[$j])<TINY) {
+	mult2u($i,$j, sqrt(1.0/TINY));
+	if (abs($this->{d}[$j])==0.0) {
+	  if ($this->{neverabort}) {
+	    for (my $i=0; $i<($K**2); ++$i) { $u[$i]= "NaN"; }
+	    return undef;
+	  }
+	  else { confess "$MNAME:standarderrors: I cannot compute the theta-covariance matrix for variable $j ".($this->{d}[$j])."\n"; }
+	}
+      }
+      else { mult2u($i,$j, sqrt(1.0/$this->{d}[$j])); }
+    }
+  }
+
+  if ($debug) {
+    print "The Inverse Matrix of R multipled by D^(-1/2) is:\n";
+    for (my $i=1; $i<=$K; ++$i) {
+      print "[$i]:\t";
+      for (my $j=1; $j<=$K; ++$j) {
+	print $u[ui($i,$j)]."\t";
+      }
+      print "\n";
+    }
+  }
+
+  $this->{sigmasq}= ($this->{n}<=$K) ? "Inf" : ($this->{sse}/($this->{n} - $K));
+  my @xpxinv;
+  for (my $i=1;$i<=$K; ++$i) {
+    for (my $j=$i;$j<=$K;++$j) {
+      my $indexij= ui($i,$j);
+      $xpxinv[$indexij]= 0.0;
+      for (my $k=1;$k<=$K;++$k) {
+	$xpxinv[$indexij] += $u[ui($i,$k)]*$u[ui($j,$k)];
+      }
+      $xpxinv[ui($j,$i)]= $xpxinv[$indexij]; # this is symmetric
+    }
+  }
+
+  if ($debug) {
+    print "The full inverse matrix of X'X is:\n";
+    for (my $i=1; $i<=$K; ++$i) {
+      print "[$i]:\t";
+      for (my $j=1; $j<=$K; ++$j) {
+	print $xpxinv[ui($i,$j)]."\t";
+      }
+      print "\n";
+    }
+    print "The sigma^2 is ".$this->{sigmasq}."\n";
+  }
+
+  ## 99% of the usage here will be to print the diagonal elements of sqrt ( (X' X) sigma^2 )
+  ## so, let's make this our first returned object;
+
+  my @secoefs;
+  for (my $i=1; $i<=$K; ++$i) {
+    $secoefs[$i-1]= sqrt($xpxinv[ui($i,$i)] * $this->{sigmasq});
+  }
+  if ($debug) { for (my $i=0; $i<$K; ++$i) { print " $secoefs[$i] "; } print "\n"; }
+
+  # the following are clever return methods;  if the user goes over the secoefs,
+  # almost surely an error will result, because he will run into xpxinv.  For special
+  # usage, however, xpxinv may still be useful.
+
+  return ( @secoefs, \@xpxinv, $this->sigmasq );
+}
+
+
+################################
+# linearcombination_variance( @l ) -- variance of the linear combination
+# l' * theta-hat, i.e.  l' (X'X)^{-1} l * sigma^2.  Exactly one weight per
+# regressor must be supplied; otherwise the routine confesses.
+sub linearcombination_variance {
+  my $this= shift;
+  our $K= $this->{k};  # package global on purpose: ui() reads it
+
+  my @weights= @_;
+
+  (scalar(@weights) == $K) or confess "$MNAME:linearcombination_variance: ".
+    "Sorry, you must give a vector of length $K, not ".(scalar(@weights))."\n";
+
+  # standarderrors() returns (secoefs[1..K], \@xpxinv, sigmasq); pick out
+  # the trailing two items.
+  my @allback= $this->standarderrors();
+  my $xpxinv= $allback[$this->{k}];     # element K: ref to inverse of X'X
+  my $sigmasq= $allback[$this->{k}+1];  # element K+1: sigma^2 estimate
+
+  # Accumulate the quadratic form l' (X'X)^{-1} l.
+  my $quadform= 0;
+  for (my $row=1; $row<=$K; ++$row) {
+    for (my $col=1; $col<=$K; ++$col) {
+      $quadform += $weights[$row-1]*$weights[$col-1]*$xpxinv->[ui($row,$col)];
+    }
+  }
+  # Scale by the residual variance and hand back a plain scalar.
+  return $quadform*$sigmasq;
+}
+
+
+################################################################
+### sub dump() was used internally for debugging.
+################################################################
+# Pretty-print every field of the regression object to STDOUT.  Scalar
+# fields are shown inline; fields holding array or hash references are
+# expanded element by element.  Output format is for human eyes only.
+# NOTE(review): this shadows Perl's core dump() built-in; it is only
+# reachable as a method call here, so no clash arises in practice.
+sub dump {
+  my $this= $_[0];
+  print "****************************************************************\n";
+  print "Regression '$this->{regname}'\n";
+  print "****************************************************************\n";
+  # Helper: print one field.  Args: $_[0] = the object, $_[1] = label,
+  # $_[2] = hash key (callers pass the same key for both).  A named sub
+  # nested in a sub is compiled once at package level, so this is in
+  # effect a private package routine, invoked below via method syntax.
+  sub print1val {
+    no strict;
+    print "$_[1]($_[2])=\t". ((defined($_[0]->{ $_[2] }) ? $_[0]->{ $_[2] } : "intentionally undef"));
+
+    my $ref=$_[0]->{ $_[2] };
+
+    if (ref($ref) eq 'ARRAY') {
+      my $arrayref= $ref;
+      print " $#$arrayref+1 elements:\n";
+      # Long arrays are packed onto one line; short ones get a line each.
+      if ($#$arrayref>30) {
+	print "\t";
+	for(my $i=0; $i<$#$arrayref+1; ++$i) { print "$i='$arrayref->[$i]';"; }
+	print "\n";
+      }
+      else {
+	for(my $i=0; $i<$#$arrayref+1; ++$i) { print "\t$i=\t'$arrayref->[$i]'\n"; }
+      }
+    }
+    elsif (ref($ref) eq 'HASH') {
+      my $hashref= $ref;
+      print " ".scalar(keys(%$hashref))." elements\n";
+      while (my ($key, $val) = each(%$hashref)) {
+	print "\t'$key'=>'$val';\n";
+      }
+    }
+    else {
+      print " [was scalar]\n"; }
+  }
+
+  # Walk every field of the blessed hash and print it via the helper.
+  while (my ($key, $val) = each(%$this)) {
+    $this->print1val($key, $key);
+  }
+  print "****************************************************************\n";
+}
+
+################################################################
+### The Test Program.  Invoke as "perl Regression.pm".
+################################################################
+
+
+# Modulino self-test: runs only when this file is executed directly
+# (no caller), and stays inert under use/require.  The previous check
+# ($0 eq "Regression.pm") broke whenever the script was invoked with a
+# path component, e.g. "perl lib/PandoraFMS/Statistics/Regression.pm".
+# NOTE(review): the class name below assumes the package declaration is
+# Statistics::Regression -- confirm against the top of this file.
+unless (caller()) {
+
+  # Create regression object
+  my $reg = Statistics::Regression->new( "sample regression", [ "const", "someX", "someY" ] );
+
+  # Add data points: include($y, \@x) accumulates one observation.
+  $reg->include( 2.0, [ 1.0, 3.0, -1.0 ] );
+  $reg->include( 1.0, [ 1.0, 5.0, 2.0 ] );
+  $reg->include( 20.0, [ 1.0, 31.0, 0.0 ] );
+
+  # Observations may also be supplied as a hash keyed by variable name;
+  # keys that were not declared at construction time are ignored.
+  my %inhash= ( const => 1.0, someX => 11.0, someY => 2.0, ignored => "ignored" );
+  $reg->include( 15.0, \%inhash );
+  # $reg->include( 15.0, [ 1.0, 11.0, 2.0 ] );
+
+  # Print the result
+  $reg->print();
+}
+
+
+1;