Skip to content

Commit

Permalink
Fixing Codification filter issue when one of the processed columns ha…
Browse files Browse the repository at this point in the history
…ve a max-length constraint.
  • Loading branch information
cesarsouza committed Apr 6, 2014
1 parent 549d27b commit 1e56577
Show file tree
Hide file tree
Showing 9 changed files with 211 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,8 @@
<Folder Include="images\" />
<Folder Include="images\" />
<Folder Include="diagrams\classes\" />
<Folder Include="images\filters\" />
<Folder Include="images\hmm\" />
<Folder Include="resources\" />
<Folder Include="diagrams\" />
</ItemGroup>
Expand Down Expand Up @@ -640,6 +642,46 @@
<ImageId>Accord.Imaging.Filters</ImageId>
<AlternateText>Accord.Imaging.Filters</AlternateText>
</Content>
<Content Include="images\filters\input-table.png">
<ImageId>input-table</ImageId>
<AlternateText>input-table</AlternateText>
</Content>
<Content Include="images\filters\output-codification.png">
<ImageId>output-codification</ImageId>
<AlternateText>output-codification</AlternateText>
</Content>
<Content Include="images\filters\output-discretization.png">
<ImageId>output-discretization</ImageId>
<AlternateText>output-discretization</AlternateText>
</Content>
<Content Include="images\filters\output-projection.png">
<ImageId>output-projection</ImageId>
<AlternateText>output-projection</AlternateText>
</Content>
<Content Include="images\hmm\hmm-joint-probability.png">
<ImageId>hmm-joint-probability</ImageId>
<AlternateText>hmm-joint-probability</AlternateText>
</Content>
<Content Include="images\hmm\hmm-model.png">
<ImageId>hmm-model</ImageId>
<AlternateText>hmm-model</AlternateText>
</Content>
<Content Include="images\hmm\hmm-sequence-probability.png">
<ImageId>hmm-sequence-probability</ImageId>
<AlternateText>hmm-sequence-probability</AlternateText>
</Content>
<Content Include="images\hmm\hmm-tuple.png">
<ImageId>hmm-tuple</ImageId>
<AlternateText>hmm-tuple</AlternateText>
</Content>
<Content Include="images\hmm\map-decision.png">
<ImageId>map-decision</ImageId>
<AlternateText>map-decision</AlternateText>
</Content>
<Content Include="images\hmm\ml-decision.png">
<ImageId>ml-decision</ImageId>
<AlternateText>ml-decision</AlternateText>
</Content>
<Image Include="resources\Accord.NET.png">
<CopyToMedia>True</CopyToMedia>
<AlternateText>Accord.NET</AlternateText>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ namespace Accord.Statistics.Distributions.Multivariate
///
[Serializable]
public class MultivariateEmpiricalDistribution : MultivariateContinuousDistribution,
IFittableDistribution<double[], MultivariateEmpiricalOptions>
IFittableDistribution<double[], MultivariateEmpiricalOptions>,
ISampleableDistribution<double[]>
{

// Distribution parameters
Expand Down Expand Up @@ -434,5 +435,41 @@ public override object Clone()
return smoothing;
}

/// <summary>
///   Generates a random vector of observations from the current distribution.
/// </summary>
///
/// <param name="samples">The number of samples to generate.</param>
///
/// <returns>
///   An array holding <paramref name="samples"/> observations. Each one is
///   selected by drawing a random index into the observations stored in this
///   distribution (so the same stored observation may be selected more than
///   once) and is returned as an independent copy.
/// </returns>
///
public double[][] Generate(int samples)
{
    var generator = Accord.Math.Tools.Random;

    double[][] s = new double[samples][];
    for (int i = 0; i < s.Length; i++)
    {
        int index = generator.Next(this.samples.Length);

        // Hand out a copy rather than the stored array itself: otherwise a
        // caller editing a generated sample would silently edit the data
        // backing this distribution as well.
        s[i] = (double[])this.samples[index].Clone();
    }

    return s;
}

/// <summary>
///   Generates a random observation from the current distribution.
/// </summary>
///
/// <returns>
///   A random observation, selected by drawing a random index into the
///   observations stored in this distribution and returned as an
///   independent copy.
/// </returns>
///
public double[] Generate()
{
    var generator = Accord.Math.Tools.Random;

    int index = generator.Next(this.samples.Length);

    // Return a copy so that changes made by the caller to the generated
    // observation cannot alter the data backing this distribution.
    return (double[])this.samples[index].Clone();
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ namespace Accord.Statistics.Distributions.Univariate
///
[Serializable]
public class EmpiricalDistribution : UnivariateContinuousDistribution,
IFittableDistribution<double, EmpiricalOptions>
IFittableDistribution<double, EmpiricalOptions>,
ISampleableDistribution<double>
{

// Distribution parameters
Expand Down Expand Up @@ -445,5 +446,40 @@ public static double SmoothingRule(double[] observations)
return sigma * Math.Pow(4.0 / (3.0 * observations.Length), -1 / 5.0);
}

/// <summary>
///   Draws several random observations from the current distribution.
/// </summary>
///
/// <param name="samples">How many observations should be drawn.</param>
///
/// <returns>
///   An array with <paramref name="samples"/> values, each one picked by
///   drawing a random index into the observations stored in this distribution.
/// </returns>
///
public double[] Generate(int samples)
{
    var random = Accord.Math.Tools.Random;
    double[] drawn = new double[samples];

    // The parameter hides the field of the same name, hence the explicit "this."
    for (int k = 0; k < drawn.Length; k++)
        drawn[k] = this.samples[random.Next(this.samples.Length)];

    return drawn;
}

/// <summary>
///   Draws a single random observation from the current distribution.
/// </summary>
///
/// <returns>
///   One of the observations stored in this distribution,
///   picked by drawing a random index into them.
/// </returns>
///
public double Generate()
{
    var random = Accord.Math.Tools.Random;

    return this.samples[random.Next(this.samples.Length)];
}
}
}
1 change: 1 addition & 0 deletions Sources/Accord.Statistics/Filters/Codification.cs
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ protected override DataTable ProcessFilter(DataTable data)
foreach (Options options in Columns)
{
// Clear any max-length constraint, then change the column type from string to integer
result.Columns[options.ColumnName].MaxLength = -1;
result.Columns[options.ColumnName].DataType = typeof(int);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ public class MaximumLikelihoodLearning<TDistribution> : ISupervisedLearning

private HiddenMarkovModel<TDistribution> model;
private bool useLaplaceRule = true;
private bool useWeights = false;

private IFittingOptions fittingOptions;

Expand All @@ -141,6 +142,19 @@ public HiddenMarkovModel<TDistribution> Model
get { return model; }
}

/// <summary>
///   Gets or sets whether the emission fitting step should hand
///   weighted samples, rather than just the samples clustered by
///   state, to the <see cref="IDistribution.Fit(System.Array)">density
///   estimation methods</see>.
/// </summary>
///
/// <value>
///   <c>true</c> to present weighted samples;
///   <c>false</c> to present only the clustered samples.
/// </value>
///
public bool UseWeights
{
    get
    {
        return this.useWeights;
    }
    set
    {
        this.useWeights = value;
    }
}

/// <summary>
/// Gets or sets whether to use Laplace's rule
/// of succession to avoid zero probabilities.
Expand Down Expand Up @@ -204,7 +218,6 @@ public double Run(Array[] observations, int[][] paths)
obs[i] = convert(observations[i], model.Dimension);
}


// Grab model information
int N = observations.Length;
int states = model.States;
Expand All @@ -221,27 +234,55 @@ public double Run(Array[] observations, int[][] paths)
for (int j = 1; j < path.Length; j++)
transitions[path[j - 1], path[j]]++;

// 3. Count emissions for each state
List<double[]>[] clusters = new List<double[]>[model.States];
for (int i = 0; i < clusters.Length; i++)
clusters[i] = new List<double[]>();
if (useWeights)
{
int totalObservations = 0;
for (int i = 0; i < obs.Length; i++)
totalObservations += obs[i].Length;

// Count symbol frequencies per state
for (int i = 0; i < paths.Length; i++)
double[][] weights = new double[states][];
for (int i = 0; i < weights.Length; i++)
weights[i] = new double[totalObservations];

double[][] all = new double[totalObservations][];

for (int i = 0, c = 0; i < paths.Length; i++)
{
for (int t = 0; t < paths[i].Length; t++, c++)
{
int state = paths[i][t];
all[c] = obs[i][t];
weights[state][c] = 1;
}
}

for (int i = 0; i < model.States; i++)
model.Emissions[i].Fit(all, weights[i], fittingOptions);
}
else
{
for (int t = 0; t < paths[i].Length; t++)
// 3. Count emissions for each state
List<double[]>[] clusters = new List<double[]>[model.States];
for (int i = 0; i < clusters.Length; i++)
clusters[i] = new List<double[]>();

// Count symbol frequencies per state
for (int i = 0; i < paths.Length; i++)
{
int state = paths[i][t];
double[] symbol = obs[i][t];
for (int t = 0; t < paths[i].Length; t++)
{
int state = paths[i][t];
double[] symbol = obs[i][t];

clusters[state].Add(symbol);
clusters[state].Add(symbol);
}
}
}

// Estimate probability distributions
for (int i = 0; i < model.States; i++)
if (clusters[i].Count > 0)
model.Emissions[i].Fit(clusters[i].ToArray(), null, fittingOptions);
// Estimate probability distributions
for (int i = 0; i < model.States; i++)
if (clusters[i].Count > 0)
model.Emissions[i].Fit(clusters[i].ToArray(), null, fittingOptions);
}

// 4. Form log-probabilities, using the Laplace
// correction to avoid zero probabilities
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@
<SpecificVersion>False</SpecificVersion>
<HintPath>..\..\..\Externals\AForge.NET\AForge.Math.dll</HintPath>
</Reference>
<!-- Resolved from the repository's own Externals folder, using the same relative
     location as the AForge.Math reference, instead of a path that only exists on
     one developer's machine.
     NOTE(review): assumes AForge.Neuro.dll is checked in next to AForge.Math.dll; confirm. -->
<Reference Include="AForge.Neuro, Version=2.2.5.0, Culture=neutral, PublicKeyToken=2094f4ea39731d4f, processorArchitecture=MSIL">
  <SpecificVersion>False</SpecificVersion>
  <HintPath>..\..\..\Externals\AForge.NET\AForge.Neuro.dll</HintPath>
</Reference>
<Reference Include="Microsoft.VisualStudio.QualityTools.UnitTestFramework, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL" />
<Reference Include="System" />
<Reference Include="System.Core">
Expand Down Expand Up @@ -338,6 +342,10 @@
<Name>Accord.Math</Name>
<Private>True</Private>
</ProjectReference>
<ProjectReference Include="..\..\Accord.Neuro\Accord.Neuro.csproj">
<Project>{179F3045-8757-4F4B-9508-F48327BA11E3}</Project>
<Name>Accord.Neuro</Name>
</ProjectReference>
<ProjectReference Include="..\..\Accord.Statistics\Accord.Statistics.csproj">
<Project>{FD8101DD-C95D-42D6-AD44-AE01C25F2811}</Project>
<Name>Accord.Statistics</Name>
Expand Down Expand Up @@ -374,6 +382,7 @@
<None Include="Accord.snk" />
<None Include="app.config" />
<None Include="Resources\CircleWithWeights.xls" />
<None Include="Resources\intrusion.xlsx" />
<None Include="Resources\sample.xls">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@

namespace Accord.Tests.Statistics
{
using Accord.MachineLearning.Bayes;
using System.Data;
using Accord.MachineLearning.VectorMachines;
using Accord.MachineLearning.VectorMachines.Learning;
using Accord.Math;
using Accord.Statistics.Filters;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System.Data;
using Accord.Statistics.Formats;
using Accord.Controls;

[TestClass()]
Expand Down Expand Up @@ -259,7 +259,6 @@ public void ApplyTest2()
[TestMethod()]
public void ApplyTest3()
{

string[] names = { "child", "adult", "elder" };

Codification codebook = new Codification("Label", names);
Expand All @@ -284,5 +283,26 @@ public void ApplyTest3()
Assert.AreEqual("adult", labelb);
Assert.AreEqual("elder", labelc);
}

/// <summary>
///   Checks that a codebook built from a worksheet of the "intrusion.xlsx"
///   resource can be applied back to that worksheet, producing a non-empty
///   table in which no column is left with the string data type.
/// </summary>
///
/// <remarks>
///   NOTE(review): presumably the worksheet contains text columns carrying a
///   max-length constraint, which is the scenario this test is meant to
///   guard; confirm against the resource file.
/// </remarks>
///
[TestMethod()]
public void ApplyTest4()
{
    // The path is relative to the directory the tests are executed from
    string path = @"..\..\..\Accord.Tests\Accord.Tests.Statistics\Resources\intrusion.xlsx";

    ExcelReader db = new ExcelReader(path, false, true);

    DataTable table = db.GetWorksheet("test");

    Codification codebook = new Codification(table);

    DataTable result = codebook.Apply(table);

    Assert.IsNotNull(result);

    foreach (DataColumn col in result.Columns)
    {
        // The value that must NOT be found goes first, the observed value
        // second, so that a failure message labels the two correctly.
        Assert.AreNotEqual(typeof(string), col.DataType);
    }

    Assert.IsTrue(result.Rows.Count > 0);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,12 @@
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
//

using Accord.Statistics.Formats;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System.Data;
using Accord.Math;

namespace Accord.Tests.Statistics
{
using Accord.Statistics.Formats;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System.Data;
using Accord.Math;

[TestClass()]
public class ExcelReaderTest
Expand Down
Binary file not shown.

0 comments on commit 1e56577

Please sign in to comment.