Skip to content

Commit

Permalink
Implement the rebalanceClusters method and adapt code accordingly
Browse files Browse the repository at this point in the history
  • Loading branch information
MaxHalford committed Jun 25, 2017
1 parent 36feed0 commit d05d9db
Show file tree
Hide file tree
Showing 10 changed files with 188 additions and 53 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ Essentially, only `GenomeFactory`, `NPops`, `PopSize` and `Model` are required t

### Running a GA

Once you have implemented the `Genome` interface and instantiated a `GA` struct you are good to go. You can call the `GA`'s `Enhance()` method which will apply a model once (see the [models section](#models)). It's your choice if you want to call `Enhance()` method multiple by using a loop or by imposing a time limit.
Once you have implemented the `Genome` interface and instantiated a `GA` struct you are good to go. You can call the `GA`'s `Enhance` method which will apply a model once (see the [models section](#models)). It's your choice if you want to call the `Enhance` method multiple times by using a loop or by imposing a time limit. The `Enhance` method will return an `error` which you should handle. If your population isn't evolving when you call `Enhance` it's most likely because `Enhance` did not return a `nil` error.

At any time you have access to the `GA`'s `Best` field which is an internal representation of your genome. The `Best` field itself contains a `Fitness` field and a `Genome` field respectively indicating the best obtained solution and the parameters of that solution.

Expand Down
69 changes: 68 additions & 1 deletion distance.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
package gago

import (
"fmt"
"math"
)

// A Metric returns the distance between two genomes.
type Metric func(a, b Individual) float64

Expand Down Expand Up @@ -48,7 +53,9 @@ func (dm *DistanceMemoizer) GetDistance(a, b Individual) float64 {
return dist
}

// Return the average distance between a Individual and a slice of Individuals.
// calcAvgDistances returns a map that associates the ID of each provided
// Individual with the average distance the Individual has with the rest of the
// Individuals.
func calcAvgDistances(indis Individuals, dm DistanceMemoizer) map[string]float64 {
var avgDistances = make(map[string]float64)
for _, a := range indis {
Expand All @@ -59,3 +66,63 @@ func calcAvgDistances(indis Individuals, dm DistanceMemoizer) map[string]float64
}
return avgDistances
}

// rebalanceClusters moves Individuals between the provided clusters until
// every cluster holds at least minPerCluster Individuals. A cluster that is
// too small receives, one at a time, the Individual closest to its medoid
// taken from a cluster that has more than minPerCluster Individuals.
//
// The clusters slice and the slices it contains are modified in place; the
// same slice is returned for convenience. An error is returned if a cluster is
// empty (it has no medoid) or if there are not enough Individuals overall for
// each cluster to reach minPerCluster.
func rebalanceClusters(clusters []Individuals, dm DistanceMemoizer, minPerCluster int) ([]Individuals, error) {
	// missing[i] is the number of Individuals cluster i still needs in order
	// to reach minPerCluster. A negative value means the cluster has
	// Individuals to spare.
	var missing = make([]int, len(clusters))
	for i, cluster := range clusters {
		// Check that the cluster has at least one Individual
		if len(cluster) == 0 {
			return nil, fmt.Errorf("Cluster %d has 0 individuals", i)
		}
		missing[i] = minPerCluster - len(cluster)
	}
	// Check if there are enough Individuals to rebalance the clusters. A sum
	// of exactly 0 is feasible: every cluster can end up with precisely
	// minPerCluster Individuals.
	if deficit := sumInts(missing); deficit > 0 {
		return nil, fmt.Errorf("Missing %d individuals to be able to rebalance the clusters",
			deficit)
	}
	for i := range clusters {
		// Assign new Individuals to the cluster while it is missing some
		for missing[i] > 0 {
			// Determine the medoid from the cluster's current members; index
			// through clusters[i] because the slice header changes on append.
			clusters[i].SortByDistanceToMedoid(dm)
			var (
				medoid  = clusters[i][0]
				cci     = -1 // Closest cluster index, -1 while no donor is found
				cii     = -1 // Closest Individual index
				minDist = math.Inf(1)
			)
			// Go through the Individuals of the other clusters and find the
			// one closest to the computed medoid
			for j := range clusters {
				// Only clusters with Individuals to spare can donate
				if i == j || missing[j] >= 0 {
					continue
				}
				for k, indi := range clusters[j] {
					var dist = dm.GetDistance(indi, medoid)
					// Always accept the first candidate so that a donor is
					// picked even if the distance is not finite.
					if cci < 0 || dist < minDist {
						cci, cii, minDist = j, k, dist
					}
				}
			}
			// Defensive guard: the feasibility check above implies a donor
			// exists, but never fall back to an arbitrary index.
			if cci < 0 {
				return nil, fmt.Errorf("Cluster %d is missing %d individuals but no cluster has any to spare",
					i, missing[i])
			}
			// Add the closest Individual to the cluster
			clusters[i] = append(clusters[i], clusters[cci][cii])
			// Remove the closest Individual from the cluster it belonged to
			clusters[cci] = append(clusters[cci][:cii], clusters[cci][cii+1:]...)
			// Update both counters so that a donor is never drained below
			// minPerCluster.
			missing[i]--
			missing[cci]++
		}
	}
	return clusters, nil
}
47 changes: 47 additions & 0 deletions distance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,50 @@ func TestDistanceMemoizer(t *testing.T) {
t.Error("Wrong calculated distance")
}
}

// TestSortByDistanceToMedoid checks that SortByDistanceToMedoid places the
// medoid first and the remaining Individuals by increasing distance from it.
func TestSortByDistanceToMedoid(t *testing.T) {
	var (
		// The Fitness field encodes the rank each Individual is expected to
		// have after sorting. The Individuals are listed out of order so that
		// a sort which does nothing fails the test, and they carry distinct
		// IDs because the sort looks up average distances by ID.
		indis = Individuals{
			Individual{Genome: Vector{5, 5, 5}, Fitness: 2, ID: "1"},
			Individual{Genome: Vector{3, 3, 3}, Fitness: 0, ID: "2"},
			Individual{Genome: Vector{2, 2, 2}, Fitness: 1, ID: "3"},
		}
		dm = newDistanceMemoizer(l1Distance)
	)
	indis.SortByDistanceToMedoid(dm)
	for i := range indis {
		if indis[i].Fitness != float64(i) {
			t.Error("Individuals were not sorted according to their distance to the medoid")
		}
	}
}

// TestRebalanceClusters checks that rebalanceClusters fills undersized
// clusters with the closest Individuals from a cluster that has some to spare,
// and that it reports an error when rebalancing is impossible.
func TestRebalanceClusters(t *testing.T) {
	var (
		clusters = []Individuals{
			{
				Individual{Genome: Vector{1, 1, 1}, ID: "1"},
				Individual{Genome: Vector{1, 1, 1}, ID: "2"},
				Individual{Genome: Vector{1, 1, 1}, ID: "3"},
				Individual{Genome: Vector{2, 2, 2}, ID: "4"}, // Second furthest away from the cluster
				Individual{Genome: Vector{3, 3, 3}, ID: "5"}, // Furthest away from the cluster
			},
			{
				Individual{Genome: Vector{2, 2, 2}, ID: "6"},
			},
			{
				Individual{Genome: Vector{3, 3, 3}, ID: "7"},
			},
		}
		dm = newDistanceMemoizer(l1Distance)
	)
	// Use the returned slice and error instead of relying only on the in-place
	// modification of the argument.
	clusters, err := rebalanceClusters(clusters, dm, 2)
	if err != nil {
		t.Fatalf("rebalanceClusters returned an unexpected error: %v", err)
	}
	// Check the first cluster gave away exactly two Individuals
	if len(clusters[0]) != 3 {
		t.Errorf("first cluster has %d Individuals, expected 3", len(clusters[0]))
	}
	// Check the second cluster
	if len(clusters[1]) != 2 || clusters[1][1].ID != "4" {
		t.Error("rebalanceClusters didn't fill the second cluster as expected")
	}
	// Check the third cluster
	if len(clusters[2]) != 2 || clusters[2][1].ID != "5" {
		t.Error("rebalanceClusters didn't fill the third cluster as expected")
	}
	// An empty cluster has no medoid and must be rejected
	var withEmpty = []Individuals{
		{},
		{Individual{Genome: Vector{1, 1, 1}, ID: "8"}},
	}
	if _, err := rebalanceClusters(withEmpty, dm, 1); err == nil {
		t.Error("rebalanceClusters should return an error when a cluster is empty")
	}
	// Two Individuals cannot fill two clusters of at least two Individuals each
	var tooFew = []Individuals{
		{Individual{Genome: Vector{1, 1, 1}, ID: "9"}},
		{Individual{Genome: Vector{2, 2, 2}, ID: "10"}},
	}
	if _, err := rebalanceClusters(tooFew, dm, 2); err == nil {
		t.Error("rebalanceClusters should return an error when there are not enough Individuals")
	}
}
18 changes: 11 additions & 7 deletions ga.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,9 @@ func (ga *GA) Initialize() {
func (ga *GA) Enhance() error {
var start = time.Now()
ga.Generations++
// Migrate the individuals between the populations if there are enough
// populations, there is a migrator and the migration frequency divides the
// generation count
// Migrate the individuals between the populations if there are at least 2
// Populations, there is a migrator and the migration frequency divides the
// generation count
if len(ga.Populations) > 1 && ga.Migrator != nil && ga.Generations%ga.MigFrequency == 0 {
ga.Migrator.Apply(ga.Populations, ga.rng)
}
Expand Down Expand Up @@ -168,10 +168,14 @@ func (ga *GA) Enhance() error {

func (pop *Population) speciateEvolveMerge(spec Speciator, model Model) error {
var (
species = spec.Apply(pop.Individuals, pop.rng)
pops = make([]Population, len(species))
species, err = spec.Apply(pop.Individuals, pop.rng)
pops = make([]Population, len(species))
)
// Create a slice of population from the obtained species and evolve each one separately
if err != nil {
return err
}
// Create a subpopulation from each specie so that the evolution Model can
// be applied to it.
for i, specie := range species {
pops[i] = Population{
Individuals: specie,
Expand All @@ -180,7 +184,7 @@ func (pop *Population) speciateEvolveMerge(spec Speciator, model Model) error {
ID: randString(len(pop.ID), pop.rng),
rng: pop.rng,
}
var err = model.Apply(&pops[i])
err = model.Apply(&pops[i])
if err != nil {
return err
}
Expand Down
13 changes: 13 additions & 0 deletions individual.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,19 @@ func (indis Individuals) IsSortedByFitness() bool {
return sort.SliceIsSorted(indis, less)
}

// SortByDistanceToMedoid reorders the Individuals in place by increasing
// average distance to the other Individuals, as computed by calcAvgDistances.
// The first Individual after sorting is therefore the medoid, i.e. the one
// with the lowest average distance to the rest of the Individuals.
func (indis Individuals) SortByDistanceToMedoid(dm DistanceMemoizer) {
	// Average distances are looked up by Individual ID.
	var avgDists = calcAvgDistances(indis, dm)
	sort.Slice(indis, func(a, b int) bool {
		return avgDists[indis[a].ID] < avgDists[indis[b].ID]
	})
}

// Extract the fitness of a slice of individuals into a float64 slice.
func (indis Individuals) getFitnesses() []float64 {
var fitnesses = make([]float64, len(indis))
Expand Down
16 changes: 9 additions & 7 deletions selection.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type SelElitism struct{}

// Apply SelElitism. The provided Individuals are first sorted with
// SortByFitness, then clones of the first n of them are returned along with
// the slice produced by newInts(n). The returned error is always nil.
// NOTE(review): n is not checked against len(indis); confirm callers never
// ask for more Individuals than are available.
func (sel SelElitism) Apply(n int, indis Individuals, rng *rand.Rand) (Individuals, []int, error) {
	indis.SortByFitness()
	var (
		elite   = indis[:n].Clone(rng)
		indexes = newInts(n)
	)
	return elite, indexes, nil
}

Expand Down Expand Up @@ -82,27 +83,28 @@ func (sel SelTournament) Validate() error {
// as fitness proportionate selection).
type SelRoulette struct{}

func getWeights(fitnesses []float64) []float64 {
func buildWheel(fitnesses []float64) []float64 {
var (
n = len(fitnesses)
weights = make([]float64, n)
n = len(fitnesses)
wheel = make([]float64, n)
)
for i, v := range fitnesses {
weights[i] = fitnesses[n-1] - v
wheel[i] = fitnesses[n-1] - v + 1
}
return cumsum(divide(weights, sumFloat64s(weights)))
return cumsum(divide(wheel, sumFloat64s(wheel)))
}

// Apply SelRoulette.
func (sel SelRoulette) Apply(n int, indis Individuals, rng *rand.Rand) (Individuals, []int, error) {

var (
selected = make(Individuals, n)
indexes = make([]int, n)
weights = getWeights(indis.getFitnesses())
wheel = buildWheel(indis.getFitnesses())
)
for i := range selected {
var (
index = sort.SearchFloat64s(weights, rand.Float64())
index = sort.SearchFloat64s(wheel, rand.Float64())
winner = indis[index]
)
indexes[i] = index
Expand Down
12 changes: 5 additions & 7 deletions selection_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package gago

import (
"fmt"
"testing"
)

Expand Down Expand Up @@ -66,20 +65,19 @@ func TestSelTournament(t *testing.T) {
}
}

func TestGetWeights(t *testing.T) {
func TestBuildWheel(t *testing.T) {
var testCases = []struct {
fitnesses []float64
weights []float64
}{
{[]float64{-10, -8, -5}, []float64{5.0 / 8, 1, 1}},
{[]float64{-2, 0, 2, 3}, []float64{5.0 / 9, 8.0 / 9, 1, 1}},
{[]float64{-10, -8, -5}, []float64{6.0 / 11, 10.0 / 11, 1}},
{[]float64{-2, 0, 2, 3}, []float64{6.0 / 13, 10.0 / 13, 12.0 / 13, 1}},
}
for _, test := range testCases {
var weights = getWeights(test.fitnesses)
var weights = buildWheel(test.fitnesses)
for i := range weights {
if weights[i] != test.weights[i] {
fmt.Println(weights[i], test.weights[i])
t.Error("getWeights didn't work as expected")
t.Error("buildWheel didn't work as expected")
}
}
}
Expand Down
44 changes: 20 additions & 24 deletions speciation.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,49 +2,47 @@ package gago

import (
"errors"
"fmt"
"math"
"math/rand"
"sort"
)

// A Speciator partitions a population into n smaller subpopulations. Each
// subpopulation shares the same random number generator inherited from the
// initial population.
type Speciator interface {
Apply(indis Individuals, rng *rand.Rand) []Individuals
Apply(indis Individuals, rng *rand.Rand) ([]Individuals, error)
Validate() error
}

// SpecKMedoids (k-medoid clustering).
type SpecKMedoids struct {
K int // Number of medoids
K int // Number of medoids
MinPerCluster int
Metric Metric // Dissimimilarity measure
MaxIterations int
}

// Apply SpecKMedoids.
func (spec SpecKMedoids) Apply(indis Individuals, rng *rand.Rand) []Individuals {
func (spec SpecKMedoids) Apply(indis Individuals, rng *rand.Rand) ([]Individuals, error) {
// Check there are at least K Individuals
if len(indis) < spec.K {
return nil, fmt.Errorf("SpecKMedoids: have %d individuals and need at least %d",
len(indis), spec.K)
}
var (
species = make([]Individuals, spec.K)
medoids = make(Individuals, spec.K)
dm = newDistanceMemoizer(spec.Metric)
)
// Make a copy of the provided individuals to avoid side effects
var individuals = indis.Clone(rng)
// Initialize the clusters with the individuals having the lowest average
// distances with the other individuals
var (
avgDists = calcAvgDistances(individuals, dm)
less = func(i, j int) bool {
return avgDists[individuals[i].ID] < avgDists[individuals[j].ID]
}
)
sort.Slice(individuals, less)
copy(medoids, individuals[:spec.K])
indis.SortByDistanceToMedoid(dm)
copy(medoids, indis[:spec.K])
// Keep track of the total distance from the medoid to each of the cluster's members
var total float64
// Assign each individual to the closest initial medoid
for _, indi := range individuals {
for _, indi := range indis {
var i = indi.IdxOfClosest(medoids, dm)
species[i] = append(species[i], indi)
total += dm.GetDistance(medoids[i], indi)
Expand All @@ -58,14 +56,10 @@ func (spec SpecKMedoids) Apply(indis Individuals, rng *rand.Rand) []Individuals
)
// Recompute the new medoid inside each specie
for i, specie := range species {
avgDists = calcAvgDistances(specie, dm)
less = func(i, j int) bool {
return avgDists[specie[i].ID] < avgDists[specie[j].ID]
}
sort.Slice(specie, less)
specie.SortByDistanceToMedoid(dm)
medoids[i] = specie[0]
}
// Reassign each individual to the closest initial medoid
// Reassign each individual to the closest new medoid
for _, indi := range indis {
var i = indi.IdxOfClosest(medoids, dm)
newSpecies[i] = append(newSpecies[i], indi)
Expand All @@ -78,7 +72,9 @@ func (spec SpecKMedoids) Apply(indis Individuals, rng *rand.Rand) []Individuals
copy(species, newSpecies)
total = newTotal
}
return species
// Rebalance the species so that there are at least MinPerCluster Individuals
// in each one
rebalanceClusters(species, dm, spec.MinPerCluster)
return species, nil
}

// Validate SpecKMedoids fields.
Expand Down Expand Up @@ -106,7 +102,7 @@ type SpecFitnessInterval struct {
}

// Apply SpecFitnessInterval.
func (spec SpecFitnessInterval) Apply(indis Individuals, rng *rand.Rand) []Individuals {
func (spec SpecFitnessInterval) Apply(indis Individuals, rng *rand.Rand) ([]Individuals, error) {
var (
species = make([]Individuals, spec.K)
n = len(indis)
Expand All @@ -116,7 +112,7 @@ func (spec SpecFitnessInterval) Apply(indis Individuals, rng *rand.Rand) []Indiv
var a, b = i * m, min((i+1)*m, n)
species[i] = indis[a:b]
}
return species
return species, nil
}

// Validate SpecFitnessInterval fields.
Expand Down
Loading

0 comments on commit d05d9db

Please sign in to comment.