Skip to content

Commit

Permalink
added parallelizations
Browse files Browse the repository at this point in the history
added more efficient indexation
added beginning of extended annotation of molecules
  • Loading branch information
mSorok committed Sep 24, 2019
1 parent f6e34b6 commit ea80f89
Show file tree
Hide file tree
Showing 21 changed files with 1,154 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@ public void run(String... args) throws Exception {


//cleaning the DB before filling it
//mongoTemplate.getDb().drop();
mongoTemplate.getDb().drop();



System.out.println("Code version from 20 August 2019");
System.out.println("Code version from 23rd september 2019");

if (args.length > 0) {
String dataDirectory = args[0];
Expand Down Expand Up @@ -87,10 +87,10 @@ public void run(String... args) throws Exception {



updaterService.updateSourceNaturalProducts();
//compute similarities between natural products
similarityComputationService.generateAllPairs();
similarityComputationService.computeSimilarities();
//similarityComputationService.computeSimilarities();
similarityComputationService.doParallelizedWork();



Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package de.unijena.cheminf.npopensourcecollector.misc;

import org.springframework.stereotype.Service;

import java.util.Arrays;
import java.util.HashSet;


@Service
public class DatabaseTypeChecker {

    /*
     * Classifies a source-database identifier two ways: by geographic group
     * (checkContinent) and by taxonomic group (checkKingdom). Both lookups are
     * exact, case-sensitive membership tests against the fixed sets declared
     * below; an identifier found in no set (including null) yields the
     * fallback label "nogeo" or "notax" respectively.
     */

    // ---- geographic groups ----

    private final HashSet<String> continentAfrica = asSet(
            "afrodb", "afrocancer", "afromalariadb", "afrotryp", "conmednp",
            "etm", "mitishamba", "nanpdb", "p-anapl", "sancdb");

    private final HashSet<String> continentChina = asSet(
            "him", "hit", "tcmdb_taiwan", "tcmid", "tipdb");

    private final HashSet<String> continentIndia = asSet("imppat", "inpacdb");

    private final HashSet<String> continentEurope = asSet("tppt");

    private final HashSet<String> continentAmerica = asSet("nubbedb", "uefs", "biofacquim");

    // ---- taxonomic groups ----

    private final HashSet<String> taxPlants = asSet(
            "uefs", "tppt", "tmdb", "tipdb", "tcmid", "tcmdb_taiwan", "spektraris", "sancdb",
            "respect", "p-anapl", "npact", "nanpdb", "mitishamba", "inpacdb", "imppat",
            "hit", "him", "etm", "conmednp", "afrotryp", "afromalariadb", "afrocancer", "afrodb");

    private final HashSet<String> taxBacteria = asSet("streptomedb");

    private final HashSet<String> taxFungi = asSet("lichendatabase");

    // No source database is currently registered for these two groups, so the
    // "animals" and "marine" labels are never returned at the moment.
    private final HashSet<String> taxAnimals = asSet();

    private final HashSet<String> taxMarine = asSet();

    private final HashSet<String> taxMixed = asSet(
            "nubbedb", "npcare", "npatlas", "npass", "analyticon_all_np", "biofacquim");

    /**
     * Builds a hash set holding exactly the given identifiers.
     *
     * @param names identifiers to store (may be empty)
     * @return a new set containing {@code names}
     */
    private static HashSet<String> asSet(String... names) {
        return new HashSet<String>(Arrays.asList(names));
    }

    /**
     * Returns the geographic label for a source database.
     *
     * @param sourceDB identifier of the source database; matched exactly
     * @return one of {@code "africa"}, {@code "china"}, {@code "india"},
     *         {@code "europe"}, {@code "southamerica"}, or {@code "nogeo"} when
     *         the identifier is in none of the geographic sets
     */
    public String checkContinent(String sourceDB) {
        if (continentAfrica.contains(sourceDB)) {
            return "africa";
        }
        if (continentChina.contains(sourceDB)) {
            return "china";
        }
        if (continentIndia.contains(sourceDB)) {
            return "india";
        }
        if (continentEurope.contains(sourceDB)) {
            return "europe";
        }
        if (continentAmerica.contains(sourceDB)) {
            return "southamerica";
        }
        return "nogeo";
    }

    /**
     * Returns the taxonomic label for a source database.
     *
     * @param sourceDB identifier of the source database; matched exactly
     * @return one of {@code "plants"}, {@code "bacteria"}, {@code "animals"},
     *         {@code "fungi"}, {@code "marine"}, {@code "mixed"}, or
     *         {@code "notax"} when the identifier is in none of the taxonomic sets
     */
    public String checkKingdom(String sourceDB) {
        if (taxPlants.contains(sourceDB)) {
            return "plants";
        }
        if (taxBacteria.contains(sourceDB)) {
            return "bacteria";
        }
        if (taxAnimals.contains(sourceDB)) {
            return "animals";
        }
        if (taxFungi.contains(sourceDB)) {
            return "fungi";
        }
        if (taxMarine.contains(sourceDB)) {
            return "marine";
        }
        if (taxMixed.contains(sourceDB)) {
            return "mixed";
        }
        return "notax";
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ public IAtomContainer checkMolecule(IAtomContainer molecule){
}


//Remove aromaticity
String smi;
SmilesGenerator sg = new SmilesGenerator(SmiFlavor.Unique);
SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package de.unijena.cheminf.npopensourcecollector.mongocollections;

import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;

/**
 * MongoDB document holding descriptive metadata about one source database:
 * its name, a local file name, a URL, free-text comments and a molecule count.
 *
 * <p>Field names are kept exactly as declared because they are what the
 * mapping layer persists; renaming them would change the stored document keys.
 */
@Document
public class NPDatabase {

    /** Document identifier. */
    @Id
    public String id;

    String name;

    String localFileName;

    String url;

    String comments;

    // NOTE(review): presumably the number of unique molecules contributed by
    // this database -- confirm against the code that populates it.
    Integer nb_unique_molecules;


    /**
     * @return the document identifier, or {@code null} if none has been assigned
     */
    public String getId() {
        return id;
    }

    /**
     * Sets the document identifier. Provided so this class offers the same
     * id accessor pair as the other document classes in this package.
     *
     * @param id the identifier to store
     */
    public void setId(String id) {
        this.id = id;
    }

    /**
     * @return the value of the {@code name} field
     */
    public String getName() {
        return name;
    }

    /**
     * @param name the value to store in the {@code name} field
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the value of the {@code url} field
     */
    public String getUrl() {
        return url;
    }

    /**
     * @param url the value to store in the {@code url} field
     */
    public void setUrl(String url) {
        this.url = url;
    }

    /**
     * @return the value of the {@code comments} field
     */
    public String getComments() {
        return comments;
    }

    /**
     * @param comments the value to store in the {@code comments} field
     */
    public void setComments(String comments) {
        this.comments = comments;
    }

    /**
     * @return the value of the {@code nb_unique_molecules} field
     */
    public Integer getNb_unique_molecules() {
        return nb_unique_molecules;
    }

    /**
     * @param nb_unique_molecules the value to store in the {@code nb_unique_molecules} field
     */
    public void setNb_unique_molecules(Integer nb_unique_molecules) {
        this.nb_unique_molecules = nb_unique_molecules;
    }

    /**
     * @return the value of the {@code localFileName} field
     */
    public String getLocalFileName() {
        return localFileName;
    }

    /**
     * @param localFileName the value to store in the {@code localFileName} field
     */
    public void setLocalFileName(String localFileName) {
        this.localFileName = localFileName;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package de.unijena.cheminf.npopensourcecollector.mongocollections;

import org.springframework.data.mongodb.repository.MongoRepository;

/**
 * Repository for {@link NPDatabase} documents with {@code String} identifiers.
 * Declares no methods of its own; everything available on it is inherited
 * from {@link MongoRepository}.
 */
public interface NPDatabaseRepository extends MongoRepository<NPDatabase, String> {
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ public class NPSimilarity {
@Id
public String id;

public UniqueNaturalProduct uniqueNaturalProduct1;
public String uniqueNaturalProductID1;

public UniqueNaturalProduct uniqueNaturalProduct2;
public String uniqueNaturalProductID2;

public Double tanimoto;

Expand All @@ -26,24 +26,24 @@ public void setId(String id) {
this.id = id;
}

public UniqueNaturalProduct getUniqueNaturalProduct1() {
return uniqueNaturalProduct1;
public Double getTanimoto() {
return tanimoto;
}

public void setUniqueNaturalProduct1(UniqueNaturalProduct uniqueNaturalProduct1) {
this.uniqueNaturalProduct1 = uniqueNaturalProduct1;
public String getUniqueNaturalProductID1() {
return uniqueNaturalProductID1;
}

public UniqueNaturalProduct getUniqueNaturalProduct2() {
return uniqueNaturalProduct2;
public void setUniqueNaturalProductID1(String uniqueNaturalProductID1) {
this.uniqueNaturalProductID1 = uniqueNaturalProductID1;
}

public void setUniqueNaturalProduct2(UniqueNaturalProduct uniqueNaturalProduct2) {
this.uniqueNaturalProduct2 = uniqueNaturalProduct2;
public String getUniqueNaturalProductID2() {
return uniqueNaturalProductID2;
}

public Double getTanimoto() {
return tanimoto;
public void setUniqueNaturalProductID2(String uniqueNaturalProductID2) {
this.uniqueNaturalProductID2 = uniqueNaturalProductID2;
}

public void setTanimoto(Double tanimoto) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
import org.springframework.data.mongodb.core.index.Indexed;
import org.springframework.data.mongodb.core.mapping.Document;

import java.util.ArrayList;


@Document
public class SourceNaturalProduct {

@Id
public String id;

@Indexed
public String source;

public String originalSmiles;
Expand All @@ -21,8 +24,10 @@ public class SourceNaturalProduct {

public String originalInchiKey;

@Indexed
public String simpleInchi;

@Indexed
public String simpleInchiKey;

public Integer heavyAtomNumber;
Expand All @@ -37,6 +42,20 @@ public class SourceNaturalProduct {

public UniqueNaturalProduct uniqueNaturalProduct;

public ArrayList<String> citation;

public ArrayList<Integer> taxid;

public ArrayList<String> organismText;

public String continent;

public ArrayList<String> geographicLocation;

public String name;

public ArrayList<String> synonyms;




Expand Down Expand Up @@ -162,4 +181,60 @@ public UniqueNaturalProduct getUniqueNaturalProduct() {
/**
 * Stores the given object in the {@code uniqueNaturalProduct} field.
 *
 * @param uniqueNaturalProduct the value to store
 */
public void setUniqueNaturalProduct(UniqueNaturalProduct uniqueNaturalProduct) {
this.uniqueNaturalProduct = uniqueNaturalProduct;
}

/**
 * Returns the {@code citation} list itself (not a copy), so changes made to
 * the returned list are visible on this object.
 *
 * @return the current value of the {@code citation} field
 */
public ArrayList<String> getCitation() {
return citation;
}

/**
 * Replaces the {@code citation} list with the given reference; no copy is made.
 *
 * @param citation the list to store
 */
public void setCitation(ArrayList<String> citation) {
this.citation = citation;
}

/**
 * Returns the {@code taxid} list itself (not a copy).
 * NOTE(review): presumably taxonomy identifiers -- confirm against the code that fills it.
 *
 * @return the current value of the {@code taxid} field
 */
public ArrayList<Integer> getTaxid() {
return taxid;
}

/**
 * Replaces the {@code taxid} list with the given reference; no copy is made.
 *
 * @param taxid the list to store
 */
public void setTaxid(ArrayList<Integer> taxid) {
this.taxid = taxid;
}

/**
 * Returns the value of the {@code continent} field.
 * NOTE(review): presumably one of the labels produced by
 * DatabaseTypeChecker.checkContinent -- verify against the caller that sets it.
 *
 * @return the current value of the {@code continent} field
 */
public String getContinent() {
return continent;
}

/**
 * Stores the given value in the {@code continent} field.
 *
 * @param continent the value to store
 */
public void setContinent(String continent) {
this.continent = continent;
}

/**
 * Returns the {@code organismText} list itself (not a copy).
 *
 * @return the current value of the {@code organismText} field
 */
public ArrayList<String> getOrganismText() {
return organismText;
}

/**
 * Replaces the {@code organismText} list with the given reference; no copy is made.
 *
 * @param organismText the list to store
 */
public void setOrganismText(ArrayList<String> organismText) {
this.organismText = organismText;
}

/**
 * Returns the value of the {@code name} field.
 *
 * @return the current value of the {@code name} field
 */
public String getName() {
return name;
}

/**
 * Stores the given value in the {@code name} field.
 *
 * @param name the value to store
 */
public void setName(String name) {
this.name = name;
}

/**
 * Returns the {@code synonyms} list itself (not a copy).
 *
 * @return the current value of the {@code synonyms} field
 */
public ArrayList<String> getSynonyms() {
return synonyms;
}

/**
 * Replaces the {@code synonyms} list with the given reference; no copy is made.
 *
 * @param synonyms the list to store
 */
public void setSynonyms(ArrayList<String> synonyms) {
this.synonyms = synonyms;
}

/**
 * Returns the {@code geographicLocation} list itself (not a copy).
 *
 * @return the current value of the {@code geographicLocation} field
 */
public ArrayList<String> getGeographicLocation() {
return geographicLocation;
}

/**
 * Replaces the {@code geographicLocation} list with the given reference; no copy is made.
 *
 * @param geographicLocation the list to store
 */
public void setGeographicLocation(ArrayList<String> geographicLocation) {
this.geographicLocation = geographicLocation;
}
}
Loading

0 comments on commit ea80f89

Please sign in to comment.