Commit a57a25d6 authored by Antoine RICHARD's avatar Antoine RICHARD
Browse files

init cross validation process but cannot load datasets

parent 75a6677d
9f9a679fce7a0a55f330f489eed9dff6
\ No newline at end of file
ee16734fb3c8f4b43526d8207a300940ae0299db
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
<groupId>mulan</groupId>
<artifactId>mulan</artifactId>
<version>1.5.0</version>
</project>
674b354c054befbb86912da4160e59a3
\ No newline at end of file
5c7c4642f44835c75fc027d2dfb746339817c74a
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<metadata>
<groupId>mulan</groupId>
<artifactId>mulan</artifactId>
<versioning>
<release>1.5.0</release>
<versions>
<version>1.5.0</version>
</versions>
<lastUpdated>20181106132241</lastUpdated>
</versioning>
</metadata>
e180b7209e005b3e4b2d2ea0cd4fc42d
\ No newline at end of file
00991546943d2ef2aeae586b0e6b2f41d437069f
\ No newline at end of file
......@@ -7,6 +7,13 @@
<version>1.0-SNAPSHOT</version>
<name>api</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
......@@ -14,5 +21,75 @@
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
<version>2.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.11.2</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.11.2</version>
</dependency>
<dependency>
<groupId>nz.ac.waikato.cms.weka</groupId>
<artifactId>weka-dev</artifactId>
<version>3.7.10</version>
</dependency>
<dependency>
<groupId>mulan</groupId>
<artifactId>mulan</artifactId>
<version>1.5.0</version>
</dependency>
</dependencies>
<repositories>
<repository>
<id>lib</id>
<url>file:${project.basedir}/lib</url>
</repository>
</repositories>
<build>
<plugins>
<plugin>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.1.1</version>
<executions>
<execution>
<phase>compile</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/lib</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.1.1</version>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<classpathPrefix>lib/</classpathPrefix>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>
/*
* This file is part of "Transparent Algorithms Performances".
*
* "Transparent Algorithms Performances" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "Transparent Algorithms Performances" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "Transparent Algorithms Performances". If not, see <https://www.gnu.org/licenses/>.
*/
package com.lamsade;
//import com.lamsade.learners.FuzzyBayes;
//import com.lamsade.learners.HistBayes;
import java.io.File;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashMap;
import mulan.classifier.MultiLabelLearnerBase;
import mulan.classifier.meta.RAkEL;
import mulan.classifier.transformation.LabelPowerset;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import mulan.data.MultiLabelInstances;
import mulan.evaluation.Evaluator;
import mulan.evaluation.MultipleEvaluation;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.rules.JRip;
import weka.classifiers.trees.J48;
import weka.core.Utils;
/**
* Hello world!
* App to make cross validation of machine learning algorithms.
*
* @author A. Richard
*/
public class App
{
{
private static final Logger LOG = LogManager.getLogger(App.class);
/**
* @param args the command line arguments
*/
public static void main( String[] args )
{
System.out.println( "Hello World!" );
try{
// Get potential options
int numFolds = 10; // default value
try{
String folds = Utils.getOption("folds", args);
if(folds !=null && !folds.isEmpty())
numFolds = Integer.parseInt(folds);
}
catch(Exception e){
LOG.error(e);
}
// Get result file name
String outFilename = "../results/crossvalidation.csv"; // default value
try{
String tmp = Utils.getOption("output", args);
if(tmp != null && !tmp.isEmpty())
outFilename = tmp;
}
catch(Exception e){
LOG.error(e);
}
// Set datasets to use
HashMap<String, MultiLabelInstances> datasets = new HashMap<>();
String[] dataset_names = {
"emotions", // 72 numerical attributes and 6 labels
"CAL500", // 68 numerical attributes and 174 labels
//"scene", // 294 numerical attributes and 6 labels
"genbase", // 1186 nominal attributes and 27 labels
//"bibtex", // 1836 nominal attributes and 159 labels
"birds", // 2 nominal and 258 numerical attributes and 19 labels
//"medical", // 1449 nominal attributes and 45 labels
"flags", // 9 nominal and 10 numeric attributes and 7 labels
// "consultations" // 2 nominal and 2 numeric attributes and 15 labels
};
for(String dataset_name : dataset_names){
datasets.put(
dataset_name,
new MultiLabelInstances(
"../data/"+dataset_name+".arff",
"../data/"+dataset_name+".xml"
)
);
}
// Setup output
String[] columns = {
"Hamming Loss",
"Micro-averaged Precision",
"Micro-averaged Recall",
"Micro-averaged F-Measure",
"Macro-averaged Precision",
"Macro-averaged Recall",
"Macro-averaged F-Measure"
};
// Init ouput file
File f = new File(outFilename);
if(!f.exists())
f.getParentFile().mkdirs();
else
f.delete();
f.createNewFile();
// Write Headers
String output = "Learner;Dataset";
for(int i = 0 ; i < columns.length ; i++)
output += ";" + columns[i].replace(" ", "_")
+ ";" + columns[i].replace(" ", "_") + "_std"
;
output += "\n";
PrintWriter writer = new PrintWriter(f.getPath(), "UTF-8");
writer.print(output);
writer.flush();
output = "";
// For each dataset
for(String datasetName : datasets.keySet()){
// Get dataset
MultiLabelInstances dataset = datasets.get(datasetName);
LOG.info("load dataset: {"
+ "name: " + datasetName + ", "
+ "nb instances: " + dataset.getNumInstances() + ", "
+ "nb attributes: " + dataset.getFeatureAttributes().size() + ", "
+ "nb labels: " + dataset.getNumLabels() + ", "
+ "cardinality: " + dataset.getCardinality()
+ "}"
);
// Init Learners
HashMap<String, MultiLabelLearnerBase> learners = new HashMap<>();
//learners.put("FuzzyBayes", new FuzzyBayes());
//learners.put("HistBayes", new HistBayes());
//learners.put("MLkNN", new MLkNN()); // k-Nearest Neighboors
//learners.put("BPMLL", new BPMLL());
learners.put("RAkEL+C4.5", new RAkEL(new LabelPowerset(new J48()))); // Decision Tree
//learners.put("RAkEL+LMT", new RAkEL(new LabelPowerset(new LMT()))); // Decision Tree
learners.put("RAkEL+Ripper", new RAkEL(new LabelPowerset(new JRip()))); // Rules
//learners.put("RAkEL+PART", new RAkEL(new LabelPowerset(new PART()))); // DT + Rules
learners.put("RAkEL+NaiveBayes", new RAkEL(new LabelPowerset(new NaiveBayes()))); // Bayes Net
//learners.put("RAkEL+KStar", new RAkEL(new LabelPowerset(new KStar()))); // instance based
//learners.put("RAkEL+SMO", new RAkEL(new LabelPowerset(new SMO()))); // SVM
// For each selected learner
for(String learnerName : learners.keySet()){
// Get learner
MultiLabelLearnerBase learner = learners.get(learnerName);
LOG.info("load leaner : {"
+ "name : " + learnerName + ", "
+ "tech_info : \"" + learner.getTechnicalInformation().toString() + "\""
+ "}"
);
// Evaluate Learner
Evaluator eval = new Evaluator();
MultipleEvaluation results;
LOG.info("launch " + learnerName + " evaluation with " + datasetName + " dataset");
try{
results = eval.crossValidate(learner, dataset, numFolds);
// Add learner name and dataset name to output
output += learnerName + ";" + datasetName;
// Add learner results to output
for(int i = 0 ; i < columns.length ; i++)
output += ";" + results.getMean(columns[i])
+ ";" + results.getStd(columns[i]);
output += "\n";
// Write into output file
writer.print(output);
writer.flush();
// Clean output
output = "";
}catch(IllegalArgumentException e){
LOG.error(e + " - StackTrace: " + Arrays.toString(e.getStackTrace()));
}
}
}
writer.close();
}
catch(Exception e){
LOG.error(e + " - StackTrace: " + Arrays.toString(e.getStackTrace()));
}
}
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- Log4j2 configuration: everything is written to the console. -->
<Configuration xmlns="http://logging.apache.org/log4j/2.0/config">
<Properties>
<!-- Base directory for log files; not referenced by any appender yet. -->
<Property name="basePath">logs</Property>
</Properties>
<Appenders>
<Console name="STDOUT" target="SYSTEM_OUT">
<PatternLayout pattern="%d{HH:mm:ss.SSS} %highlight{%-5level %logger{36}.%M() @%L - %msg%n}{FATAL=red bold, ERROR=red, WARN=yellow, INFO=black, DEBUG=blue, TRACE=gray}" disableAnsi="false"/>
</Console>
</Appenders>
<Loggers>
<!-- Debug level for the application package; it has no appender of its own
and relies on the Root appender. The previous name, "com.jcg", matched no
class of this project. -->
<Logger name="com.lamsade" level="debug" />
<Root level="info">
<AppenderRef ref="STDOUT" />
</Root>
</Loggers>
</Configuration>
This source diff could not be displayed because it is too large. You can view the blob instead.
<?xml version="1.0" encoding="utf-8"?>
<labels xmlns="http://mulan.sourceforge.net/labels">
<label name="Angry-Agressive"></label>
<label name="NOT-Emotion-Angry-Agressive"></label>
<label name="Emotion-Arousing-Awakening"></label>
<label name="NOT-Emotion-Arousing-Awakening"></label>
<label name="Emotion-Bizarre-Weird"></label>
<label name="NOT-Emotion-Bizarre-Weird"></label>
<label name="Emotion-Calming-Soothing"></label>
<label name="NOT-Emotion-Calming-Soothing"></label>
<label name="Emotion-Carefree-Lighthearted"></label>
<label name="NOT-Emotion-Carefree-Lighthearted"></label>
<label name="Emotion-Cheerful-Festive"></label>
<label name="NOT-Emotion-Cheerful-Festive"></label>
<label name="Emotion-Emotional-Passionate"></label>
<label name="NOT-Emotion-Emotional-Passionate"></label>
<label name="Emotion-Exciting-Thrilling"></label>
<label name="NOT-Emotion-Exciting-Thrilling"></label>
<label name="Emotion-Happy"></label>
<label name="NOT-Emotion-Happy"></label>
<label name="Emotion-Laid-back-Mellow"></label>
<label name="NOT-Emotion-Laid-back-Mellow"></label>
<label name="Emotion-Light-Playful"></label>
<label name="NOT-Emotion-Light-Playful"></label>
<label name="Emotion-Loving-Romantic"></label>
<label name="NOT-Emotion-Loving-Romantic"></label>
<label name="Emotion-Pleasant-Comfortable"></label>
<label name="NOT-Emotion-Pleasant-Comfortable"></label>
<label name="Emotion-Positive-Optimistic"></label>
<label name="NOT-Emotion-Positive-Optimistic"></label>
<label name="Emotion-Powerful-Strong"></label>
<label name="NOT-Emotion-Powerful-Strong"></label>
<label name="Emotion-Sad"></label>
<label name="NOT-Emotion-Sad"></label>
<label name="Emotion-Tender-Soft"></label>
<label name="NOT-Emotion-Tender-Soft"></label>
<label name="Emotion-Touching-Loving"></label>
<label name="NOT-Emotion-Touching-Loving"></label>
<label name="Genre--_Alternative"></label>
<label name="Genre--_Alternative_Folk"></label>
<label name="Genre--_Bebop"></label>
<label name="Genre--_Brit_Pop"></label>
<label name="Genre--_Classic_Rock"></label>
<label name="Genre--_Contemporary_Blues"></label>
<label name="Genre--_Contemporary_RandB"></label>
<label name="Genre--_Cool_Jazz"></label>
<label name="Genre--_Country_Blues"></label>
<label name="Genre--_Dance_Pop"></label>
<label name="Genre--_Electric_Blues"></label>
<label name="Genre--_Funk"></label>
<label name="Genre--_Gospel"></label>
<label name="Genre--_Metal-Hard_Rock"></label>
<label name="Genre--_Punk"></label>
<label name="Genre--_Roots_Rock"></label>
<label name="Genre--_Singer-Songwriter"></label>
<label name="Genre--_Soft_Rock"></label>
<label name="Genre--_Soul"></label>
<label name="Genre--_Swing"></label>
<label name="Genre-Bluegrass"></label>
<label name="Genre-Blues"></label>
<label name="Genre-Country"></label>
<label name="Genre-Electronica"></label>
<label name="Genre-Folk"></label>
<label name="Genre-Hip_Hop-Rap"></label>
<label name="Genre-Jazz"></label>
<label name="Genre-Pop"></label>
<label name="Genre-RandB"></label>
<label name="Genre-Rock"></label>
<label name="Genre-World"></label>
<label name="Instrument_-_Acoustic_Guitar"></label>
<label name="Instrument_-_Ambient_Sounds"></label>
<label name="Instrument_-_Backing_vocals"></label>
<label name="Instrument_-_Bass"></label>
<label name="Instrument_-_Drum_Machine"></label>
<label name="Instrument_-_Drum_Set"></label>
<label name="Instrument_-_Electric_Guitar_(clean)"></label>
<label name="Instrument_-_Electric_Guitar_(distorted)"></label>
<label name="Instrument_-_Female_Lead_Vocals"></label>
<label name="Instrument_-_Hand_Drums"></label>
<label name="Instrument_-_Harmonica"></label>
<label name="Instrument_-_Horn_Section"></label>
<label name="Instrument_-_Male_Lead_Vocals"></label>
<label name="Instrument_-_Organ"></label>
<label name="Instrument_-_Piano"></label>
<label name="Instrument_-_Samples"></label>
<label name="Instrument_-_Saxophone"></label>
<label name="Instrument_-_Sequencer"></label>
<label name="Instrument_-_String_Ensemble"></label>
<label name="Instrument_-_Synthesizer"></label>
<label name="Instrument_-_Tambourine"></label>
<label name="Instrument_-_Trombone"></label>
<label name="Instrument_-_Trumpet"></label>
<label name="Instrument_-_Violin-Fiddle"></label>
<label name="Song-Catchy-Memorable"></label>
<label name="NOT-Song-Catchy-Memorable"></label>
<label name="Song-Changing_Energy_Level"></label>
<label name="NOT-Song-Changing_Energy_Level"></label>
<label name="Song-Fast_Tempo"></label>
<label name="NOT-Song-Fast_Tempo"></label>
<label name="Song-Heavy_Beat"></label>
<label name="NOT-Song-Heavy_Beat"></label>
<label name="Song-High_Energy"></label>
<label name="NOT-Song-High_Energy"></label>
<label name="Song-Like"></label>
<label name="NOT-Song-Like"></label>
<label name="Song-Positive_Feelings"></label>
<label name="NOT-Song-Positive_Feelings"></label>
<label name="Song-Quality"></label>
<label name="NOT-Song-Quality"></label>
<label name="Song-Recommend"></label>
<label name="NOT-Song-Recommend"></label>
<label name="Song-Recorded"></label>
<label name="NOT-Song-Recorded"></label>
<label name="Song-Texture_Acoustic"></label>
<label name="Song-Texture_Electric"></label>
<label name="Song-Texture_Synthesized"></label>
<label name="Song-Tonality"></label>
<label name="NOT-Song-Tonality"></label>
<label name="Song-Very_Danceable"></label>
<label name="NOT-Song-Very_Danceable"></label>
<label name="Usage-At_a_party"></label>
<label name="Usage-At_work"></label>
<label name="Usage-Cleaning_the_house"></label>
<label name="Usage-Driving"></label>
<label name="Usage-Exercising"></label>
<label name="Usage-Getting_ready_to_go_out"></label>
<label name="Usage-Going_to_sleep"></label>
<label name="Usage-Hanging_with_friends"></label>
<label name="Usage-Intensely_Listening"></label>
<label name="Usage-Reading"></label>
<label name="Usage-Romancing"></label>
<label name="Usage-Sleeping"></label>
<label name="Usage-Studying"></label>
<label name="Usage-Waking_up"></label>
<label name="Usage-With_the_family"></label>
<label name="Vocals-Aggressive"></label>
<label name="Vocals-Altered_with_Effects"></label>
<label name="Vocals-Breathy"></label>
<label name="Vocals-Call_and_Response"></label>
<label name="Vocals-Duet"></label>
<label name="Vocals-Emotional"></label>
<label name="Vocals-Falsetto"></label>
<label name="Vocals-Gravelly"></label>
<label name="Vocals-High-pitched"></label>
<label name="Vocals-Low-pitched"></label>
<label name="Vocals-Monotone"></label>
<label name="Vocals-Rapping"></label>
<label name="Vocals-Screaming"></label>
<label name="Vocals-Spoken"></label>
<label name="Vocals-Strong"></label>
<label name="Vocals-Vocal_Harmonies"></label>
<label name="Genre-Best--_Alternative"></label>
<label name="Genre-Best--_Classic_Rock"></label>
<label name="Genre-Best--_Metal-Hard_Rock"></label>
<label name="Genre-Best--_Punk"></label>
<label name="Genre-Best--_Soft_Rock"></label>
<label name="Genre-Best--_Soul"></label>
<label name="Genre-Best-Blues"></label>
<label name="Genre-Best-Country"></label>
<label name="Genre-Best-Electronica"></label>
<label name="Genre-Best-Folk"></label>
<label name="Genre-Best-Hip_Hop-Rap"></label>
<label name="Genre-Best-Jazz"></label>
<label name="Genre-Best-Pop"></label>
<label name="Genre-Best-RandB"></label>
<label name="Genre-Best-Rock"></label>
<label name="Genre-Best-World"></label>
<label name="Instrument_-_Acoustic_Guitar-Solo"></label>
<label name="Instrument_-_Electric_Guitar_(clean)-Solo"></label>
<label name="Instrument_-_Electric_Guitar_(distorted)-Solo"></label>
<label name="Instrument_-_Female_Lead_Vocals-Solo"></label>
<label name="Instrument_-_Harmonica-Solo"></label>
<label name="Instrument_-_Male_Lead_Vocals-Solo"></label>
<label name="Instrument_-_Piano-Solo"></label>
<label name="Instrument_-_Saxophone-Solo"></label>
<label name="Instrument_-_Trumpet-Solo"></label>
</labels>
This diff is collapsed.
<?xml version="1.0" encoding="utf-8"?>
<labels xmlns="http://mulan.sourceforge.net/labels">
<label name="TAG_2005"></label>
<label name="TAG_2006"></label>
<label name="TAG_2007"></label>
<label name="TAG_agdetection"></label>
<label name="TAG_algorithms"></label>
<label name="TAG_amperometry"></label>
<label name="TAG_analysis"></label>
<label name="TAG_and"></label>
<label name="TAG_annotation"></label>
<label name="TAG_antibody"></label>
<label name="TAG_apob"></label>
<label name="TAG_architecture"></label>
<label name="TAG_article"></label>
<label name="TAG_bettasplendens"></label>
<label name="TAG_bibteximport"></label>
<label name="TAG_book"></label>
<label name="TAG_children"></label>
<label name="TAG_classification"></label>
<label name="TAG_clustering"></label>
<label name="TAG_cognition"></label>
<label name="TAG_collaboration"></label>
<label name="TAG_collaborative"></label>
<label name="TAG_community"></label>
<label name="TAG_competition"></label>
<label name="TAG_complex"></label>
<label name="TAG_complexity"></label>
<label name="TAG_compounds"></label>
<label name="TAG_computer"></label>
<label name="TAG_computing"></label>
<label name="TAG_concept"></label>
<label name="TAG_context"></label>
<label name="TAG_cortex"></label>
<label name="TAG_critical"></label>
<label name="TAG_data"></label>
<label name="TAG_datamining"></label>
<label name="TAG_date"></label>
<label name="TAG_design"></label>
<label name="TAG_development"></label>
<label name="TAG_diffusion"></label>
<label name="TAG_diplomathesis"></label>
<label name="TAG_disability"></label>
<label name="TAG_dynamics"></label>
<label name="TAG_education"></label>
<label name="TAG_elearning"></label>
<label name="TAG_electrochemistry"></label>
<label name="TAG_elisa"></label>
<label name="TAG_empirical"></label>
<label name="TAG_energy"></label>
<label name="TAG_engineering"></label>
<label name="TAG_epitope"></label>
<label name="TAG_equation"></label>
<label name="TAG_evaluation"></label>
<label name="TAG_evolution"></label>
<label name="TAG_fca"></label>