Commit 6b31a8f4 authored by Antoine RICHARD's avatar Antoine RICHARD
Browse files

init test plan

parent bf158472
......@@ -157,7 +157,7 @@ public class App
// Init Learners
HashMap<String, MultiLabelLearnerBase> learners = new HashMap<>();
learners.put("FuzzyBayes", new FuzzyBayes());
//learners.put("FuzzyBayes", new FuzzyBayes());
learners.put("HistBayes", new HistBayes());
//learners.put("MLkNN", new MLkNN()); // k-Nearest Neighboors
//learners.put("BPMLL", new BPMLL());
......
@article{boutell2004,
title={Learning multi-label scene classification},
author={Boutell, Matthew R and Luo, Jiebo and Shen, Xipeng and Brown, Christopher M},
journal={Pattern recognition},
volume={37},
number={9},
pages={1757--1771},
year={2004},
publisher={Elsevier}
}
@inproceedings{briggs2013,
author={F. Briggs and Y. Huang and R. Raich and K. Eftaxias and Z. Lei and W. Cukierski and S. F. Hadley and A. Hadley and M. Betts and X. Z. Fern and J. Irvine and L. Neal and A. Thomas and G. Fodor and G. Tsoumakas and H. W. Ng and T. N. T. Nguyen and H. Huttunen and P. Ruusuvuori and T. Manninen and A. Diment and T. Virtanen and J. Marzat and J. Defretin and D. Callender and C. Hurlburt and K. Larrey and M. Milakov},
booktitle={2013 IEEE International Workshop on Machine Learning for Signal Processing (MLSP)},
title={The 9th annual MLSP competition: New methods for acoustic classification of multiple simultaneous bird species in a noisy environment},
year={2013},
pages={1-8}
}
@inproceedings{cleary1995,
author = {John G. Cleary and Leonard E. Trigg},
booktitle = {12th International Conference on Machine Learning},
pages = {108-114},
title = {K*: An Instance-based Learner Using an Entropic Distance Measure},
year = {1995}
}
@inproceedings{cohen1995,
author = {William W. Cohen},
booktitle = {Twelfth International Conference on Machine Learning},
pages = {115-123},
publisher = {Morgan Kaufmann},
title = {Fast Effective Rule Induction},
year = {1995}
}
@inproceedings{diplaris2005,
title={Protein classification with multiple algorithms},
author={Diplaris, Sotiris and Tsoumakas, Grigorios and Mitkas, Pericles A and Vlahavas, Ioannis},
booktitle={Panhellenic Conference on Informatics},
pages={448--456},
year={2005},
organization={Springer}
}
@inproceedings{frank1998,
author = {Eibe Frank and Ian H. Witten},
booktitle = {Fifteenth International Conference on Machine Learning},
editor = {J. Shavlik},
pages = {144-151},
publisher = {Morgan Kaufmann},
title = {Generating Accurate Rule Sets Without Global Optimization},
year = {1998}
}
@inproceedings{hastie1998,
author = {Trevor Hastie and Robert Tibshirani},
booktitle = {Advances in Neural Information Processing Systems},
editor = {Michael I. Jordan and Michael J. Kearns and Sara A. Solla},
publisher = {MIT Press},
title = {Classification by Pairwise Coupling},
volume = {10},
year = {1998}
}
@inproceedings{john1995,
address = {San Mateo},
author = {George H. John and Pat Langley},
booktitle = {Eleventh Conference on Uncertainty in Artificial Intelligence},
pages = {338-345},
publisher = {Morgan Kaufmann},
title = {Estimating Continuous Distributions in Bayesian Classifiers},
year = {1995}
}
@inproceedings{katakis2008,
title={Multilabel text classification for automated tag suggestion},
author={Katakis, Ioannis and Tsoumakas, Grigorios and Vlahavas, Ioannis},
booktitle={Proceedings of the ECML/PKDD},
volume={18},
year={2008}
}
@article{keerthi2001,
author = {S.S. Keerthi and S.K. Shevade and C. Bhattacharyya and K.R.K. Murthy},
journal = {Neural Computation},
number = {3},
pages = {637-649},
title = {Improvements to Platt's SMO Algorithm for SVM Classifier Design},
volume = {13},
year = {2001}
}
@article{landwehr2005,
author = {Niels Landwehr and Mark Hall and Eibe Frank},
journal = {Machine Learning},
number = {1-2},
pages = {161-205},
title = {Logistic Model Trees},
volume = {59},
year = {2005}
}
@inproceedings{pestian2007,
title={A shared task involving multi-label classification of clinical free text},
author={Pestian, John P and Brew, Christopher and Matykiewicz, Pawe{\l} and Hovermale, Dj J and Johnson, Neil and Cohen, K Bretonnel and Duch, W{\l}odzis{\l}aw},
booktitle={Proceedings of the Workshop on BioNLP 2007: Biological, Translational, and Clinical Language Processing},
pages={97--104},
year={2007},
organization={Association for Computational Linguistics}
}
@incollection{platt1998,
author = {J. Platt},
booktitle = {Advances in Kernel Methods - Support Vector Learning},
editor = {B. Schoelkopf and C. Burges and A. Smola},
publisher = {MIT Press},
title = {Fast Training of Support Vector Machines using Sequential Minimal Optimization},
year = {1998}
}
@book{quinlan1993,
address = {San Mateo, CA},
author = {Ross Quinlan},
publisher = {Morgan Kaufmann Publishers},
title = {C4.5: Programs for Machine Learning},
year = {1993}
}
@article{schapire2000,
author = {Robert E. Schapire and Yoram Singer},
journal = {Machine Learning},
number = {2/3},
pages = {135-168},
title = {BoosTexter: A boosting-based system for text categorization},
volume = {39},
year = {2000}
}
@inproceedings{sumner2005,
author = {Marc Sumner and Eibe Frank and Mark Hall},
booktitle = {9th European Conference on Principles and Practice of Knowledge Discovery in Databases},
pages = {675-683},
publisher = {Springer},
title = {Speeding up Logistic Model Tree Induction},
year = {2005}
}
@inproceedings{trohidis2008,
title={Multi-label classification of music into emotions.},
author={Trohidis, Konstantinos and Tsoumakas, Grigorios and Kalliris, George and Vlahavas, Ioannis P},
booktitle={ISMIR},
volume={8},
pages={325--330},
year={2008}
}
@article{tsoumakas2007,
title={Multi-label classification: An overview},
author={Tsoumakas, Grigorios and Katakis, Ioannis},
journal={International Journal of Data Warehousing and Mining (IJDWM)},
volume={3},
number={3},
pages={1--13},
year={2007},
publisher={IGI Global}
}
@article{tsoumakas2011mulan,
author = "Grigorios Tsoumakas and Eleftherios Spyromitros-Xioufis and Jozef Vilcek and Ioannis Vlahavas",
title = "Mulan: A Java Library for Multi-Label Learning",
journal = "Journal of Machine Learning Research",
volume = "12",
year = "2011",
pages = "2411--2414",
}
@article{tsoumakas2011rakel,
author = {Grigorios Tsoumakas and Ioannis Katakis and Ioannis Vlahavas},
journal = {IEEE Transactions on Knowledge and Data Engineering},
number = {7},
pages = {1079-1089},
title = {Random k-Labelsets for Multi-Label Classification},
volume = {23},
year = {2011}
}
@article{turnbull2008,
title={Semantic annotation and retrieval of music and sound effects},
author={Turnbull, Douglas and Barrington, Luke and Torres, David and Lanckriet, Gert},
journal={IEEE Transactions on Audio, Speech, and Language Processing},
volume={16},
number={2},
pages={467--476},
year={2008},
publisher={IEEE}
}
@article{zhang2006,
author = {Zhang, M.L. and Zhou, Z.H.},
journal = {IEEE Transactions on Knowledge and Data Engineering},
pages = {1338-1351},
title = {Multi-label neural networks with applications to functional genomics and text categorization},
volume = {18},
year = {2006}
}
@article{zhang2007,
address = {New York, NY, USA},
author = {Min-Ling Zhang and Zhi-Hua Zhou},
journal = {Pattern Recogn.},
number = {7},
pages = {2038--2048},
publisher = {Elsevier Science Inc.},
title = {ML-KNN: A lazy learning approach to multi-label learning},
volume = {40},
year = {2007},
ISSN = {0031-3203}
}
---
title: "Transparency vs Performances"
output:
html_document:
toc: true
code_folding: "hide"
bibliography: bibliography.bib
---
```{r include=FALSE}
library(ggplot2)
```
# Motivations
In these experiments, we aimed to observe
how the performance
of different multi-label classification
algorithms varies
according to their degree of transparency.
These experiments were carried out using
the Mulan Java library, developed by @tsoumakas2011mulan,
which provides a plethora of classification algorithms
as well as tools to evaluate them.
# Performances
```{r loadCsv}
# Load the cross-validation results; the file is semicolon-separated.
results <- read.csv(file = "results/crossvalidation.csv", sep = ";")
```
## Hamming Loss
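According to @schapire2000, the Hamming loss of a
classifier $H$ on a dataset $D$ is defined as follows:
$$HammingLoss(H,D) = \frac{1}{|D|}\sum_{i=1}^{|D|}\frac{|Y_i \Delta Z_i|}{|L|}$$
where $Y_i$ is the set of true labels of the $i$-th example, $Z_i$ is the set of labels predicted by $H$ for that example, $L$ is the set of all labels, and $\Delta$ denotes the symmetric difference of
two sets of labels (equivalent to the XOR operator).
The Hamming loss is therefore the fraction of labels that are misclassified, averaged over all examples.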
```{r hammingLoss}
# Grouped bar chart of Hamming loss per dataset, one dodged bar per learner,
# with error bars at Hamming_Loss -/+ Hamming_Loss_std.
# NOTE(review): assumes `results` provides the columns Dataset, Learner,
# Hamming_Loss and Hamming_Loss_std -- confirm against crossvalidation.csv.
ggplot(
  results,
  aes(x = Dataset, y = Hamming_Loss, fill = Learner)
) +
  geom_col(color = "black", position = position_dodge()) +
  geom_errorbar(
    aes(
      ymin = Hamming_Loss - Hamming_Loss_std,
      ymax = Hamming_Loss + Hamming_Loss_std
    ),
    width = 0.2,
    position = position_dodge(0.9)
  ) +
  # Zoom with coord_cartesian() rather than ylim(): ylim() sets scale limits,
  # so any error bar reaching below 0 or above 1 would be turned into NA and
  # dropped from the plot instead of merely being clipped at the panel edge.
  coord_cartesian(ylim = c(0, 1)) +
  ggtitle("Hamming loss of multi-label classifier systems by dataset") +
  theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))
```
# References
#!/usr/bin/env Rscript
# Render the experiment report from its R Markdown source.
rmarkdown::render(input = "experiment.Rmd")
#!/usr/bin/env sh
# Abort on the first failing command: without this, a failed `cd` would run
# Maven in the wrong directory, and a failed build or Java run would still
# fall through to the report rendering step.
set -e

# Build the jar and run com.lamsade.App from inside api/.
cd api
mvn clean package
java -cp target/api-1.0-SNAPSHOT.jar com.lamsade.App
cd ../

# Render the R Markdown report.
./renderer.R
......@@ -8,6 +8,7 @@ let
# rmarkdown related packages
knitr
rmarkdown
ggplot2
# Rstudio related packages
# servr
];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment