diff --git a/.gitignore b/.gitignore index 1262e3f..a09a5b8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ # PDF files *.pdf +# Word files +*.docx + # TeX files *.tex diff --git a/.travis.yml b/.travis.yml index 119a233..27aca9a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,7 @@ install: true script: true + after_success: - sudo apt-get install texlive-latex-extra - cd docs @@ -18,6 +19,13 @@ after_success: - cd .. && mv standalone/* . - make travis && cd .. +# Run tests on master & dev +branches: + only: + - master + - dev + +# Only deploy on master deploy: provider: releases api_key: "$GH_TOKEN" @@ -28,9 +36,6 @@ deploy: branch: - master -# Only run CI on master -branches: - only: - - master - - dev - +# Stop bothering me +notifications: + email: false diff --git a/docs/Makefile b/docs/Makefile index 0a55624..b101270 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,48 +1,69 @@ PDF := persimmon.pdf # PDF Main Target +MARKDOWN := introduction.md state_of_the_art.md objectives.md risk_analysis.md \ + implementation.md postmortem.md # Markdown files +BODY := body.tex # Markdown files will be converted to this intermediate step +#APPENDICES := appendixX.md # Markdown Appendices +#APPENDIX := appendix.tex # And appendices to this intermediate step +# METADATA := metadata.yaml # Metadata files (Author, Date, Title, etc..) +BIBLIOGRAPHY := persimmon.bib # BibLaTeX bibliography CSL := emerald-harvard.csl # CSL file used for citations TEMPLATE := template.tex # LaTeX template for producing PDF -BIBLIOGRAPHY := persimmon.bib # BibLaTeX bibliography -MARKDOWN := chapter0.md chapter1.md chapter2.md chapter3.md # Markdown files -GRAPHS := $(wildcard graphs/*.tex) -IMAGES := $(GRAPHS:.tex=.pdf) -IMAGES += $(wildcard graphs/*.png) # Standalone pictures to be inserted -LATEXHEADERS := latexheaders.tex # Additional LaTeX headers -METADATA := metadata.yaml # Metadata files (Author, Date, Title, etc..) 
- -all: pdf - -pdf: $(MARKDOWN) $(BIBLIOGRAPHY) $(CSL) $(TEMPLATE) $(IMAGES) $(METADATA) - pandoc --standalone --smart --latex-engine xelatex --template $(TEMPLATE) \ - --top-level-division chapter --bibliography $(BIBLIOGRAPHY) --csl $(CSL) \ - --include-in-header $(LATEXHEADERS) $(METADATA) $(MARKDOWN) -o $(PDF) - -travis: $(IMAGES) - pandoc --standalone --smart --latex-engine xelatex --template $(TEMPLATE) \ - --chapters --bibliography $(BIBLIOGRAPHY) --csl $(CSL) \ - --include-in-header $(LATEXHEADERS) $(METADATA) $(MARKDOWN) -o $(PDF) - -# For standalone images (Not used) + +GRAPHS := $(wildcard graphs/*.tex) # Latex diagrams +IMAGES := $(wildcard graphs/*.png) # .png images +IMAGES += $(GRAPHS:.tex=.pdf) # Generated PDF Images + +all: $(PDF) + +# Main PDF (default target), built from the intermediate $(BODY) +$(PDF): $(BODY) $(TEMPLATE) $(IMAGES) # TODO: Add abstract + pandoc --smart --standalone --latex-engine xelatex --template $(TEMPLATE) \ + --metadata author:"Álvaro Bermejo" \ + --metadata date:"$(shell date +"%d/%m/%Y") ($(shell git describe --abbrev=0 --tags))" \ + --metadata title:"Persimmon" --metadata fontsize:"12pt" --toc \ + --metadata subtitle:"A scikitlearn visual programming interface" \ + --metadata mainlang:"English" --metadata keywords:"Machine Learning","Visual Programming" \ + --metadata papersize:"A4" --metadata sansfont:"Helvetica Neue LT Com" \ + --metadata colorlinks --metadata documentclass:"scrreprt" \ + --top-level-division chapter $(BODY) -o $@ + +# Travis CI build: no custom sansfont, uses --chapters, writes $(PDF) +travis: $(BODY) $(APPENDIX) $(TEMPLATE) $(IMAGES) + pandoc --smart --standalone --latex-engine xelatex --template $(TEMPLATE) \ + --metadata author:"Álvaro Bermejo" \ + --metadata date:"$(shell date +"%d/%m/%Y") ($(shell git describe --abbrev=0 --tags))" \ + --metadata title:"Persimmon" --metadata fontsize:"12pt" --toc \ + --metadata subtitle:"A sklearn visual programming interface" \ + --metadata mainlang:"English" --metadata keywords:"Machine 
Learning","Visual Programming" \ + --metadata papersize:"A4" \ + --metadata colorlinks --metadata documentclass:"scrreprt" \ + --chapters $(BODY) $(APPENDIX) -o $(PDF) + +# Book-to-print version (scrbook class), written to book_$(PDF) +book_complu: $(BODY) $(APPENDIX) $(TEMPLATE) $(IMAGES) + pandoc --smart --standalone --latex-engine xelatex --template $(TEMPLATE) \ + --metadata author:"Álvaro Bermejo" --metadata date:"Director: Pablo Moreno Ger" \ + --metadata title:"Persimmon" --metadata fontsize:"12pt" --toc \ + --metadata subtitle:"A scikitlearn visual programming interface" \ + --metadata mainlang:"English" \ + --metadata papersize:"A4" --metadata sansfont:"Helvetica Neue LT Com" \ + --metadata documentclass:"scrbook" --metadata institute:"Universidad Complutense" \ + --top-level-division chapter $(BODY) $(APPENDIX) -o book_$(PDF) + + +# For standalone images graphs/%.pdf: graphs/%.tex - xelatex $< - -## Splitted creation (Not currently working) -#CHAPTERS := $(MARKDOWN:.md=.tex) # LaTeX Chapters -#GRAPHS := $(wildcard graphs/*.tex) -#IMAGES += $(GRAPHS:.tex=.pdf) -# -##splitted: $(CHAPTERS) $(BIBLIOGRAPHY) $(CSL) $(TEMPLATE) -# pandoc --standalone --smart --latex-engine xelatex --template $(TEMPLATE) \ -# --top-level-division chapter --bibliography $(BIBLIOGRAPHY) --csl $(CSL) \ -# $(CHAPTERS) -o $(PDF) -# -#%.tex: metadata.yaml %.md -# pandoc --no-tex-ligatures metadata.yaml $*.md -o $@ -# -## For standalone images (Not used) -#graphs/%.pdf: graphs/%.tex -# xelatex $< -# + xelatex $< > /dev/null # TODO: actually output in graphs directory + mv $*.pdf graphs/ + +# Body and Appendices Middle Steps creation +$(BODY): $(MARKDOWN) $(BIBLIOGRAPHY) $(CSL) metadata.yaml + pandoc --no-tex-ligatures --bibliography $(BIBLIOGRAPHY) --csl $(CSL) \ + metadata.yaml $(MARKDOWN) -o $@ + +$(APPENDIX): $(APPENDICES) + pandoc --no-tex-ligatures $(APPENDICES) -o $@ clean: - rm -f *.pdf chapter?.tex *.log *.aux *.png + rm -f $(BODY) $(APPENDIX) graphs/*.pdf *.pdf *.log *.aux diff --git a/docs/appendixX.md b/docs/appendixX.md new file mode 100644 index 0000000..f17aa18 --- 
/dev/null +++ b/docs/appendixX.md @@ -0,0 +1,30 @@ +Appendix X: How was this document made? +======================================= + +This document was written in Markdown, and converted to PDF +using Pandoc. + +Process +------- +The document is written in Pandoc's extended Markdown, and can be broken amongst +different files. Images are inserted with regular Markdown syntax for images. +A YAML file with metadata information is passed to pandoc, containing things +such as Author, Title, font, etc... The use of this information depends on +what output we are creating and the template/reference we are using. + + +Diagrams +-------- +Diagrams were created with LaTeX packages such as tikz or pgfgantt, they +can be inserted directly as PDF, but if we desire to output to formats other +than LaTeX it is more convenient to convert them to .png files with tools such +as `pdftoppm`. + + +References +------------ +References are handled by pandoc-citeproc, we can write our bibliography in +a myriad of different formats: bibTeX, bibLaTeX, JSON, YAML, etc..., then +we reference in our markdown, and that reference works for multiple formats. + + diff --git a/docs/chapter0.md b/docs/chapter0.md deleted file mode 100644 index 3d92ad9..0000000 --- a/docs/chapter0.md +++ /dev/null @@ -1,44 +0,0 @@ -Introduction -============ - -Description ------------ -Persimmon is a visual programming interface for sklearn. - -This projects touches on a variety of CS topics, such as User Experience (Main -topic as the project is driven by the users feedback and engagement with the -project), Software Engineering (We have to interact with already built software, -using interfaces and organizing code through object-oriented techniques), -Compilers (Language parsing and transpilers) and a number of tangentially -related topics such as Machine Learning, I/O, preprocessing, etc. 
- -Motivation ----------- -After learning about Machine Learning this past year I was able to win a -Kaggle like [^1] competition and got an internship working for a company on the -algorithmic trading sector. - -There, amongst other duties, I aided with moving the codebase from MATLAB to -Python, and during that process I realised many of my co-workers struggled with -the move, as they were not computer scientists, but came from a variety of -backgrounds such as Maths, Physics, Electric Engineering, Statistics, etc... - -Yet they were the whole of the department, as this topic requires a high level -of theoretical maths knowledge, and so happens that these subjects tend to not -have a lot of general programming skills, they mostly work with specialized -languages, tailored to these tasks such as MATLAB, R, Julia, etc, and moving to -a general purpose language such as Python involves learning about a plethora of -additional topics. - -The situation is even more complicated for newcomers to Machine Learning from -these backgrounds, as they not only have the programming barrier but also have -to overcome the difficulties of the algorithms themselves, something Computer -Scientists also struggle with (In many cases even more because their weaker -maths skills) - -So this project servers a double purpose, it helps with the programming barrier, -and it aids with the Machine Learning process as it allows the learner to focus -on the connections, intuitions and mathematical basis and not on the -implementation details and the quirks of the concrete language. 
- -[^1]: [Kaggle.com](https://www.kaggle.com/) diff --git a/docs/graphs/early_interface.png b/docs/graphs/early_interface.png new file mode 100644 index 0000000..be80248 Binary files /dev/null and b/docs/graphs/early_interface.png differ diff --git a/docs/graphs/filechooser.png b/docs/graphs/filechooser.png new file mode 100644 index 0000000..a3a48b8 Binary files /dev/null and b/docs/graphs/filechooser.png differ diff --git a/docs/graphs/objectives.tex b/docs/graphs/objectives.tex index 0e3db83..3f7699c 100644 --- a/docs/graphs/objectives.tex +++ b/docs/graphs/objectives.tex @@ -9,7 +9,7 @@ \draw (-1, 2.8) rectangle (1, 3.5) node[midway, gray] {Parity}; \draw [->, gray, thick] (0, 3.6) -- (0, 4.3); \draw (-1, 4.4) rectangle (1, 5.1) node[midway, gray] {Compilation}; - \draw [red, dashed] (-3, 5.5) -- (3, 5.5) node[below left] {Out of scope}; + \draw [red, dashed] (4, 5.35) -- (-4, 5.35) node[above right] {Out of scope}; \draw [->, gray, thick] (-0.1, 5.2) -- (-2, 5.9); \draw [->, gray, thick] (0.1, 5.2) -- (2, 5.9); \draw (-3, 6) rectangle (-1, 6.7) node[midway, gray] {Web}; diff --git a/docs/implementation.md b/docs/implementation.md new file mode 100644 index 0000000..b34286d --- /dev/null +++ b/docs/implementation.md @@ -0,0 +1,11 @@ +Implementation +============== + +First Iteration +--------------- + +![Early "static" interface](graphs/early_interface.png) + +![File chooser](graphs/filechooser.png) + + diff --git a/docs/introduction.md b/docs/introduction.md new file mode 100644 index 0000000..a28dde6 --- /dev/null +++ b/docs/introduction.md @@ -0,0 +1,48 @@ +Introduction +============ + +Description +----------- +Persimmon is a visual programming interface for sklearn. 
+ +This project involves a variety of Computer Science topics, such as User +Experience (Main topic as the project is driven by the users' feedback and +engagement with the project), Machine Learning (We don't write the algorithms, +but need extensive knowledge of them to surface all their options), Software +Engineering (We have to interact with already built software, using interfaces +and organizing code through object-oriented techniques), Compilers (Language +parsing and transpilers) and a number of tangentially related topics such as +Machine Learning, I/O, preprocessing of data, etc. + +Motivation +---------- +After learning about Machine Learning at university last year I was able to get +an internship working for a company in the algorithmic trading sector. + +There, amongst other duties, I aided with moving the codebase from MATLAB to +Python, and during that process I realised many of my co-workers struggled with +the switch. None of them were computer scientists, but instead came from a +variety of backgrounds such as Maths, Physics, Electrical Engineering, +Statistics or Aerospace Engineering. + +Yet they were the whole of the department, their work requires a very high +level of theoretical maths knowledge, and it so happens that experts from +these fields tend to not have a lot of general programming skills, they mostly +work with specialized languages, tailored to these tasks such as MATLAB, R, +Julia, etc, and moving to a general purpose language such as Python involves +learning about a plethora of additional topics, such as Object Oriented +Programming, custom complex data structures or CPU cache optimization. + +The situation is even more complicated for newcomers to Machine Learning, as +they not only have the programming barrier but also have to overcome the +difficulties of the algorithms themselves, something Computer Scientists also +struggle with (In many cases even more because of their weaker maths skills). 
+ +So this project serves a double purpose, it helps with the programming barrier, +and it aids with the Machine Learning process as it allows the learner to focus +on the connections, intuitions and mathematical basis and not on the +implementation details and the quirks of the concrete language. + +This hypothesis that visual learning can improve understanding is supported by +numerous sources such as [@fry2007visualizing] and [@principles]. + diff --git a/docs/latexheaders.tex b/docs/latexheaders.tex deleted file mode 100644 index 6b69687..0000000 --- a/docs/latexheaders.tex +++ /dev/null @@ -1,2 +0,0 @@ -\usepackage{tikz} -\usepackage{pgfgantt} diff --git a/docs/metadata.yaml b/docs/metadata.yaml index ada89aa..f9e2be3 100644 --- a/docs/metadata.yaml +++ b/docs/metadata.yaml @@ -1,10 +1,18 @@ --- -author: "Álvaro Bermejo" +author: Álvaro Bermejo +date: 2017-01-01 title: "Persimmon: a visual interface for sklearn" -papersize: "A4" -fontsize: "12pt" -mainlang: "English" -toc: yes +subtitle: A scikit-learn visual programming interface +version: 0.2 +sansfont: Helvetica Neue LT Com +colorlinks: True +documentclass: scrreprt +institute: Universidad Complutense and University of Hertfordshire +papersize: A4 +fontsize: 12pt +mainlang: English +toc: True +top-level-division: chapter --- diff --git a/docs/chapter2.md b/docs/objectives.md similarity index 68% rename from docs/chapter2.md rename to docs/objectives.md index 4323ec1..87ddfad 100644 --- a/docs/chapter2.md +++ b/docs/objectives.md @@ -1,11 +1,12 @@ Objectives ========== -The best way we can describe the project is by dividing the objectives. -and the best way to understand the progression of those and their relation + +The best way to describe the project is by dividing the objectives. +And the best way to understand the progression of those and their relation is with a diagram. 
+ -![Objectives Tree](objectives.pdf) +![Objectives Tree](graphs/objectives.pdf) **Capped** is more than a minimum viable product, a extensive proof-of-concept, with a few limited algorithms and the ability of inputing `.csv` files. with a @@ -16,11 +17,15 @@ buttons. interaction. we don't really care much about having the same number of underlying algorithms because that's not the focus of the project. -And the final objective is **compilation**, the ability to get the python +And the final objective is **Compilation**, the ability to get the python source code from the visual representation. also improving the interface to have a better flow, such as in unreal blueprints, which provide a very intuitive interface [@shah2014mastering]. +This milestone would bring Persimmon utility outside just the realm of +learning, as it would be a convenience tool for the exploratory work of any +ML solution (Business case, a Kaggle[^kaggle] competition, etc.). + Out of scope, but further applications of the system are **web/junyper** integration that means the system would be accesible from a website interface, and script **synthesization**, which is the opposite of compilation, meaning @@ -28,9 +33,10 @@ the ability to visualize on persimmon a python source file. Now that we understand the objectives we can draw a much detailed gantt diagram. -![Gantt Diagram](gantt.pdf) +![Gantt Diagram](graphs/gantt.pdf) We ommited previous months that included idea refinement but are not interesting for us. 
+[^kaggle]: [Kaggle.com](https://www.kaggle.com/) diff --git a/docs/postmortem.md b/docs/postmortem.md new file mode 100644 index 0000000..f7482ab --- /dev/null +++ b/docs/postmortem.md @@ -0,0 +1,6 @@ +Postmortem +========== + +Bibliography +============ + diff --git a/docs/chapter3.md b/docs/risk_analysis.md similarity index 95% rename from docs/chapter3.md rename to docs/risk_analysis.md index 2b61dfd..317b8a9 100644 --- a/docs/chapter3.md +++ b/docs/risk_analysis.md @@ -22,7 +22,7 @@ Prevention & Mitigation ----------------------- | Risk Factor | Low Impact | Medium Impact | High Impact | -|--------------|--------------------|----------------------|------------------| +|:------------ |:------------------:|:--------------------:|:----------------:| | Requirements | Not defined enough | Change at late stage | Unreachable goal | | Technology | Performance issues | Interoperability | Major errors | @@ -52,6 +52,3 @@ analysis of the capabilities of the platform must be done before starting the project, identifying possible faults and providing possible solutions and or alternatives. - -Bibliography -============ diff --git a/docs/chapter1.md b/docs/state_of_the_art.md similarity index 58% rename from docs/chapter1.md rename to docs/state_of_the_art.md index 346db5e..bd1fe3b 100644 --- a/docs/chapter1.md +++ b/docs/state_of_the_art.md @@ -1,14 +1,46 @@ -State-of-the-art +State of the art ================ -In this chapter we will try to explain the main ideas and reasons behind our -research project and the technologies behind it. -First of all we knew we wanted to do something related to data science so we -started looking around what we could do that is both different and improves on -what already exists. -We found that there weren't many open source software with a high level of -complexity that at the same time were easy to use for beginners so anyone even -without having a deep knowledge in data science could use it. 
-Here is where we thought about contacting the "_e-learning UCM_" research group + +> Those who forget the past are condemned to +> repeat their mistakes in the future. +Some dude a long time ago. + +Before we start working on Persimmon let's take a look at the previous works +for both inspiration and avoiding pitfalls. + +Azure ML +-------- +The most obvious inspiration, and arguably the most successful; it is undeniable +that Microsoft's product managed to hit the market with a product nobody knew +they wanted but everybody needed. As a platform it has a lot that we like: +a lot of different preprocessing steps, runs on the cloud, has a web interface. + + +But some of these features are also shortcomings, the web interface feels a +bit clunky, low FPS and lack of native support mean that dragging and dropping +don't feel as smooth as they should. Cloud support is very good, but for +sensitive data, such as financial or medical data, a self-hosted version is a must. +The variety of algorithms is interesting, but the lack of ability to extend +them is a pity, we know that Azure is written in R, but because it is closed +source we can't extend the code in any meaningful way. + + +Unreal Engine 4 +--------------- +This one may be a bit unexpected, but the inspiration here comes from the +Blueprint system. Arguably the best visual programming interface to come out in +recent years, it strives to be a complete programming language, even going as far +as presenting conditionals as blocks. + + +The flow of the interface is impressive, when one cable is dragged from a block +a prompt appears with only the blocks that make sense to be connected to the +previous block. This little feature makes creating complex programs a breeze, +allowing the user to forget about the exact details of the API. 
+ + + +Here is where we thought about contacting the *"e-learning UCM"* research group at our university because we saw an opportunity to bring the power of the data science to the educational world, in this case via the educative games. We thought that was a perfect idea because we will try to help them with a tool diff --git a/docs/template.tex b/docs/template.tex index bba5b90..721e464 100644 --- a/docs/template.tex +++ b/docs/template.tex @@ -192,6 +192,13 @@ $if(title)$ \title{$title$$if(thanks)$\thanks{$thanks$}$endif$} + % Let's make titles better + %\newfontfamily{\titlefont} + % [UprightFont = {* 43 Light Extended}, + % BoldFont = {* 107 Extra Black Condensed}]{Helvetica Neue LT Com} + + \usepackage{titling} + \renewcommand{\maketitlehooka}{\sffamily} $endif$ $if(subtitle)$ \providecommand{\subtitle}[1]{} @@ -222,14 +229,16 @@ $endif$ $if(abstract)$ \begin{abstract} - $abstract$ + $abstract$ \end{abstract} $endif$ + + {\bfseries Keywords:}$for(keywords)$ $keywords$ $sep$,$endfor$. 
$for(include-before)$ - $include-before$ - + $include-before$ $endfor$ + $if(toc)${ $if(colorlinks)$ \hypersetup{linkcolor=$if(toccolor)$$toccolor$$else$black$endif$} diff --git a/persimmon/persimmon.kv b/persimmon/persimmon.kv new file mode 100644 index 0000000..31ac249 --- /dev/null +++ b/persimmon/persimmon.kv @@ -0,0 +1,19 @@ +#:kivy 1.0.9 + +: + FileChooser: + FileChooserIconLayout + +: + BoxLayout: + orientation: 'vertical' + #pos: root.pos + BoxLayout: + size_hint_y: None + Button: + text: 'Cancel' + on_release: root.cancel() + Button: + text: 'Load' + on_release: root.load(filechooser.path, filechooser.selection) + diff --git a/persimmon/persimmon.py b/persimmon/persimmon.py new file mode 100644 index 0000000..fad869b --- /dev/null +++ b/persimmon/persimmon.py @@ -0,0 +1,27 @@ +import kivy +kivy.require("1.9.1") + +from kivy.app import App +from kivy.uix.label import Label +from kivy.uix.floatlayout import FloatLayout +from kivy.uix.filechooser import FileChooserIconView +from kivy.uix.popup import Popup +from kivy.uix.button import Button +from kivy.properties import ObjectProperty + +class MainScreen(FloatLayout): + layout_content = ObjectProperty(None) + + def __init__(self, **kwargs): + super(MainScreen, self).__init__(**kwargs) + +class FileView(Popup): + load = ObjectProperty(None) + cancel = ObjectProperty(None) + +class Persimmon(App): + def build(self): + return MainScreen() + +if __name__ == '__main__': + Persimmon().run() diff --git a/persimmon/proveApp.py b/persimmon/proveApp.py index 9b81e3b..b2bc4c4 100644 --- a/persimmon/proveApp.py +++ b/persimmon/proveApp.py @@ -67,5 +67,3 @@ class proveApp(App): def build(self): return MainScreen() -prove = proveApp() -prove.run() \ No newline at end of file diff --git a/setup.py b/setup.py index 52316a3..4e5aacc 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ version = '0.1', description = 'A visual interface for sklearn', url = 'http://github.com/alvarber/Persimmon', - author = ['Álvaro Bermejo', 
'Pedro García'], + author = 'Álvaro Bermejo', author_email = 'alvaro.garcia95@hotmail.com', license = 'MIT', packages = find_packages(),