Add slides

master
J. Fernando Sánchez 6 years ago
parent 3f287c88b3
commit 83b7b60897

1
slides/.gitignore vendored

@ -0,0 +1 @@
reveal.js

Binary file not shown.

After

Width:  |  Height:  |  Size: 85 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 178 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

@ -0,0 +1,60 @@
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:inkspace="http://www.inkscape.org/namespaces/inkscape" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 640 520">
<defs id="defs_block">
<filter height="1.504" id="filter_blur" width="1.1575" x="-0.07875" y="-0.252">
<feGaussianBlur id="feGaussianBlur3780" stdDeviation="4.2"/>
</filter>
</defs>
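<title>Notebook image dependency diagram</title>
<desc>Arrows lead from ubuntu@SHA to base-notebook and on to minimal-notebook; from minimal-notebook to scipy-notebook and r-notebook; from scipy-notebook to tensorflow-notebook, datascience-notebook and pyspark-notebook; and from pyspark-notebook to all-spark-notebook.</desc>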
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="67" y="46"/>
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="67" y="126"/>
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="67" y="206"/>
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="67" y="286"/>
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="259" y="286"/>
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="67" y="366"/>
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="259" y="366"/>
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="451" y="366"/>
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="451" y="446"/>
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="64" y="40"/>
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="44" x="128" y="64">ubuntu@SHA</text>
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="64" y="120"/>
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="58" x="128" y="144">base-notebook</text>
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="64" y="200"/>
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="71" x="128" y="224">minimal-notebook</text>
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="64" y="280"/>
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="62" x="128" y="304">scipy-notebook</text>
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="256" y="280"/>
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="44" x="320" y="304">r-notebook</text>
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="64" y="360"/>
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="84" x="128" y="384">tensorflow-notebook</text>
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="256" y="360"/>
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="88" x="320" y="384">datascience-notebook</text>
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="448" y="360"/>
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="71" x="512" y="384">pyspark-notebook</text>
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="448" y="440"/>
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="80" x="512" y="464">all-spark-notebook</text>
<path d="M 128 80 L 128 112" fill="none" stroke="rgb(0,0,0)"/>
<polygon fill="rgb(0,0,0)" points="128,119 124,112 132,112 128,119" stroke="rgb(0,0,0)"/>
<path d="M 128 160 L 128 192" fill="none" stroke="rgb(0,0,0)"/>
<polygon fill="rgb(0,0,0)" points="128,199 124,192 132,192 128,199" stroke="rgb(0,0,0)"/>
<path d="M 128 240 L 128 260" fill="none" stroke="rgb(0,0,0)"/>
<path d="M 128 260 L 320 260" fill="none" stroke="rgb(0,0,0)"/>
<path d="M 320 260 L 320 272" fill="none" stroke="rgb(0,0,0)"/>
<polygon fill="rgb(0,0,0)" points="320,279 316,272 324,272 320,279" stroke="rgb(0,0,0)"/>
<path d="M 128 240 L 128 272" fill="none" stroke="rgb(0,0,0)"/>
<polygon fill="rgb(0,0,0)" points="128,279 124,272 132,272 128,279" stroke="rgb(0,0,0)"/>
<path d="M 128 320 L 128 352" fill="none" stroke="rgb(0,0,0)"/>
<polygon fill="rgb(0,0,0)" points="128,359 124,352 132,352 128,359" stroke="rgb(0,0,0)"/>
<path d="M 128 320 L 128 340" fill="none" stroke="rgb(0,0,0)"/>
<path d="M 128 340 L 320 340" fill="none" stroke="rgb(0,0,0)"/>
<path d="M 320 340 L 320 352" fill="none" stroke="rgb(0,0,0)"/>
<polygon fill="rgb(0,0,0)" points="320,359 316,352 324,352 320,359" stroke="rgb(0,0,0)"/>
<path d="M 128 320 L 128 340" fill="none" stroke="rgb(0,0,0)"/>
<path d="M 128 340 L 512 340" fill="none" stroke="rgb(0,0,0)"/>
<path d="M 512 340 L 512 352" fill="none" stroke="rgb(0,0,0)"/>
<polygon fill="rgb(0,0,0)" points="512,359 508,352 516,352 512,359" stroke="rgb(0,0,0)"/>
<path d="M 512 400 L 512 432" fill="none" stroke="rgb(0,0,0)"/>
<polygon fill="rgb(0,0,0)" points="512,439 508,432 516,432 512,439" stroke="rgb(0,0,0)"/>
</svg>

After

Width:  |  Height:  |  Size: 5.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 332 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 176 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 109 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 119 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 80 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 141 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 314 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 222 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 171 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 109 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 560 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 190 KiB

@ -0,0 +1 @@
slides.html

@ -0,0 +1,6 @@
#!/bin/sh
# Build the reveal.js deck (slides.html) from slides.md with pandoc, then
# upload the current directory to the slides server with rsync over SSH.
# Any extra command-line arguments ("$@") are forwarded to pandoc.

# Stop at the first failing command: without this, a failed pandoc build
# would still be followed by the rsync upload and a misleading "Done".
set -e

# Alternative PDF (beamer) build, kept for reference:
#pandoc -t beamer slides.md --slide-level=2 -o slides.pdf -V theme:metropolis --toc --toc-depth 1 "$@"
pandoc -t revealjs -s --dpi 120 slides.md --css=style.css --slide-level=2 -o slides.html -V theme=white -V height=800 "$@"

echo "Syncing $PWD"
# SSH on the remote host is reached on port 2222.
rsync -Paz "$PWD/" -e 'ssh -p 2222' root@slides.todevnull.com:/data/html/
echo 'Done'

@ -0,0 +1,3 @@
#!/bin/sh
# Feed the single file name slides.md to entr on stdin; entr is given
# ./run.sh as the command to execute for that watched file.
printf '%s\n' 'slides.md' | entr ./run.sh

@ -0,0 +1,377 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="generator" content="pandoc">
<meta name="author" content="J. Fernando Sánchez (jf.sanchez@upm.es)">
<meta name="dcterms.date" content="2018-01-01">
<title>Docker for research</title>
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
<link rel="stylesheet" href="reveal.js/css/reveal.css">
<style type="text/css">
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
</style>
<style type="text/css">
a.sourceLine { display: inline-block; line-height: 1.25; }
a.sourceLine { pointer-events: none; color: inherit; text-decoration: inherit; }
a.sourceLine:empty { height: 1.2em; position: absolute; }
.sourceCode { overflow: visible; }
code.sourceCode { white-space: pre; position: relative; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
code.sourceCode { white-space: pre-wrap; }
a.sourceLine { text-indent: -1em; padding-left: 1em; }
}
pre.numberSource a.sourceLine
{ position: relative; }
pre.numberSource a.sourceLine:empty
{ position: absolute; }
pre.numberSource a.sourceLine::before
{ content: attr(data-line-number);
position: absolute; left: -5em; text-align: right; vertical-align: baseline;
border: none; pointer-events: all;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
a.sourceLine::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<link rel="stylesheet" href="reveal.js/css/theme/white.css" id="theme">
<link rel="stylesheet" href="style.css"/>
<!-- Printing and PDF exports -->
<script>
// Append a print stylesheet to <head>: the PDF-export sheet when the URL
// query string contains "print-pdf" (any letter case), otherwise the
// regular paper sheet.
var link = document.createElement( 'link' );
link.rel = 'stylesheet';
link.type = 'text/css';
if ( window.location.search.match( /print-pdf/gi ) ) {
  link.href = 'reveal.js/css/print/pdf.css';
} else {
  link.href = 'reveal.js/css/print/paper.css';
}
document.getElementsByTagName( 'head' )[0].appendChild( link );
</script>
<!--[if lt IE 9]>
<script src="reveal.js/lib/js/html5shiv.js"></script>
<![endif]-->
</head>
<body>
<div class="reveal">
<div class="slides">
<section id="title-slide">
<h1 class="title">Docker for research</h1>
<p class="subtitle">… and data analysis</p>
<p class="author">J. Fernando Sánchez (<a href="mailto:jf.sanchez@upm.es">jf.sanchez@upm.es</a>)</p>
<p class="date">2018</p>
</section>
<section><section id="intro" class="title-slide slide level1 white" data-background="img/intro.jpg"><h1>Intro</h1></section><section id="before-we-begin" class="slide level2">
<h2>Before we begin</h2>
<p>Code available at:</p>
<p><a href="https://github.com/balkian/lab-in-a-box" class="uri">https://github.com/balkian/lab-in-a-box</a></p>
<p>Live demos at:</p>
<p><strong><a href="https://github.todevnull.com" class="uri">https://github.todevnull.com</a></strong></p>
<p><a href="https://lab.todevnull.com" class="uri">https://lab.todevnull.com</a></p>
<p><a href="https://hub.todevnull.com" class="uri">https://hub.todevnull.com</a></p>
<p>Feel free to log in, but try not to break them for now 😉</p>
</section><section id="my-name-is-fernando-and" class="slide level2">
<h2>My name is Fernando and…</h2>
<p><img data-src="img/im-a-researcher.jpg" /></p>
</section><section id="at-grupo-de-sistemas-inteligentes" class="slide level2">
<h2>At Grupo de Sistemas Inteligentes</h2>
<div class="columns">
<div class="column" style="width:50%;">
<p><img data-src="img/gsi.png" /></p>
</div><div class="column" style="width:50%;">
<ul>
<li>Machine Learning and Big Data</li>
<li>NLP and Sentiment Analysis</li>
<li>Social Network Analysis</li>
<li>Agents and Simulation</li>
<li>Linked Data and Semantic Technologies</li>
</ul>
</div>
</div>
<p><a href="http://www.gsi.dit.upm.es" class="uri">http://www.gsi.dit.upm.es</a></p>
</section><section id="and-i-docker" class="slide level2">
<h2>And I ❤ Docker</h2>
<div class="columns">
<div class="column" style="width:50%;">
<p><img data-src="img/docker.jpg" /></p>
</div><div class="column" style="width:50%;">
<ul>
<li>Docker+research for 3+ years</li>
<li>Advocate for ~2 years</li>
<li>Internal infrastructure: ansible, k8s and docker</li>
<li>Teach (with) it</li>
</ul>
</div>
</div>
</section><section id="about-this-talk" class="slide level2">
<h2>About this talk</h2>
<p>Takeaway: <strong><em>you can set up a multi-user data analysis environment with isolation in minutes</em></strong></p>
<p>Plus: using docker to perform and share experiments is even easier</p>
<p>Related Meetups:</p>
<p><a href="https://www.meetup.com/Docker-Madrid/events/240357800/">Big Data and Machine Learning with Docker</a></p>
<p><a href="https://www.meetup.com/Docker-Madrid/events/237067604/">Using Docker in Machine Learning Projects</a></p>
</section></section>
<section><section id="for-researchers" class="title-slide slide level1 white" data-background="img/research.jpg" style="color:white"><h1>For researchers</h1></section><section id="experiment-publish-repeat" class="slide level2">
<h2>Experiment, publish, repeat</h2>
<p><img data-src="img/peerreview.jpg" /></p>
</section><section id="reproducibility" class="slide level2">
<h2>Reproducibility</h2>
<figure>
<img data-src="img/goodluck.png" alt="@ianholmes" /><figcaption><a href="https://twitter.com/ianholmes/status/288689712636493824">@ianholmes</a></figcaption>
</figure>
</section><section id="obstacles" class="slide level2">
<h2>Obstacles</h2>
<div class="columns">
<div class="column" style="width:50%;">
<ul>
<li><strong>Missing data</strong></li>
<li>Bleeding edge tools and libraries</li>
<li>Throwaway software
<ul>
<li>Hacky</li>
<li>Little to no documentation</li>
</ul></li>
<li>Multiple languages</li>
</ul>
</div><div class="column" style="width:50%;">
<figure>
<img data-src="img/will_it_work.png" alt="https://xkcd.com/1742/" style="height:80.0%" /><figcaption><a href="https://xkcd.com/1742/" class="uri">https://xkcd.com/1742/</a></figcaption>
</figure>
</div>
</div>
</section><section id="obstacles-1" class="slide level2">
<h2>Obstacles</h2>
<p><img data-src="img/noidea-pc.png" /></p>
</section><section id="is-it-a-problem" class="slide level2">
<h2>Is it a problem?</h2>
<figure>
<img data-src="img/reproducibility.jpg" alt="https://www.nature.com/" style="height:80.0%" /><figcaption><a href="https://www.nature.com/news/1-500-scientists-lift-the-lid-on-reproducibility-1.19970">https://www.nature.com/</a></figcaption>
</figure>
</section><section id="jupyter-notebooks" class="slide level2">
<h2>Jupyter notebooks</h2>
<p><img data-src="img/jupyter-screenshot.png" /></p>
</section><section id="jupyter-architecture" class="slide level2">
<h2>Jupyter architecture</h2>
<figure>
<img data-src="img/jupyter-architecture.png" alt="http://jupyter.readthedocs.io" /><figcaption><a href="http://jupyter.readthedocs.io" class="uri">http://jupyter.readthedocs.io</a></figcaption>
</figure>
</section><section id="docker-to-the-rescue" class="slide level2">
<h2>Docker to the rescue</h2>
<figure>
<img data-src="img/dockerrescue.png" alt="towardsdatascience.com" /><figcaption><a href="https://towardsdatascience.com/how-docker-can-help-you-become-a-more-effective-data-scientist-7fc048ef91d5">towardsdatascience.com</a></figcaption>
</figure>
</section><section id="jupyterdocker-stacks" class="slide level2">
<h2>Jupyter/docker-stacks</h2>
<p><img data-src="img/dockerstacks.png" style="height:50.0%" /></p>
</section><section id="reproducible-environment" class="slide level2">
<h2>Reproducible environment</h2>
<div class="sourceCode" id="cb1"><pre class="sourceCode bash"><code class="sourceCode bash"><a class="sourceLine" id="cb1-1" data-line-number="1"><span class="ex">docker</span> run --rm -p 8888:8888 \</a>
<a class="sourceLine" id="cb1-2" data-line-number="2"> -v <span class="va">$(</span><span class="ex">WDIR</span><span class="va">)</span>/:/home/jovyan/work/ \</a>
<a class="sourceLine" id="cb1-3" data-line-number="3"> jupyter/scipy-notebook</a></code></pre></div>
</section><section id="and-friendly-too" class="slide level2">
<h2>And friendly, too</h2>
<div class="sourceCode" id="cb2"><pre class="sourceCode yaml"><code class="sourceCode yaml"><a class="sourceLine" id="cb2-1" data-line-number="1"><span class="fu">version:</span><span class="at"> </span><span class="st">&#39;2&#39;</span></a>
<a class="sourceLine" id="cb2-2" data-line-number="2"><span class="fu">services:</span></a>
<a class="sourceLine" id="cb2-3" data-line-number="3"> <span class="fu">jupyter:</span></a>
<a class="sourceLine" id="cb2-4" data-line-number="4"> <span class="fu">image:</span><span class="at"> jupyter/scipy-notebook</span></a>
<a class="sourceLine" id="cb2-5" data-line-number="5"> <span class="fu">volumes:</span></a>
<a class="sourceLine" id="cb2-6" data-line-number="6"> <span class="kw">-</span> <span class="st">&quot;./.nbconfig:/home/jovyan/.jupyter/nbconfig&quot;</span></a>
<a class="sourceLine" id="cb2-7" data-line-number="7"> <span class="kw">-</span> <span class="st">&quot;./work:/home/jovyan/work/&quot;</span></a>
<a class="sourceLine" id="cb2-8" data-line-number="8"> <span class="fu">ports:</span></a>
<a class="sourceLine" id="cb2-9" data-line-number="9"> <span class="kw">-</span> <span class="st">&quot;8888:8888&quot;</span></a></code></pre></div>
<div class="sourceCode" id="cb3"><pre class="sourceCode bash"><code class="sourceCode bash"><a class="sourceLine" id="cb3-1" data-line-number="1"><span class="ex">docker-compose</span> up</a></code></pre></div>
</section><section id="related-projects" class="slide level2">
<h2>Related projects</h2>
<ul>
<li>Using docker images to share trained systems</li>
</ul>
<figure>
<img data-src="img/gym.png" alt="https://gym.openai.com" height="500" /><figcaption><a href="https://gym.openai.com" class="uri">https://gym.openai.com</a></figcaption>
</figure>
</section></section>
<section><section id="for-small-groups" class="title-slide slide level1 white" data-background="img/group.jpeg"><h1>For small groups</h1></section><section id="requirements" class="slide level2">
<h2>Requirements</h2>
<ul>
<li>Shared environments</li>
<li>Resource sharing</li>
<li>Easy configuration</li>
<li>Versioning</li>
<li>Backups</li>
</ul>
<p>And <strong>little to no overhead</strong></p>
</section><section id="isolation" class="slide level2">
<h2>Isolation</h2>
<p><img data-src="img/noidea.jpg" /></p>
</section><section id="jupyterhub" class="slide level2">
<h2>Jupyterhub</h2>
<div class="columns">
<div class="column" style="width:60%;">
<figure>
<img data-src="img/jhub-parts.png" alt="http://jupyterhub.readthedocs.io/" height="500" /><figcaption><a href="http://jupyterhub.readthedocs.io/" class="uri">http://jupyterhub.readthedocs.io/</a></figcaption>
</figure>
</div><div class="column" style="width:40%;">
<h4 id="authenticators">Authenticators</h4>
<ul>
<li>Local</li>
<li>OAuth</li>
<li>LDAP</li>
<li>JWT</li>
</ul>
<h4 id="spawners">Spawners</h4>
<ul>
<li>Local</li>
<li>Docker</li>
<li>Kubernetes</li>
<li>Marathon</li>
</ul>
</div>
</div>
</section><section id="more-infrastructure" class="slide level2">
<h2>More infrastructure</h2>
<p><img data-src="img/docker-gitlab.jpg" class="noborder" height="250" /> <img data-src="img/nextcloud.jpg" class="noborder" height="250" /></p>
<p><img data-src="img/sharelatex.jpg" class="noborder" height="250" /></p>
</section></section>
<section><section id="demo" class="title-slide slide level1" data-background="img/party.jpg"><h1>Demo</h1></section><section id="its-demo-time" class="slide level2">
<h2>It’s demo time</h2>
<p><img data-src="img/demogods.jpg" style="height:80.0%" /></p>
<p><a href="https://github.todevnull.com" class="uri">https://github.todevnull.com</a> <a href="https://github.com/balkian/lab-in-a-box" class="uri">https://github.com/balkian/lab-in-a-box</a></p>
</section></section>
<section><section id="other-tools" class="title-slide slide level1"><h1>Other tools</h1></section><section id="zeppelin" class="slide level2">
<h2>Zeppelin</h2>
<ul>
<li>Alternative to Jupyter</li>
</ul>
<figure>
<img data-src="img/zeppelin.png" alt="https://zeppelin.apache.org/" /><figcaption><a href="https://zeppelin.apache.org/" class="uri">https://zeppelin.apache.org/</a></figcaption>
</figure>
</section><section id="cocalc" class="slide level2">
<h2>CoCalc</h2>
<ul>
<li>Alternative to Jupyter</li>
</ul>
<figure>
<img data-src="img/cocalc.png" alt="https://cocalc.org/" height="500" /><figcaption><a href="https://cocalc.org/" class="uri">https://cocalc.org/</a></figcaption>
</figure>
</section><section id="docker-nvidia" class="slide level2">
<h2>Docker-Nvidia</h2>
<ul>
<li>CUDA for docker</li>
</ul>
<figure>
<img data-src="img/dockernvidia.png" alt="https://github.com/NVIDIA/nvidia-docker" /><figcaption><a href="https://github.com/NVIDIA/nvidia-docker" class="uri">https://github.com/NVIDIA/nvidia-docker</a></figcaption>
</figure>
</section><section id="jupyter-binder" class="slide level2">
<h2>Jupyter Binder</h2>
<ul>
<li>Custom Jupyter from git repositories</li>
</ul>
<figure>
<img data-src="img/binder.png" alt="https://mybinder.org/" height="500" /><figcaption><a href="https://mybinder.org/" class="uri">https://mybinder.org/</a></figcaption>
</figure>
</section><section id="knowledge-repo" class="slide level2">
<h2>Knowledge-Repo</h2>
<figure>
<img data-src="img/knowledgerepo.png" alt="http://knowledge-repo.readthedocs.io/" /><figcaption><a href="http://knowledge-repo.readthedocs.io/" class="uri">http://knowledge-repo.readthedocs.io/</a></figcaption>
</figure>
</section></section>
<section><section id="conclusions" class="title-slide slide level1"><h1>Conclusions</h1></section><section id="lessons-learned" class="slide level2">
<h2>Lessons learned</h2>
<ul>
<li>Docker + Docker-compose
<ul>
<li>Reproducible environments (partially)</li>
<li>Reduced tooling / experience</li>
<li>Ephemeral containers force you to automate/document installation</li>
</ul></li>
<li>Jupyterhub
<ul>
<li>Shared environments</li>
<li>Web interface (zero knowledge)</li>
</ul></li>
</ul>
</section><section id="whats-missing" class="slide level2">
<h2>What’s missing?</h2>
<ul>
<li>Roles and permissions</li>
<li><p>Backups</p></li>
<li>Ideas:
<ul>
<li>Kubernetes?</li>
<li>OpenShift?</li>
</ul></li>
</ul>
</section><section id="thanks-for-listening" class="slide level2">
<h2>Thanks for listening!</h2>
<p><a href="https://github.com/balkian/lab-in-a-box" class="uri">https://github.com/balkian/lab-in-a-box</a></p>
<p><a href="mailto:jf.sanchez@upm.es">jf.sanchez@upm.es</a></p>
</section></section>
</div>
</div>
<script src="reveal.js/lib/js/head.min.js"></script>
<script src="reveal.js/js/reveal.js"></script>
<script>
// Start reveal.js. Every available configuration option is listed at:
// https://github.com/hakimel/reveal.js#configuration
Reveal.initialize({
  // Presentation height handed to reveal.js
  height: 800,
  // Push each slide change to the browser history
  history: true,
  // Optional reveal.js plugins
  dependencies: [
    {
      src: 'reveal.js/lib/js/classList.js',
      // Only requested when document.body has no classList property
      condition: function() { return !document.body.classList; }
    },
    {
      src: 'reveal.js/plugin/zoom-js/zoom.js',
      async: true
    },
    {
      src: 'reveal.js/plugin/notes/notes.js',
      async: true
    }
  ]
});
</script>
</body>
</html>

@ -0,0 +1,306 @@
---
title: Docker for research
subtitle: ... and data analysis
author: J. Fernando Sánchez (<jf.sanchez@upm.es>)
tags: [Docker, CI, research]
date: 2018
abstract: Talk about docker for research and data analysis
---
# Intro { .white data-background="img/intro.jpg"}
## Before we begin
Code available at:
<https://github.com/balkian/lab-in-a-box>
Live demos at:
**<https://github.todevnull.com>**
<https://lab.todevnull.com>
<https://hub.todevnull.com>
Feel free to log in, but try not to break them for now 😉
## My name is Fernando and...
![](img/im-a-researcher.jpg)
## At Grupo de Sistemas Inteligentes
:::::::::::::: {.columns}
::: {.column width="50%"}
![](img/gsi.png)
:::
::: {.column width="50%"}
- Machine Learning and Big Data
- NLP and Sentiment Analysis
- Social Network Analysis
- Agents and Simulation
- Linked Data and Semantic Technologies
:::
:::::::::::::::
<http://www.gsi.dit.upm.es>
## And I ❤ Docker
:::::::::::::: {.columns}
::: {.column width="50%"}
![](img/docker.jpg)
:::
::: {.column width="50%"}
* Docker+research for 3+ years
* Advocate for ~2 years
* Internal infrastructure: ansible, k8s and docker
* Teach (with) it
:::
::::::::::::::
## About this talk
Takeaway: ***you can set up a multi-user data analysis environment with isolation in minutes***
Plus: using docker to perform and share experiments is even easier
Related Meetups:
[Big Data and Machine Learning with Docker](https://www.meetup.com/Docker-Madrid/events/240357800/)
[Using Docker in Machine Learning Projects](https://www.meetup.com/Docker-Madrid/events/237067604/)
# For researchers {.white data-background="img/research.jpg" style="color:white"}
<!-- ## Research is about data -->
<!-- ![The scientific method](img/scientificmethod.png){.noborder height="500px"} -->
## Experiment, publish, repeat
![](img/peerreview.jpg)
## Reproducibility
![[\@ianholmes](https://twitter.com/ianholmes/status/288689712636493824)](img/goodluck.png)
## Obstacles
:::::::::::::: {.columns}
::: {.column width="50%"}
* **Missing data**
* Bleeding edge tools and libraries
* Throwaway software
* Hacky
* Little to no documentation
* Multiple languages
:::
::: {.column width="50%"}
![<https://xkcd.com/1742/>](img/will_it_work.png){ height=80% }
:::
::::::::::::::
## Obstacles
![](img/noidea-pc.png)
## Is it a problem?
![[https://www.nature.com/](https://www.nature.com/news/1-500-scientists-lift-the-lid-on-reproducibility-1.19970)](img/reproducibility.jpg){ height=80% }
## Jupyter notebooks
![](img/jupyter-screenshot.png)
## Jupyter architecture
![<http://jupyter.readthedocs.io>](img/jupyter-architecture.png)
## Docker to the rescue
![[towardsdatascience.com](https://towardsdatascience.com/how-docker-can-help-you-become-a-more-effective-data-scientist-7fc048ef91d5)](img/dockerrescue.png)
## Jupyter/docker-stacks
![](img/dockerstacks.png){ height=50% }
## Reproducible environment
```bash
docker run --rm -p 8888:8888 \
-v $(WDIR)/:/home/jovyan/work/ \
jupyter/scipy-notebook
```
## And friendly, too
```yaml
version: '2'
services:
jupyter:
image: jupyter/scipy-notebook
volumes:
- "./.nbconfig:/home/jovyan/.jupyter/nbconfig"
- "./work:/home/jovyan/work/"
ports:
- "8888:8888"
```
```bash
docker-compose up
```
## Related projects
* Using docker images to share trained systems
![<https://gym.openai.com>](img/gym.png){ height=500px }
# For small groups { .white data-background="img/group.jpeg" }
## Requirements
* Shared environments
* Resource sharing
* Easy configuration
* Versioning
* Backups
And **little to no overhead**
## Isolation
![](img/noidea.jpg)
## Jupyterhub
:::::::::::::: {.columns}
::: {.column width="60%"}
![<http://jupyterhub.readthedocs.io/>](img/jhub-parts.png){ height=500px }
:::
::: {.column width="40%"}
#### Authenticators
* Local
* OAuth
* LDAP
* JWT
#### Spawners
* Local
* Docker
* Kubernetes
* Marathon
:::
:::::::::::::::
## More infrastructure
![](img/docker-gitlab.jpg){.noborder height="250px"}
![](img/nextcloud.jpg){.noborder height="250px"}
![](img/sharelatex.jpg){.noborder height="250px"}
# Demo { data-background="img/party.jpg"}
## It's demo time
![](img/demogods.jpg){ height=80% }
<https://github.todevnull.com>
<https://github.com/balkian/lab-in-a-box>
# Other tools
## Zeppelin
* Alternative to Jupyter
![<https://zeppelin.apache.org/>](img/zeppelin.png)
## CoCalc
* Alternative to Jupyter
![<https://cocalc.org/>](img/cocalc.png){ height=500px }
## Docker-Nvidia
* CUDA for docker
![<https://github.com/NVIDIA/nvidia-docker>](img/dockernvidia.png)
## Jupyter Binder
* Custom Jupyter from git repositories
![<https://mybinder.org/>](img/binder.png){ height=500px }
## Knowledge-Repo
![<http://knowledge-repo.readthedocs.io/>](img/knowledgerepo.png)
# Conclusions
## Lessons learned
* Docker + Docker-compose
* Reproducible environments (partially)
* Reduced tooling / experience
* Ephemeral containers force you to automate/document installation
* Jupyterhub
* Shared environments
* Web interface (zero knowledge)
## What's missing?
* Roles and permissions
* Backups
* Ideas:
* Kubernetes?
* OpenShift?
## Thanks for listening!
<https://github.com/balkian/lab-in-a-box>
<jf.sanchez@upm.es>

@ -0,0 +1,11 @@
/* Level-1 headings inside an element with class "white":
   black text drawn with a 3px white outline. */
.white h1 {
  -webkit-text-stroke: 3px white;
  text-stroke: 3px white;
  color: black;
}
/* Images with class "noborder": no border and no drop shadow.
   NOTE(review): !important is presumably needed to beat the reveal.js
   theme's own image rules; confirm against the theme stylesheet. */
img.noborder {
  -webkit-box-shadow: none !important;
  -moz-box-shadow: none !important;
  box-shadow: none !important;
  border: none !important;
}
Loading…
Cancel
Save