mirror of
https://github.com/balkian/lab-in-a-box.git
synced 2024-11-21 06:42:27 +00:00
378 lines
17 KiB
HTML
378 lines
17 KiB
HTML
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta name="generator" content="pandoc">
|
||
<meta name="author" content="J. Fernando Sánchez (jf.sanchez@upm.es)">
|
||
<meta name="dcterms.date" content="2018-01-01">
|
||
<title>Docker for research</title>
|
||
<meta name="apple-mobile-web-app-capable" content="yes">
|
||
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
|
||
<link rel="stylesheet" href="reveal.js/css/reveal.css">
|
||
<style type="text/css">
|
||
code{white-space: pre-wrap;}
|
||
span.smallcaps{font-variant: small-caps;}
|
||
span.underline{text-decoration: underline;}
|
||
div.column{display: inline-block; vertical-align: top; width: 50%;}
|
||
</style>
|
||
<style type="text/css">
|
||
a.sourceLine { display: inline-block; line-height: 1.25; }
|
||
a.sourceLine { pointer-events: none; color: inherit; text-decoration: inherit; }
|
||
a.sourceLine:empty { height: 1.2em; position: absolute; }
|
||
.sourceCode { overflow: visible; }
|
||
code.sourceCode { white-space: pre; position: relative; }
|
||
div.sourceCode { margin: 1em 0; }
|
||
pre.sourceCode { margin: 0; }
|
||
@media screen {
|
||
div.sourceCode { overflow: auto; }
|
||
}
|
||
@media print {
|
||
code.sourceCode { white-space: pre-wrap; }
|
||
a.sourceLine { text-indent: -1em; padding-left: 1em; }
|
||
}
|
||
pre.numberSource a.sourceLine
|
||
{ position: relative; }
|
||
pre.numberSource a.sourceLine:empty
|
||
{ position: absolute; }
|
||
pre.numberSource a.sourceLine::before
|
||
{ content: attr(data-line-number);
|
||
position: absolute; left: -5em; text-align: right; vertical-align: baseline;
|
||
border: none; pointer-events: all;
|
||
-webkit-touch-callout: none; -webkit-user-select: none;
|
||
-khtml-user-select: none; -moz-user-select: none;
|
||
-ms-user-select: none; user-select: none;
|
||
padding: 0 4px; width: 4em;
|
||
color: #aaaaaa;
|
||
}
|
||
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
|
||
div.sourceCode
|
||
{ }
|
||
@media screen {
|
||
a.sourceLine::before { text-decoration: underline; }
|
||
}
|
||
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
|
||
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
|
||
code span.at { color: #7d9029; } /* Attribute */
|
||
code span.bn { color: #40a070; } /* BaseN */
|
||
code span.bu { } /* BuiltIn */
|
||
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
|
||
code span.ch { color: #4070a0; } /* Char */
|
||
code span.cn { color: #880000; } /* Constant */
|
||
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
|
||
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
|
||
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
|
||
code span.dt { color: #902000; } /* DataType */
|
||
code span.dv { color: #40a070; } /* DecVal */
|
||
code span.er { color: #ff0000; font-weight: bold; } /* Error */
|
||
code span.ex { } /* Extension */
|
||
code span.fl { color: #40a070; } /* Float */
|
||
code span.fu { color: #06287e; } /* Function */
|
||
code span.im { } /* Import */
|
||
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
|
||
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
|
||
code span.op { color: #666666; } /* Operator */
|
||
code span.ot { color: #007020; } /* Other */
|
||
code span.pp { color: #bc7a00; } /* Preprocessor */
|
||
code span.sc { color: #4070a0; } /* SpecialChar */
|
||
code span.ss { color: #bb6688; } /* SpecialString */
|
||
code span.st { color: #4070a0; } /* String */
|
||
code span.va { color: #19177c; } /* Variable */
|
||
code span.vs { color: #4070a0; } /* VerbatimString */
|
||
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
|
||
</style>
|
||
<link rel="stylesheet" href="reveal.js/css/theme/white.css" id="theme">
|
||
<link rel="stylesheet" href="style.css"/>
|
||
<!-- Printing and PDF exports -->
|
||
<script>
|
||
// Choose the print stylesheet at load time: the PDF sheet when the query
// string contains "print-pdf" (case-insensitive match), the paper sheet otherwise.
var link = document.createElement( 'link' );
|
||
link.rel = 'stylesheet';
|
||
link.type = 'text/css';
|
||
link.href = window.location.search.match( /print-pdf/gi ) ? 'reveal.js/css/print/pdf.css' : 'reveal.js/css/print/paper.css';
|
||
// Append the new element to the first head element of the document.
document.getElementsByTagName( 'head' )[0].appendChild( link );
|
||
</script>
|
||
<!--[if lt IE 9]>
|
||
<script src="reveal.js/lib/js/html5shiv.js"></script>
|
||
<![endif]-->
|
||
</head>
|
||
<body>
|
||
<div class="reveal">
|
||
<div class="slides">
|
||
|
||
<section id="title-slide">
|
||
<h1 class="title">Docker for research</h1>
|
||
<p class="subtitle">… and data analysis</p>
|
||
<p class="author">J. Fernando Sánchez (<a href="mailto:jf.sanchez@upm.es">jf.sanchez@upm.es</a>)</p>
|
||
<p class="date">2018</p>
|
||
</section>
|
||
|
||
<section><section id="intro" class="title-slide slide level1 white" data-background="img/intro.jpg"><h1>Intro</h1></section><section id="before-we-begin" class="slide level2">
|
||
<h2>Before we begin</h2>
|
||
<p>Code available at:</p>
|
||
<p><a href="https://github.com/balkian/lab-in-a-box" class="uri">https://github.com/balkian/lab-in-a-box</a></p>
|
||
<p>Live demos at:</p>
|
||
<p><strong><a href="https://github.todevnull.com" class="uri">https://github.todevnull.com</a></strong></p>
|
||
<p><a href="https://lab.todevnull.com" class="uri">https://lab.todevnull.com</a></p>
|
||
<p><a href="https://hub.todevnull.com" class="uri">https://hub.todevnull.com</a></p>
|
||
<p>Feel free to log in, but try not to break them for now 😉</p>
|
||
</section><section id="my-name-is-fernando-and" class="slide level2">
|
||
<h2>My name is Fernando and…</h2>
|
||
<p><img data-src="img/im-a-researcher.jpg" /></p>
|
||
</section><section id="at-grupo-de-sistemas-inteligentes" class="slide level2">
|
||
<h2>At Grupo de Sistemas Inteligentes</h2>
|
||
<div class="columns">
|
||
<div class="column" style="width:50%;">
|
||
<p><img data-src="img/gsi.png" /></p>
|
||
</div><div class="column" style="width:50%;">
|
||
<ul>
|
||
<li>Machine Learning and Big Data</li>
|
||
<li>NLP and Sentiment Analysis</li>
|
||
<li>Social Network Analysis</li>
|
||
<li>Agents and Simulation</li>
|
||
<li>Linked Data and Semantic Technologies</li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
<p><a href="http://www.gsi.dit.upm.es" class="uri">http://www.gsi.dit.upm.es</a></p>
|
||
</section><section id="and-i-docker" class="slide level2">
|
||
<h2>And I ❤ Docker</h2>
|
||
<div class="columns">
|
||
<div class="column" style="width:50%;">
|
||
<p><img data-src="img/docker.jpg" /></p>
|
||
</div><div class="column" style="width:50%;">
|
||
<ul>
|
||
<li>Docker+research for 3+ years</li>
|
||
<li>Advocate for ~2 years</li>
|
||
<li>Internal infrastructure: ansible, k8s and docker</li>
|
||
<li>Teach (with) it</li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
</section><section id="about-this-talk" class="slide level2">
|
||
<h2>About this talk</h2>
|
||
<p>Takeaway: <strong><em>you can set up a multi-user data analysis environment with isolation in minutes</em></strong></p>
|
||
<p>Plus: using docker to perform and share experiments is even easier</p>
|
||
<p>Related Meetups:</p>
|
||
<p><a href="https://www.meetup.com/Docker-Madrid/events/240357800/">Big Data and Machine Learning with Docker</a></p>
|
||
<p><a href="https://www.meetup.com/Docker-Madrid/events/237067604/">Using Docker in Machine Learning Projects</a></p>
|
||
</section></section>
|
||
<section><section id="for-researchers" class="title-slide slide level1 white" data-background="img/research.jpg" style="color:white"><h1>For researchers</h1></section><section id="experiment-publish-repeat" class="slide level2">
|
||
<h2>Experiment, publish, repeat</h2>
|
||
<p><img data-src="img/peerreview.jpg" /></p>
|
||
</section><section id="reproducibility" class="slide level2">
|
||
<h2>Reproducibility</h2>
|
||
<figure>
|
||
<img data-src="img/goodluck.png" alt="@ianholmes" /><figcaption><a href="https://twitter.com/ianholmes/status/288689712636493824">@ianholmes</a></figcaption>
|
||
</figure>
|
||
</section><section id="obstacles" class="slide level2">
|
||
<h2>Obstacles</h2>
|
||
<div class="columns">
|
||
<div class="column" style="width:50%;">
|
||
<ul>
|
||
<li><strong>Missing data</strong></li>
|
||
<li>Bleeding edge tools and libraries</li>
|
||
<li>Throwaway software
|
||
<ul>
|
||
<li>Hacky</li>
|
||
<li>Little to no documentation</li>
|
||
</ul></li>
|
||
<li>Multiple languages</li>
|
||
</ul>
|
||
</div><div class="column" style="width:50%;">
|
||
<figure>
|
||
<img data-src="img/will_it_work.png" alt="https://xkcd.com/1742/" style="height:80.0%" /><figcaption><a href="https://xkcd.com/1742/" class="uri">https://xkcd.com/1742/</a></figcaption>
|
||
</figure>
|
||
</div>
|
||
</div>
|
||
</section><section id="obstacles-1" class="slide level2">
|
||
<h2>Obstacles</h2>
|
||
<p><img data-src="img/noidea-pc.png" /></p>
|
||
</section><section id="is-it-a-problem" class="slide level2">
|
||
<h2>Is it a problem?</h2>
|
||
<figure>
|
||
<img data-src="img/reproducibility.jpg" alt="https://www.nature.com/" style="height:80.0%" /><figcaption><a href="https://www.nature.com/news/1-500-scientists-lift-the-lid-on-reproducibility-1.19970">https://www.nature.com/</a></figcaption>
|
||
</figure>
|
||
</section><section id="jupyter-notebooks" class="slide level2">
|
||
<h2>Jupyter notebooks</h2>
|
||
<p><img data-src="img/jupyter-screenshot.png" /></p>
|
||
</section><section id="jupyter-architecture" class="slide level2">
|
||
<h2>Jupyter architecture</h2>
|
||
<figure>
|
||
<img data-src="img/jupyter-architecture.png" alt="http://jupyter.readthedocs.io" /><figcaption><a href="http://jupyter.readthedocs.io" class="uri">http://jupyter.readthedocs.io</a></figcaption>
|
||
</figure>
|
||
</section><section id="docker-to-the-rescue" class="slide level2">
|
||
<h2>Docker to the rescue</h2>
|
||
<figure>
|
||
<img data-src="img/dockerrescue.png" alt="towardsdatascience.com" /><figcaption><a href="https://towardsdatascience.com/how-docker-can-help-you-become-a-more-effective-data-scientist-7fc048ef91d5">towardsdatascience.com</a></figcaption>
|
||
</figure>
|
||
</section><section id="jupyterdocker-stacks" class="slide level2">
|
||
<h2>Jupyter/docker-stacks</h2>
|
||
<p><img data-src="img/dockerstacks.png" style="height:50.0%" /></p>
|
||
</section><section id="reproducible-environment" class="slide level2">
|
||
<h2>Reproducible environment</h2>
|
||
<div class="sourceCode" id="cb1"><pre class="sourceCode bash"><code class="sourceCode bash"><a class="sourceLine" id="cb1-1" data-line-number="1"><span class="ex">docker</span> run --rm -p 8888:8888 \</a>
|
||
<a class="sourceLine" id="cb1-2" data-line-number="2"> -v <span class="va">${WDIR}</span>/:/home/jovyan/work/ \</a>
|
||
<a class="sourceLine" id="cb1-3" data-line-number="3"> jupyter/scipy-notebook</a></code></pre></div>
|
||
</section><section id="and-friendly-too" class="slide level2">
|
||
<h2>And friendly, too</h2>
|
||
<div class="sourceCode" id="cb2"><pre class="sourceCode yaml"><code class="sourceCode yaml"><a class="sourceLine" id="cb2-1" data-line-number="1"><span class="fu">version:</span><span class="at"> </span><span class="st">'2'</span></a>
|
||
<a class="sourceLine" id="cb2-2" data-line-number="2"><span class="fu">services:</span></a>
|
||
<a class="sourceLine" id="cb2-3" data-line-number="3"> <span class="fu">jupyter:</span></a>
|
||
<a class="sourceLine" id="cb2-4" data-line-number="4"> <span class="fu">image:</span><span class="at"> jupyter/scipy-notebook</span></a>
|
||
<a class="sourceLine" id="cb2-5" data-line-number="5"> <span class="fu">volumes:</span></a>
|
||
<a class="sourceLine" id="cb2-6" data-line-number="6"> <span class="kw">-</span> <span class="st">"./.nbconfig:/home/jovyan/.jupyter/nbconfig"</span></a>
|
||
<a class="sourceLine" id="cb2-7" data-line-number="7"> <span class="kw">-</span> <span class="st">"./work:/home/jovyan/work/"</span></a>
|
||
<a class="sourceLine" id="cb2-8" data-line-number="8"> <span class="fu">ports:</span></a>
|
||
<a class="sourceLine" id="cb2-9" data-line-number="9"> <span class="kw">-</span> <span class="st">"8888:8888"</span></a></code></pre></div>
|
||
<div class="sourceCode" id="cb3"><pre class="sourceCode bash"><code class="sourceCode bash"><a class="sourceLine" id="cb3-1" data-line-number="1"><span class="ex">docker-compose</span> up</a></code></pre></div>
|
||
</section><section id="related-projects" class="slide level2">
|
||
<h2>Related projects</h2>
|
||
<ul>
|
||
<li>Using docker images to share trained systems</li>
|
||
</ul>
|
||
<figure>
|
||
<img data-src="img/gym.png" alt="https://gym.openai.com" height="500" /><figcaption><a href="https://gym.openai.com" class="uri">https://gym.openai.com</a></figcaption>
|
||
</figure>
|
||
</section></section>
|
||
<section><section id="for-small-groups" class="title-slide slide level1 white" data-background="img/group.jpeg"><h1>For small groups</h1></section><section id="requirements" class="slide level2">
|
||
<h2>Requirements</h2>
|
||
<ul>
|
||
<li>Shared environments</li>
|
||
<li>Resource sharing</li>
|
||
<li>Easy configuration</li>
|
||
<li>Versioning</li>
|
||
<li>Backups</li>
|
||
</ul>
|
||
<p>And <strong>little to no overhead</strong></p>
|
||
</section><section id="isolation" class="slide level2">
|
||
<h2>Isolation</h2>
|
||
<p><img data-src="img/noidea.jpg" /></p>
|
||
</section><section id="jupyterhub" class="slide level2">
|
||
<h2>Jupyterhub</h2>
|
||
<div class="columns">
|
||
<div class="column" style="width:60%;">
|
||
<figure>
|
||
<img data-src="img/jhub-parts.png" alt="http://jupyterhub.readthedocs.io/" height="500" /><figcaption><a href="http://jupyterhub.readthedocs.io/" class="uri">http://jupyterhub.readthedocs.io/</a></figcaption>
|
||
</figure>
|
||
</div><div class="column" style="width:40%;">
|
||
<h4 id="authenticators">Authenticators</h4>
|
||
<ul>
|
||
<li>Local</li>
|
||
<li>OAuth</li>
|
||
<li>LDAP</li>
|
||
<li>JWT</li>
|
||
</ul>
|
||
<h4 id="spawners">Spawners</h4>
|
||
<ul>
|
||
<li>Local</li>
|
||
<li>Docker</li>
|
||
<li>Kubernetes</li>
|
||
<li>Marathon</li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
</section><section id="more-infrastructure" class="slide level2">
|
||
<h2>More infrastructure</h2>
|
||
<p><img data-src="img/docker-gitlab.jpg" class="noborder" height="250" /> <img data-src="img/nextcloud.jpg" class="noborder" height="250" /></p>
|
||
<p><img data-src="img/sharelatex.jpg" class="noborder" height="250" /></p>
|
||
</section></section>
|
||
<section><section id="demo" class="title-slide slide level1" data-background="img/party.jpg"><h1>Demo</h1></section><section id="its-demo-time" class="slide level2">
|
||
<h2>It’s demo time</h2>
|
||
<p><img data-src="img/demogods.jpg" style="height:80.0%" /></p>
|
||
<p><a href="https://github.todevnull.com" class="uri">https://github.todevnull.com</a> <a href="https://github.com/balkian/lab-in-a-box" class="uri">https://github.com/balkian/lab-in-a-box</a></p>
|
||
</section></section>
|
||
<section><section id="other-tools" class="title-slide slide level1"><h1>Other tools</h1></section><section id="zeppelin" class="slide level2">
|
||
<h2>Zeppelin</h2>
|
||
<ul>
|
||
<li>Alternative to Jupyter</li>
|
||
</ul>
|
||
<figure>
|
||
<img data-src="img/zeppelin.png" alt="https://zeppelin.apache.org/" /><figcaption><a href="https://zeppelin.apache.org/" class="uri">https://zeppelin.apache.org/</a></figcaption>
|
||
</figure>
|
||
</section><section id="cocalc" class="slide level2">
|
||
<h2>CoCalc</h2>
|
||
<ul>
|
||
<li>Alternative to Jupyter</li>
|
||
</ul>
|
||
<figure>
|
||
<img data-src="img/cocalc.png" alt="https://cocalc.org/" height="500" /><figcaption><a href="https://cocalc.org/" class="uri">https://cocalc.org/</a></figcaption>
|
||
</figure>
|
||
</section><section id="docker-nvidia" class="slide level2">
|
||
<h2>Docker-Nvidia</h2>
|
||
<ul>
|
||
<li>CUDA for docker</li>
|
||
</ul>
|
||
<figure>
|
||
<img data-src="img/dockernvidia.png" alt="https://github.com/NVIDIA/nvidia-docker" /><figcaption><a href="https://github.com/NVIDIA/nvidia-docker" class="uri">https://github.com/NVIDIA/nvidia-docker</a></figcaption>
|
||
</figure>
|
||
</section><section id="jupyter-binder" class="slide level2">
|
||
<h2>Jupyter Binder</h2>
|
||
<ul>
|
||
<li>Custom Jupyter from git repositories</li>
|
||
</ul>
|
||
<figure>
|
||
<img data-src="img/binder.png" alt="https://mybinder.org/" height="500" /><figcaption><a href="https://mybinder.org/" class="uri">https://mybinder.org/</a></figcaption>
|
||
</figure>
|
||
</section><section id="knowledge-repo" class="slide level2">
|
||
<h2>Knowledge-Repo</h2>
|
||
<figure>
|
||
<img data-src="img/knowledgerepo.png" alt="http://knowledge-repo.readthedocs.io/" /><figcaption><a href="http://knowledge-repo.readthedocs.io/" class="uri">http://knowledge-repo.readthedocs.io/</a></figcaption>
|
||
</figure>
|
||
</section></section>
|
||
<section><section id="conclusions" class="title-slide slide level1"><h1>Conclusions</h1></section><section id="lessons-learned" class="slide level2">
|
||
<h2>Lessons learned</h2>
|
||
<ul>
|
||
<li>Docker + Docker-compose
|
||
<ul>
|
||
<li>Reproducible environments (partially)</li>
|
||
<li>Reduced tooling / experience</li>
|
||
<li>Ephemeral containers force you to automate/document installation</li>
|
||
</ul></li>
|
||
<li>Jupyterhub
|
||
<ul>
|
||
<li>Shared environments</li>
|
||
<li>Web interface (zero knowledge)</li>
|
||
</ul></li>
|
||
</ul>
|
||
</section><section id="whats-missing" class="slide level2">
|
||
<h2>What’s missing?</h2>
|
||
<ul>
|
||
<li>Roles and permissions</li>
|
||
<li><p>Backups</p></li>
|
||
<li>Ideas:
|
||
<ul>
|
||
<li>Kubernetes?</li>
|
||
<li>OpenShift?</li>
|
||
</ul></li>
|
||
</ul>
|
||
</section><section id="thanks-for-listening" class="slide level2">
|
||
<h2>Thanks for listening!</h2>
|
||
<p><a href="https://github.com/balkian/lab-in-a-box" class="uri">https://github.com/balkian/lab-in-a-box</a></p>
|
||
<p><a href="mailto:jf.sanchez@upm.es">jf.sanchez@upm.es</a></p>
|
||
</section></section>
|
||
</div>
|
||
</div>
|
||
|
||
<script src="reveal.js/lib/js/head.min.js"></script>
|
||
<script src="reveal.js/js/reveal.js"></script>
|
||
|
||
<script>
|
||
|
||
// Start the presentation by passing a single configuration object to Reveal.initialize.
// Full list of configuration options available at:
|
||
// https://github.com/hakimel/reveal.js#configuration
|
||
Reveal.initialize({
|
||
// Push each slide change to the browser history
|
||
history: true,
|
||
// NOTE(review): presumably the nominal slide height in pixels — confirm against the reveal.js configuration docs
height: 800,
|
||
|
||
// Optional reveal.js plugins
|
||
dependencies: [
|
||
// classList.js carries a condition that is true only when document.body.classList is falsy
{ src: 'reveal.js/lib/js/classList.js', condition: function() { return !document.body.classList; } },
|
||
// The zoom and notes plugin scripts are both flagged async
{ src: 'reveal.js/plugin/zoom-js/zoom.js', async: true },
|
||
{ src: 'reveal.js/plugin/notes/notes.js', async: true }
|
||
]
|
||
});
|
||
</script>
|
||
</body>
|
||
</html>
|