Add slides
@ -0,0 +1 @@
|
||||
reveal.js
|
After Width: | Height: | Size: 85 KiB |
After Width: | Height: | Size: 52 KiB |
After Width: | Height: | Size: 122 KiB |
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 41 KiB |
After Width: | Height: | Size: 39 KiB |
After Width: | Height: | Size: 47 KiB |
After Width: | Height: | Size: 178 KiB |
After Width: | Height: | Size: 37 KiB |
@ -0,0 +1,60 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:inkspace="http://www.inkscape.org/namespaces/inkscape" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 640 520">
|
||||
<defs id="defs_block">
|
||||
<filter height="1.504" id="filter_blur" width="1.1575" x="-0.07875" y="-0.252">
|
||||
<feGaussianBlur id="feGaussianBlur3780" stdDeviation="4.2"/>
|
||||
</filter>
|
||||
</defs>
|
||||
<title>blockdiag</title>
|
||||
<desc/>
|
||||
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="67" y="46"/>
|
||||
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="67" y="126"/>
|
||||
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="67" y="206"/>
|
||||
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="67" y="286"/>
|
||||
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="259" y="286"/>
|
||||
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="67" y="366"/>
|
||||
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="259" y="366"/>
|
||||
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="451" y="366"/>
|
||||
<rect fill="rgb(0,0,0)" height="40" stroke="rgb(0,0,0)" style="filter:url(#filter_blur);opacity:0.7;fill-opacity:1" width="128" x="451" y="446"/>
|
||||
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="64" y="40"/>
|
||||
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="44" x="128" y="64">ubuntu@SHA</text>
|
||||
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="64" y="120"/>
|
||||
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="58" x="128" y="144">base-notebook</text>
|
||||
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="64" y="200"/>
|
||||
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="71" x="128" y="224">minimal-notebook</text>
|
||||
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="64" y="280"/>
|
||||
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="62" x="128" y="304">scipy-notebook</text>
|
||||
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="256" y="280"/>
|
||||
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="44" x="320" y="304">r-notebook</text>
|
||||
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="64" y="360"/>
|
||||
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="84" x="128" y="384">tensorflow-notebook</text>
|
||||
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="256" y="360"/>
|
||||
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="88" x="320" y="384">datascience-notebook</text>
|
||||
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="448" y="360"/>
|
||||
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="71" x="512" y="384">pyspark-notebook</text>
|
||||
<rect fill="rgb(255,255,255)" height="40" stroke="rgb(0,0,0)" width="128" x="448" y="440"/>
|
||||
<text fill="rgb(0,0,0)" font-family="sans-serif" font-size="8" font-style="normal" font-weight="normal" text-anchor="middle" textLength="80" x="512" y="464">all-spark-notebook</text>
|
||||
<path d="M 128 80 L 128 112" fill="none" stroke="rgb(0,0,0)"/>
|
||||
<polygon fill="rgb(0,0,0)" points="128,119 124,112 132,112 128,119" stroke="rgb(0,0,0)"/>
|
||||
<path d="M 128 160 L 128 192" fill="none" stroke="rgb(0,0,0)"/>
|
||||
<polygon fill="rgb(0,0,0)" points="128,199 124,192 132,192 128,199" stroke="rgb(0,0,0)"/>
|
||||
<path d="M 128 240 L 128 260" fill="none" stroke="rgb(0,0,0)"/>
|
||||
<path d="M 128 260 L 320 260" fill="none" stroke="rgb(0,0,0)"/>
|
||||
<path d="M 320 260 L 320 272" fill="none" stroke="rgb(0,0,0)"/>
|
||||
<polygon fill="rgb(0,0,0)" points="320,279 316,272 324,272 320,279" stroke="rgb(0,0,0)"/>
|
||||
<path d="M 128 240 L 128 272" fill="none" stroke="rgb(0,0,0)"/>
|
||||
<polygon fill="rgb(0,0,0)" points="128,279 124,272 132,272 128,279" stroke="rgb(0,0,0)"/>
|
||||
<path d="M 128 320 L 128 352" fill="none" stroke="rgb(0,0,0)"/>
|
||||
<polygon fill="rgb(0,0,0)" points="128,359 124,352 132,352 128,359" stroke="rgb(0,0,0)"/>
|
||||
<path d="M 128 320 L 128 340" fill="none" stroke="rgb(0,0,0)"/>
|
||||
<path d="M 128 340 L 320 340" fill="none" stroke="rgb(0,0,0)"/>
|
||||
<path d="M 320 340 L 320 352" fill="none" stroke="rgb(0,0,0)"/>
|
||||
<polygon fill="rgb(0,0,0)" points="320,359 316,352 324,352 320,359" stroke="rgb(0,0,0)"/>
|
||||
<path d="M 128 320 L 128 340" fill="none" stroke="rgb(0,0,0)"/>
|
||||
<path d="M 128 340 L 512 340" fill="none" stroke="rgb(0,0,0)"/>
|
||||
<path d="M 512 340 L 512 352" fill="none" stroke="rgb(0,0,0)"/>
|
||||
<polygon fill="rgb(0,0,0)" points="512,359 508,352 516,352 512,359" stroke="rgb(0,0,0)"/>
|
||||
<path d="M 512 400 L 512 432" fill="none" stroke="rgb(0,0,0)"/>
|
||||
<polygon fill="rgb(0,0,0)" points="512,439 508,432 516,432 512,439" stroke="rgb(0,0,0)"/>
|
||||
</svg>
|
After Width: | Height: | Size: 5.9 KiB |
After Width: | Height: | Size: 58 KiB |
After Width: | Height: | Size: 58 KiB |
After Width: | Height: | Size: 332 KiB |
After Width: | Height: | Size: 176 KiB |
After Width: | Height: | Size: 109 KiB |
After Width: | Height: | Size: 119 KiB |
After Width: | Height: | Size: 80 KiB |
After Width: | Height: | Size: 30 KiB |
After Width: | Height: | Size: 141 KiB |
After Width: | Height: | Size: 90 KiB |
After Width: | Height: | Size: 13 KiB |
After Width: | Height: | Size: 314 KiB |
After Width: | Height: | Size: 121 KiB |
After Width: | Height: | Size: 222 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 171 KiB |
After Width: | Height: | Size: 109 KiB |
After Width: | Height: | Size: 36 KiB |
After Width: | Height: | Size: 560 KiB |
After Width: | Height: | Size: 41 KiB |
After Width: | Height: | Size: 22 KiB |
After Width: | Height: | Size: 27 KiB |
After Width: | Height: | Size: 190 KiB |
@ -0,0 +1 @@
|
||||
slides.html
|
@ -0,0 +1,6 @@
|
||||
#!/bin/sh
# Build the reveal.js deck from slides.md, then upload the current directory
# to the slides host. Any extra command-line arguments are forwarded to pandoc.

# Stop at the first failing command: without this, a failed pandoc build would
# still fall through to rsync and publish a stale (or missing) slides.html.
set -e

#pandoc -t beamer slides.md --slide-level=2 -o slides.pdf -V theme:metropolis --toc --toc-depth 1 "$@"
pandoc -t revealjs -s --dpi 120 slides.md --css=style.css --slide-level=2 -o slides.html -V theme=white -V height=800 "$@"

echo "Syncing $PWD"
# The trailing slash on the source makes rsync copy the directory contents
# rather than the directory itself; ssh is reached on port 2222.
rsync -Paz "$PWD/" -e 'ssh -p 2222' root@slides.todevnull.com:/data/html/
echo 'Done'
|
@ -0,0 +1,3 @@
|
||||
#!/bin/sh
# Re-run ./run.sh every time slides.md changes.
# entr reads the list of files to watch from standard input, one per line.
printf '%s\n' 'slides.md' | entr ./run.sh
|
||||
|
@ -0,0 +1,306 @@
|
||||
---
|
||||
title: Docker for research
|
||||
subtitle: ... and data analysis
|
||||
author: J. Fernando Sánchez (<jf.sanchez@upm.es>)
|
||||
tags: [Docker, CI, research]
|
||||
date: 2018
|
||||
abstract: Talk about docker for research and data analysis
|
||||
|
||||
---
|
||||
|
||||
# Intro { .white data-background="img/intro.jpg"}
|
||||
|
||||
## Before we begin
|
||||
|
||||
Code available at:
|
||||
|
||||
<https://github.com/balkian/lab-in-a-box>
|
||||
|
||||
Live demos at:
|
||||
|
||||
**<https://github.todevnull.com>**
|
||||
|
||||
<https://lab.todevnull.com>
|
||||
|
||||
<https://hub.todevnull.com>
|
||||
|
||||
|
||||
Feel free to log in, but try not to break them for now 😉
|
||||
|
||||
|
||||
## My name is Fernando and...
|
||||
|
||||
|
||||
![](img/im-a-researcher.jpg)
|
||||
|
||||
|
||||
|
||||
## At Grupo de Sistemas Inteligentes
|
||||
|
||||
|
||||
:::::::::::::: {.columns}
|
||||
::: {.column width="50%"}
|
||||
![](img/gsi.png)
|
||||
:::
|
||||
::: {.column width="50%"}
|
||||
- Machine Learning and Big Data
|
||||
- NLP and Sentiment Analysis
|
||||
- Social Network Analysis
|
||||
- Agents and Simulation
|
||||
- Linked Data and Semantic Technologies
|
||||
:::
|
||||
:::::::::::::::
|
||||
|
||||
<http://www.gsi.dit.upm.es>
|
||||
|
||||
## And I ❤ Docker
|
||||
|
||||
|
||||
:::::::::::::: {.columns}
|
||||
::: {.column width="50%"}
|
||||
![](img/docker.jpg)
|
||||
|
||||
:::
|
||||
::: {.column width="50%"}
|
||||
* Docker+research for 3+ years
|
||||
* Advocate for ~2 years
|
||||
* Internal infrastructure: ansible, k8s and docker
|
||||
* Teach (with) it
|
||||
:::
|
||||
|
||||
::::::::::::::
|
||||
|
||||
## About this talk
|
||||
|
||||
Takeaway: ***you can set up a multi-user data analysis environment with isolation in minutes***
|
||||
|
||||
Plus: using docker to perform and share experiments is even easier
|
||||
|
||||
Related Meetups:
|
||||
|
||||
[Big Data and Machine Learning with Docker](https://www.meetup.com/Docker-Madrid/events/240357800/)
|
||||
|
||||
[Using Docker in Machine Learning Projects](https://www.meetup.com/Docker-Madrid/events/237067604/)
|
||||
|
||||
# For researchers {.white data-background="img/research.jpg" style="color:white"}
|
||||
|
||||
<!-- ## Research is about data -->
|
||||
|
||||
<!-- ![The scientific method](img/scientificmethod.png){.noborder height="500px"} -->
|
||||
|
||||
|
||||
## Experiment, publish, repeat
|
||||
|
||||
|
||||
![](img/peerreview.jpg)
|
||||
|
||||
|
||||
## Reproducibility
|
||||
|
||||
|
||||
![[\@ianholmes](https://twitter.com/ianholmes/status/288689712636493824)](img/goodluck.png)
|
||||
|
||||
## Obstacles
|
||||
|
||||
:::::::::::::: {.columns}
|
||||
::: {.column width="50%"}
|
||||
|
||||
* **Missing data**
|
||||
* Bleeding edge tools and libraries
|
||||
* Throwaway software
|
||||
* Hacky
|
||||
* Little to no documentation
|
||||
* Multiple languages
|
||||
|
||||
:::
|
||||
::: {.column width="50%"}
|
||||
![<https://xkcd.com/1742/>](img/will_it_work.png){ height=80% }
|
||||
:::
|
||||
|
||||
::::::::::::::
|
||||
|
||||
|
||||
## Obstacles
|
||||
|
||||
![](img/noidea-pc.png)
|
||||
|
||||
## Is it a problem?
|
||||
|
||||
![[https://www.nature.com/](https://www.nature.com/news/1-500-scientists-lift-the-lid-on-reproducibility-1.19970)](img/reproducibility.jpg){ height=80% }
|
||||
|
||||
|
||||
## Jupyter notebooks
|
||||
|
||||
![](img/jupyter-screenshot.png)
|
||||
|
||||
## Jupyter architecture
|
||||
|
||||
![<http://jupyter.readthedocs.io>](img/jupyter-architecture.png)
|
||||
|
||||
|
||||
## Docker to the rescue
|
||||
|
||||
![[towardsdatascience.com](https://towardsdatascience.com/how-docker-can-help-you-become-a-more-effective-data-scientist-7fc048ef91d5)](img/dockerrescue.png)
|
||||
|
||||
|
||||
## Jupyter/docker-stacks
|
||||
|
||||
![](img/dockerstacks.png){ height=50% }
|
||||
|
||||
## Reproducible environment
|
||||
|
||||
```bash
|
||||
docker run --rm -p 8888:8888 \
|
||||
-v "${WDIR}/:/home/jovyan/work/" \
|
||||
jupyter/scipy-notebook
|
||||
```
|
||||
|
||||
## And friendly, too
|
||||
|
||||
|
||||
```yaml
|
||||
version: '2'
|
||||
services:
|
||||
jupyter:
|
||||
image: jupyter/scipy-notebook
|
||||
volumes:
|
||||
- "./.nbconfig:/home/jovyan/.jupyter/nbconfig"
|
||||
- "./work:/home/jovyan/work/"
|
||||
ports:
|
||||
- "8888:8888"
|
||||
```
|
||||
|
||||
```bash
|
||||
docker-compose up
|
||||
```
|
||||
|
||||
## Related projects
|
||||
|
||||
* Using docker images to share trained systems
|
||||
|
||||
![<https://gym.openai.com>](img/gym.png){ height=500px }
|
||||
|
||||
# For small groups { .white data-background="img/group.jpeg" }
|
||||
|
||||
|
||||
## Requirements
|
||||
|
||||
* Shared environments
|
||||
* Resource sharing
|
||||
* Easy configuration
|
||||
* Versioning
|
||||
* Backups
|
||||
|
||||
And **little to no overhead**
|
||||
|
||||
|
||||
## Isolation
|
||||
|
||||
![](img/noidea.jpg)
|
||||
|
||||
## Jupyterhub
|
||||
|
||||
|
||||
:::::::::::::: {.columns}
|
||||
::: {.column width="60%"}
|
||||
|
||||
![<http://jupyterhub.readthedocs.io/>](img/jhub-parts.png){ height=500px }
|
||||
|
||||
:::
|
||||
::: {.column width="40%"}
|
||||
|
||||
#### Authenticators
|
||||
|
||||
* Local
|
||||
* OAuth
|
||||
* LDAP
|
||||
* JWT
|
||||
|
||||
#### Spawners
|
||||
|
||||
* Local
|
||||
* Docker
|
||||
* Kubernetes
|
||||
* Marathon
|
||||
|
||||
:::
|
||||
:::::::::::::::
|
||||
|
||||
## More infrastructure
|
||||
|
||||
![](img/docker-gitlab.jpg){.noborder height="250px"}
|
||||
![](img/nextcloud.jpg){.noborder height="250px"}
|
||||
|
||||
![](img/sharelatex.jpg){.noborder height="250px"}
|
||||
|
||||
# Demo { data-background="img/party.jpg"}
|
||||
|
||||
## It's demo time
|
||||
|
||||
|
||||
![](img/demogods.jpg){ height=80% }
|
||||
|
||||
<https://github.todevnull.com>
|
||||
<https://github.com/balkian/lab-in-a-box>
|
||||
|
||||
# Other tools
|
||||
|
||||
## Zeppelin
|
||||
|
||||
* Alternative to Jupyter
|
||||
|
||||
![<https://zeppelin.apache.org/>](img/zeppelin.png)
|
||||
|
||||
## CoCalc
|
||||
|
||||
* Alternative to Jupyter
|
||||
|
||||
![<https://cocalc.org/>](img/cocalc.png){ height=500px }
|
||||
|
||||
|
||||
## Docker-Nvidia
|
||||
|
||||
* CUDA for docker
|
||||
|
||||
![<https://github.com/NVIDIA/nvidia-docker>](img/dockernvidia.png)
|
||||
|
||||
## Jupyter Binder
|
||||
|
||||
* Custom Jupyter from git repositories
|
||||
|
||||
![<https://mybinder.org/>](img/binder.png){ height=500px }
|
||||
|
||||
|
||||
## Knowledge-Repo
|
||||
|
||||
![<http://knowledge-repo.readthedocs.io/>](img/knowledgerepo.png)
|
||||
|
||||
|
||||
|
||||
# Conclusions
|
||||
|
||||
## Lessons learned
|
||||
|
||||
* Docker + Docker-compose
|
||||
* Reproducible environments (partially)
|
||||
* Reduced tooling / experience
|
||||
* Ephemeral containers force you to automate/document installation
|
||||
* Jupyterhub
|
||||
* Shared environments
|
||||
* Web interface (zero knowledge)
|
||||
|
||||
## What's missing?
|
||||
|
||||
* Roles and permissions
|
||||
* Backups
|
||||
|
||||
* Ideas:
|
||||
* Kubernetes?
|
||||
* OpenShift?
|
||||
|
||||
## Thanks for listening!
|
||||
|
||||
<https://github.com/balkian/lab-in-a-box>
|
||||
|
||||
<jf.sanchez@upm.es>
|
||||
|
@ -0,0 +1,11 @@
|
||||
/* Section titles shown over a background image (slides tagged .white):
   black text with a white outline so it stays legible on any photo. */
.white h1 {
  color: black;
  -webkit-text-stroke: 3px white;
  text-stroke: 3px white;
}

/* Images tagged .noborder (e.g. logos): remove the frame and drop shadow
   that the reveal.js theme applies to images. */
img.noborder {
  border: none !important;
  -webkit-box-shadow: none !important;
  -moz-box-shadow: none !important;
  box-shadow: none !important;
}
|