diff --git a/slides/.gitignore b/slides/.gitignore new file mode 100644 index 0000000..6b5747f --- /dev/null +++ b/slides/.gitignore @@ -0,0 +1 @@ +reveal.js \ No newline at end of file diff --git a/slides/img/binder.png b/slides/img/binder.png new file mode 100644 index 0000000..d0cfdd2 Binary files /dev/null and b/slides/img/binder.png differ diff --git a/slides/img/cocalc.png b/slides/img/cocalc.png new file mode 100644 index 0000000..8c33d25 Binary files /dev/null and b/slides/img/cocalc.png differ diff --git a/slides/img/demogods.jpg b/slides/img/demogods.jpg new file mode 100644 index 0000000..aa25a21 Binary files /dev/null and b/slides/img/demogods.jpg differ diff --git a/slides/img/demotime.jpg b/slides/img/demotime.jpg new file mode 100644 index 0000000..15419e6 Binary files /dev/null and b/slides/img/demotime.jpg differ diff --git a/slides/img/docker-gitlab.jpg b/slides/img/docker-gitlab.jpg new file mode 100644 index 0000000..8648666 Binary files /dev/null and b/slides/img/docker-gitlab.jpg differ diff --git a/slides/img/docker.jpg b/slides/img/docker.jpg new file mode 100644 index 0000000..fba8b91 Binary files /dev/null and b/slides/img/docker.jpg differ diff --git a/slides/img/dockernvidia.png b/slides/img/dockernvidia.png new file mode 100644 index 0000000..f44e80c Binary files /dev/null and b/slides/img/dockernvidia.png differ diff --git a/slides/img/dockerrescue.png b/slides/img/dockerrescue.png new file mode 100644 index 0000000..6e8e4a2 Binary files /dev/null and b/slides/img/dockerrescue.png differ diff --git a/slides/img/dockerstacks.png b/slides/img/dockerstacks.png new file mode 100644 index 0000000..1bbe8f2 Binary files /dev/null and b/slides/img/dockerstacks.png differ diff --git a/slides/img/dockerstacks.svg b/slides/img/dockerstacks.svg new file mode 100644 index 0000000..3dab3ed --- /dev/null +++ b/slides/img/dockerstacks.svg @@ -0,0 +1,60 @@ + + + + + + + + + blockdiag + + + + + + + + + + + + ubuntu@SHA + + base-notebook + + 
minimal-notebook + + scipy-notebook + + r-notebook + + tensorflow-notebook + + datascience-notebook + + pyspark-notebook + + all-spark-notebook + + + + + + + + + + + + + + + + + + + + + + + diff --git a/slides/img/goodluck.png b/slides/img/goodluck.png new file mode 100644 index 0000000..1140483 Binary files /dev/null and b/slides/img/goodluck.png differ diff --git a/slides/img/group.jpeg b/slides/img/group.jpeg new file mode 100644 index 0000000..f6e3734 Binary files /dev/null and b/slides/img/group.jpeg differ diff --git a/slides/img/gsi.png b/slides/img/gsi.png new file mode 100644 index 0000000..f419885 Binary files /dev/null and b/slides/img/gsi.png differ diff --git a/slides/img/gym.png b/slides/img/gym.png new file mode 100644 index 0000000..bc9ae1f Binary files /dev/null and b/slides/img/gym.png differ diff --git a/slides/img/im-a-researcher.jpg b/slides/img/im-a-researcher.jpg new file mode 100644 index 0000000..dbf2e16 Binary files /dev/null and b/slides/img/im-a-researcher.jpg differ diff --git a/slides/img/intro.jpg b/slides/img/intro.jpg new file mode 100644 index 0000000..268b012 Binary files /dev/null and b/slides/img/intro.jpg differ diff --git a/slides/img/jhub-parts.png b/slides/img/jhub-parts.png new file mode 100644 index 0000000..0082650 Binary files /dev/null and b/slides/img/jhub-parts.png differ diff --git a/slides/img/jupyter-architecture.png b/slides/img/jupyter-architecture.png new file mode 100644 index 0000000..3b01005 Binary files /dev/null and b/slides/img/jupyter-architecture.png differ diff --git a/slides/img/jupyter-screenshot.png b/slides/img/jupyter-screenshot.png new file mode 100644 index 0000000..78ce270 Binary files /dev/null and b/slides/img/jupyter-screenshot.png differ diff --git a/slides/img/knowledgerepo.png b/slides/img/knowledgerepo.png new file mode 100644 index 0000000..588edd0 Binary files /dev/null and b/slides/img/knowledgerepo.png differ diff --git a/slides/img/nextcloud.jpg b/slides/img/nextcloud.jpg new file 
mode 100644 index 0000000..d12625d Binary files /dev/null and b/slides/img/nextcloud.jpg differ diff --git a/slides/img/noidea-pc.png b/slides/img/noidea-pc.png new file mode 100644 index 0000000..e420449 Binary files /dev/null and b/slides/img/noidea-pc.png differ diff --git a/slides/img/noidea.jpg b/slides/img/noidea.jpg new file mode 100644 index 0000000..5f66dba Binary files /dev/null and b/slides/img/noidea.jpg differ diff --git a/slides/img/party.jpg b/slides/img/party.jpg new file mode 100644 index 0000000..1451a3f Binary files /dev/null and b/slides/img/party.jpg differ diff --git a/slides/img/peerreview.jpg b/slides/img/peerreview.jpg new file mode 100644 index 0000000..793118e Binary files /dev/null and b/slides/img/peerreview.jpg differ diff --git a/slides/img/reproducibility.jpg b/slides/img/reproducibility.jpg new file mode 100644 index 0000000..8c095b2 Binary files /dev/null and b/slides/img/reproducibility.jpg differ diff --git a/slides/img/research.jpg b/slides/img/research.jpg new file mode 100644 index 0000000..84d3043 Binary files /dev/null and b/slides/img/research.jpg differ diff --git a/slides/img/scientificmethod.png b/slides/img/scientificmethod.png new file mode 100644 index 0000000..125ffb9 Binary files /dev/null and b/slides/img/scientificmethod.png differ diff --git a/slides/img/scientificmethod.svg b/slides/img/scientificmethod.svg new file mode 100644 index 0000000..9a73a1e --- /dev/null +++ b/slides/img/scientificmethod.svg @@ -0,0 +1,2731 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/slides/img/sharelatex.jpg b/slides/img/sharelatex.jpg new file mode 100644 index 0000000..4eaf277 Binary files /dev/null and b/slides/img/sharelatex.jpg differ diff --git a/slides/img/will_it_work.png b/slides/img/will_it_work.png new file mode 100644 index 0000000..800dad3 Binary files /dev/null and b/slides/img/will_it_work.png differ diff --git a/slides/img/wisdom_of_the_ancients.png b/slides/img/wisdom_of_the_ancients.png new file mode 100644 index 0000000..64b384b Binary files /dev/null and b/slides/img/wisdom_of_the_ancients.png differ diff --git a/slides/img/zeppelin.png b/slides/img/zeppelin.png new file mode 100644 index 0000000..5995bf2 Binary files /dev/null and b/slides/img/zeppelin.png differ diff --git a/slides/index.html b/slides/index.html new file mode 120000 index 0000000..8790fa4 --- /dev/null +++ b/slides/index.html @@ -0,0 +1 @@ +slides.html \ No newline at end of file diff --git a/slides/run.sh b/slides/run.sh new file mode 100755 index 0000000..9bc358e --- /dev/null +++ b/slides/run.sh @@ -0,0 +1,6 @@ +#!/bin/sh +#pandoc -t beamer slides.md --slide-level=2 -o slides.pdf -V theme:metropolis --toc 
--toc-depth 1 "$@" +pandoc -t revealjs -s --dpi 120 slides.md --css=style.css --slide-level=2 -o slides.html -V theme=white -V height=800 "$@" +echo "Syncing $PWD" +rsync -Paz "$PWD/" -e 'ssh -p 2222' root@slides.todevnull.com:/data/html/ +echo 'Done' diff --git a/slides/run_watch.sh b/slides/run_watch.sh new file mode 100755 index 0000000..95e38aa --- /dev/null +++ b/slides/run_watch.sh @@ -0,0 +1,3 @@ +#!/bin/sh +echo 'slides.md' | entr ./run.sh + diff --git a/slides/slides.html b/slides/slides.html new file mode 100644 index 0000000..8d3cbad --- /dev/null +++ b/slides/slides.html @@ -0,0 +1,377 @@ + + + + + + + + Docker for research + + + + + + + + + + + + + +
+
+ +
+

Docker for research

+

… and data analysis

+

J. Fernando Sánchez (jf.sanchez@upm.es)

+

2018

+
+ +

Intro

+

Before we begin

+

Code available at:

+

https://github.com/balkian/lab-in-a-box

+

Live demos at:

+

https://github.todevnull.com

+

https://lab.todevnull.com

+

https://hub.todevnull.com

+

Feel free to log in, but try not to break them for now 😉

+
+

My name is Fernando and…

+

+
+

At Grupo de Sistemas Inteligentes

+
+
+

+
+
    +
  • Machine Learning and Big Data
  • +
  • NLP and Sentiment Analysis
  • +
  • Social Network Analysis
  • +
  • Agents and Simulation
  • +
  • Linked Data and Semantic Technologies
  • +
+
+
+

http://www.gsi.dit.upm.es

+
+

And I ❤ Docker

+
+
+

+
+
    +
  • Docker+research for 3+ years
  • +
  • Advocate for ~2 years
  • +
  • Internal infrastructure: ansible, k8s and docker
  • +
  • Teach (with) it
  • +
+
+
+
+

About this talk

+

Takeaway: you can set up a multi-user data analysis environment with isolation in minutes

+

Plus: using docker to perform and share experiments is even easier

+

Related Meetups:

+

Big Data and Machine Learning with Docker

+

Using Docker in Machine Learning Projects

+
+

For researchers

+

Experiment, publish, repeat

+

+
+

Reproducibility

+
+@ianholmes
@ianholmes
+
+
+

Obstacles

+
+
+
    +
  • Missing data
  • +
  • Bleeding edge tools and libraries
  • +
  • Throwaway software +
      +
    • Hacky
    • +
    • Little to no documentation
    • +
  • +
  • Multiple languages
  • +
+
+
+
+

Obstacles

+

+
+

Is it a problem?

+
+https://www.nature.com/
https://www.nature.com/
+
+
+

Jupyter notebooks

+

+
+

Jupyter architecture

+
+http://jupyter.readthedocs.io
http://jupyter.readthedocs.io
+
+
+

Docker to the rescue

+
+towardsdatascience.com
towardsdatascience.com
+
+
+

Jupyter/docker-stacks

+

+
+

Reproducible environment

+ +
+

And friendly, too

+ + +
+

For small groups

+

Requirements

+
    +
  • Shared environments
  • +
  • Resource sharing
  • +
  • Easy configuration
  • +
  • Versioning
  • +
  • Backups
  • +
+

And little to no overhead

+
+

Isolation

+

+
+

Jupyterhub

+
+
+

Authenticators

+
    +
  • Local
  • +
  • OAuth
  • +
  • LDAP
  • +
  • JWT
  • +
+

Spawners

+
    +
  • Local
  • +
  • Docker
  • +
  • Kubernetes
  • +
  • Marathon
  • +
+
+
+
+

More infrastructure

+

+

+
+

Demo

+

It’s demo time

+

+

https://github.todevnull.com https://github.com/balkian/lab-in-a-box

+
+

Other tools

+

Zeppelin

+
    +
  • Alternative to Jupyter
  • +
+
+https://zeppelin.apache.org/
https://zeppelin.apache.org/
+
+
+

CoCalc

+
    +
  • Alternative to Jupyter
  • +
+
+https://cocalc.org/
https://cocalc.org/
+
+
+

Docker-Nvidia

+
    +
  • CUDA for docker
  • +
+
+https://github.com/NVIDIA/nvidia-docker
https://github.com/NVIDIA/nvidia-docker
+
+
+

Jupyter Binder

+
    +
  • Custom Jupyter from git repositories
  • +
+
+https://mybinder.org/
https://mybinder.org/
+
+
+

Knowledge-Repo

+
+http://knowledge-repo.readthedocs.io/
http://knowledge-repo.readthedocs.io/
+
+
+

Conclusions

+

Lessons learned

+
    +
  • Docker + Docker-compose +
      +
    • Reproducible environments (partially)
    • +
    • Reduced tooling / experience
    • +
    • Ephemeral containers force you to automate/document installation
    • +
  • +
  • Jupyterhub +
      +
    • Shared environments
    • +
    • Web interface (zero knowledge)
    • +
  • +
+
+

What’s missing?

+
    +
  • Roles and permissions
  • +
  • Backups

  • +
  • Ideas: +
      +
    • Kubernetes?
    • +
    • OpenShift?
    • +
  • +
+
+

Thanks for listening!

+

https://github.com/balkian/lab-in-a-box

+

jf.sanchez@upm.es

+
+
+
+ + + + + + + diff --git a/slides/slides.md b/slides/slides.md new file mode 100644 index 0000000..4e00cce --- /dev/null +++ b/slides/slides.md @@ -0,0 +1,306 @@ +--- +title: Docker for research +subtitle: ... and data analysis +author: J. Fernando Sánchez (<jf.sanchez@upm.es>) +tags: [Docker, CI, research] +date: 2018 +abstract: Talk about docker for research and data analysis + +--- + +# Intro { .white data-background="img/intro.jpg"} + +## Before we begin + +Code available at: + +<https://github.com/balkian/lab-in-a-box> + +Live demos at: + +**<https://github.todevnull.com>** + +<https://lab.todevnull.com> + +<https://hub.todevnull.com> + + +Feel free to log in, but try not to break them for now 😉 + + +## My name is Fernando and... + + +![](img/im-a-researcher.jpg) + + + +## At Grupo de Sistemas Inteligentes + + +:::::::::::::: {.columns} +::: {.column width="50%"} +![](img/gsi.png) +::: +::: {.column width="50%"} +- Machine Learning and Big Data +- NLP and Sentiment Analysis +- Social Network Analysis +- Agents and Simulation +- Linked Data and Semantic Technologies +::: +::::::::::::::: + + + +## And I ❤ Docker + + +:::::::::::::: {.columns} +::: {.column width="50%"} +![](img/docker.jpg) + +::: +::: {.column width="50%"} +* Docker+research for 3+ years +* Advocate for ~2 years +* Internal infrastructure: ansible, k8s and docker +* Teach (with) it +::: + +:::::::::::::: + +## About this talk + +Takeaway: ***you can set up a multi-user data analysis environment with isolation in minutes*** + +Plus: using docker to perform and share experiments is even easier + +Related Meetups: + +[Big Data and Machine Learning with Docker](https://www.meetup.com/Docker-Madrid/events/240357800/) + +[Using Docker in Machine Learning Projects](https://www.meetup.com/Docker-Madrid/events/237067604/) + +# For researchers {.white data-background="img/research.jpg" style="color:white"} + + + + + + +## Experiment, publish, repeat + + +![](img/peerreview.jpg) + + +## Reproducibility + + +![[\@ianholmes](https://twitter.com/ianholmes/status/288689712636493824)](img/goodluck.png) + +## Obstacles + +:::::::::::::: {.columns} +::: 
{.column width="50%"} + +* **Missing data** +* Bleeding edge tools and libraries +* Throwaway software + * Hacky + * Little to no documentation +* Multiple languages + +::: +::: {.column width="50%"} +![](img/will_it_work.png){ height=80% } +::: + +:::::::::::::: + + +## Obstacles + +![](img/noidea-pc.png) + +## Is it a problem? + +![[https://www.nature.com/](https://www.nature.com/news/1-500-scientists-lift-the-lid-on-reproducibility-1.19970)](img/reproducibility.jpg){ height=80% } + + +## Jupyter notebooks + +![](img/jupyter-screenshot.png) + +## Jupyter architecture + +![](img/jupyter-architecture.png) + + +## Docker to the rescue + +![[towardsdatascience.com](https://towardsdatascience.com/how-docker-can-help-you-become-a-more-effective-data-scientist-7fc048ef91d5)](img/dockerrescue.png) + + +## Jupyter/docker-stacks + +![](img/dockerstacks.png){ height=50% } + +## Reproducible environment + +```bash +docker run --rm -p 8888:8888 \ + -v ${WDIR}/:/home/jovyan/work/ \ + jupyter/scipy-notebook +``` + +## And friendly, too + + +```yaml +version: '2' +services: + jupyter: + image: jupyter/scipy-notebook + volumes: + - "./.nbconfig:/home/jovyan/.jupyter/nbconfig" + - "./work:/home/jovyan/work/" + ports: + - "8888:8888" +``` + +```bash +docker-compose up +``` + +## Related projects + +* Using docker images to share trained systems + +![](img/gym.png){ height=500px } + +# For small groups { .white data-background="img/group.jpeg" } + + +## Requirements + +* Shared environments +* Resource sharing +* Easy configuration +* Versioning +* Backups + +And **little to no overhead** + + +## Isolation + +![](img/noidea.jpg) + +## Jupyterhub + + +:::::::::::::: {.columns} +::: {.column width="60%"} + +![](img/jhub-parts.png){ height=500px } + +::: +::: {.column width="40%"} + +#### Authenticators + +* Local +* OAuth +* LDAP +* JWT + +#### Spawners + +* Local +* Docker +* Kubernetes +* Marathon + +::: +::::::::::::::: + +## More infrastructure + 
+![](img/docker-gitlab.jpg){.noborder height="250px"} +![](img/nextcloud.jpg){.noborder height="250px"} + +![](img/sharelatex.jpg){.noborder height="250px"} + +# Demo { data-background="img/party.jpg"} + +## It's demo time + + +![](img/demogods.jpg){ height=80% } + +<https://github.todevnull.com> +<https://github.com/balkian/lab-in-a-box> + +# Other tools + +## Zeppelin + +* Alternative to Jupyter + +![](img/zeppelin.png) + +## CoCalc + +* Alternative to Jupyter + +![](img/cocalc.png){ height=500px } + + +## Docker-Nvidia + +* CUDA for docker + +![](img/dockernvidia.png) + +## Jupyter Binder + +* Custom Jupyter from git repositories + +![](img/binder.png){ height=500px } + + +## Knowledge-Repo + +![](img/knowledgerepo.png) + + + +# Conclusions + +## Lessons learned + +* Docker + Docker-compose + * Reproducible environments (partially) + * Reduced tooling / experience + * Ephemeral containers force you to automate/document installation +* Jupyterhub + * Shared environments + * Web interface (zero knowledge) + +## What's missing? + +* Roles and permissions +* Backups + +* Ideas: + * Kubernetes? + * OpenShift? + +## Thanks for listening! + +<https://github.com/balkian/lab-in-a-box> + +<jf.sanchez@upm.es> + diff --git a/slides/style.css b/slides/style.css new file mode 100644 index 0000000..398067c --- /dev/null +++ b/slides/style.css @@ -0,0 +1,11 @@ +.white h1 { + color: black; + -webkit-text-stroke: 3px white; + text-stroke: 3px white +} +img.noborder { + border: none !important; + -webkit-box-shadow: none !important; + -moz-box-shadow: none !important; + box-shadow: none !important; +}