Back to Blog

Running Jupyter Notebooks in the Cloud

Tutorials · Amin Al Ali Al Darwish · 9 min read

Running Jupyter Notebooks in the Cloud

Jupyter notebooks are the standard for data science workflows. But running them in production—scheduled jobs, API endpoints, automated pipelines—requires executing them without the interactive interface.

HopX sandboxes let you run notebooks programmatically, in isolated environments, with full access to results.

Why Run Notebooks in Sandboxes?

Local notebook execution problems:

  • Notebooks can execute arbitrary code
  • User notebooks might contain malicious code
  • Resource consumption is unpredictable
  • Environment conflicts between notebooks

Sandbox advantages:

  • Complete isolation from your infrastructure
  • Consistent, reproducible environments
  • Resource limits prevent abuse
  • Parallel execution at scale

Basic Notebook Execution

Using nbconvert

The simplest way to execute a notebook:

python
1
from hopx import Sandbox
2
import json
3
 
4
sandbox = Sandbox.create(template="code-interpreter")
5
 
6
# Create a sample notebook
7
notebook = {
8
    "cells": [
9
        {
10
            "cell_type": "code",
11
            "execution_count": None,
12
            "metadata": {},
13
            "outputs": [],
14
            "source": ["import pandas as pd\n", "import numpy as np\n", "print('Libraries loaded')"]
15
        },
16
        {
17
            "cell_type": "code",
18
            "execution_count": None,
19
            "metadata": {},
20
            "outputs": [],
21
            "source": ["df = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6]})\n", "print(df)"]
22
        },
23
        {
24
            "cell_type": "code",
25
            "execution_count": None,
26
            "metadata": {},
27
            "outputs": [],
28
            "source": ["result = df['a'].sum()\n", "print(f'Sum: {result}')"]
29
        }
30
    ],
31
    "metadata": {
32
        "kernelspec": {
33
            "display_name": "Python 3",
34
            "language": "python",
35
            "name": "python3"
36
        }
37
    },
38
    "nbformat": 4,
39
    "nbformat_minor": 4
40
}
41
 
42
# Upload notebook
43
sandbox.files.write("/app/analysis.ipynb", json.dumps(notebook))
44
 
45
# Execute with nbconvert
46
result = sandbox.commands.run("""
47
cd /app && jupyter nbconvert --to notebook --execute analysis.ipynb --output executed.ipynb
48
""")
49
 
50
print("Execution status:", "Success" if result.exit_code == 0 else "Failed")
51
print(result.stdout)
52
 

Extracting Results

After execution, read the output notebook:

python
# Download executed notebook
executed = sandbox.files.read("/app/executed.ipynb")
executed_nb = json.loads(executed)

# Extract outputs from each code cell (markdown/raw cells are skipped)
for i, cell in enumerate(executed_nb["cells"]):
    if cell["cell_type"] != "code":
        continue

    print(f"\n--- Cell {i} ---")
    # Join the source lines first, then truncate. Slicing the list itself
    # would keep the first 50 *lines*, not the first 50 characters.
    print("Source:", "".join(cell["source"])[:50])

    for output in cell.get("outputs", []):
        if output["output_type"] == "stream":
            # stdout/stderr text captured while the cell ran
            print("Output:", "".join(output["text"]))
        elif output["output_type"] == "execute_result":
            # value of the cell's last expression
            print("Result:", output["data"].get("text/plain", ""))

Papermill for Parameterized Notebooks

Papermill is the industry standard for parameterized notebook execution.

Basic Papermill Usage

python
from hopx import Sandbox
import json

sandbox = Sandbox.create(template="code-interpreter")

# Install papermill and stop early if that fails; otherwise the papermill
# command below would fail with a much less obvious error.
install = sandbox.commands.run("pip install papermill -q")
if install.exit_code != 0:
    raise RuntimeError(f"papermill install failed: {install.stderr}")

# Create parameterized notebook.
# The nbformat v4 schema requires "execution_count" and "outputs" on every
# code cell, so they are spelled out even though they start empty.
notebook = {
    "cells": [
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {"tags": ["parameters"]},  # Parameters cell
            "outputs": [],
            "source": ["# Parameters\n", "dataset_path = '/data/default.csv'\n", "threshold = 0.5"]
        },
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": [
                "import pandas as pd\n",
                "print(f'Loading: {dataset_path}')\n",
                "print(f'Threshold: {threshold}')"
            ]
        }
    ],
    "metadata": {"kernelspec": {"name": "python3"}},
    "nbformat": 4,
    "nbformat_minor": 4
}

sandbox.files.write("/app/template.ipynb", json.dumps(notebook))

# Execute with parameters.
# The trailing backslashes are consumed by Python (line continuation inside a
# non-raw string), so the sandbox receives this as a single command line.
result = sandbox.commands.run("""
papermill /app/template.ipynb /app/output.ipynb \
    -p dataset_path "/data/sales_2024.csv" \
    -p threshold 0.75
""")

print(result.stdout)

Injecting Complex Parameters

python
1
# Parameters can be Python objects
2
params = {
3
    "config": {
4
        "model": "random_forest",
5
        "features": ["age", "income", "score"],
6
        "hyperparams": {"n_estimators": 100, "max_depth": 10}
7
    },
8
    "date_range": ["2024-01-01", "2024-12-31"]
9
}
10
 
11
# Write params as JSON
12
sandbox.files.write("/app/params.json", json.dumps(params))
13
 
14
# Notebook loads params from file
15
notebook_code = '''
16
import json
17
 
18
with open('/app/params.json') as f:
19
    params = json.load(f)
20
 
21
config = params['config']
22
date_range = params['date_range']
23
 
24
print(f"Model: {config['model']}")
25
print(f"Features: {config['features']}")
26
'''
27
 

Building a Notebook Execution API

FastAPI Notebook Runner

python
import base64
import json
from typing import Optional

from fastapi import FastAPI, HTTPException, UploadFile
from hopx import Sandbox
from pydantic import BaseModel
6
 
7
app = FastAPI()
8
 
class NotebookRequest(BaseModel):
    """Request body accepted by the POST /execute endpoint."""

    # The .ipynb document, base64-encoded so it can travel inside JSON.
    notebook: str
    # Serialized to /app/params.json and handed to papermill via -f.
    parameters: dict = {}
    # Forwarded as timeout= to sandbox.commands.run
    # (presumably seconds; confirm against the HopX SDK).
    timeout: int = 300
13
 
class NotebookResponse(BaseModel):
    """Response body returned by the POST /execute endpoint."""

    # True when the papermill command exited with status 0.
    success: bool
    # Base64 encoded executed notebook; empty string when execution failed.
    executed_notebook: str
    # One entry per code cell; each entry is a list of
    # {"type": ..., "content": ...} dicts.
    outputs: list
    # stderr of the failed run. Declared Optional because the default (and the
    # value on success) is None, which a plain `str` annotation does not allow.
    error: Optional[str] = None
19
 
def _run_notebook_in_sandbox(request: NotebookRequest) -> NotebookResponse:
    """Blocking part of /execute: run the notebook in a fresh sandbox.

    Raises:
        HTTPException: 400 when ``request.notebook`` is not valid base64.
    """
    try:
        notebook_content = base64.b64decode(request.notebook)
    except ValueError as exc:  # binascii.Error is a ValueError subclass
        # Bad client input is a 400, not an internal server error.
        raise HTTPException(
            status_code=400, detail="notebook is not valid base64"
        ) from exc

    sandbox = None
    try:
        sandbox = Sandbox.create(template="code-interpreter")

        # Upload notebook and parameters
        sandbox.files.write("/app/input.ipynb", notebook_content)
        sandbox.files.write("/app/params.json", json.dumps(request.parameters))

        # Execute
        result = sandbox.commands.run(
            "papermill /app/input.ipynb /app/output.ipynb -f /app/params.json",
            timeout=request.timeout,
        )

        if result.exit_code != 0:
            return NotebookResponse(
                success=False,
                executed_notebook="",
                outputs=[],
                error=result.stderr,
            )

        # Read executed notebook
        executed = sandbox.files.read("/app/output.ipynb")
    finally:
        # Always release the sandbox, including on early return or error.
        if sandbox is not None:
            sandbox.kill()

    executed_nb = json.loads(executed)

    # Extract outputs: one list per code cell, in notebook order
    outputs = [
        [
            {
                "type": output["output_type"],
                "content": extract_output_content(output),
            }
            for output in cell.get("outputs", [])
        ]
        for cell in executed_nb["cells"]
        if cell["cell_type"] == "code"
    ]

    # NOTE(review): the return type of sandbox.files.read is not visible here.
    # base64.b64encode needs bytes, so encode first if it handed back text.
    raw = executed.encode("utf-8") if isinstance(executed, str) else executed

    return NotebookResponse(
        success=True,
        executed_notebook=base64.b64encode(raw).decode(),
        outputs=outputs,
    )


@app.post("/execute", response_model=NotebookResponse)
async def execute_notebook(request: NotebookRequest):
    """Execute a base64-encoded notebook with papermill inside a sandbox.

    Returns a NotebookResponse; ``success`` is False (with ``error`` set to
    stderr) when papermill exits non-zero.

    Raises:
        HTTPException: 400 for undecodable input, 500 for any other failure.
    """
    from fastapi.concurrency import run_in_threadpool

    try:
        # The sandbox SDK calls are synchronous. Running them directly in this
        # coroutine would block the event loop for the whole notebook run, so
        # they are pushed to a worker thread instead.
        return await run_in_threadpool(_run_notebook_in_sandbox, request)
    except HTTPException:
        # Keep deliberate HTTP errors (e.g. the 400 above) intact.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
76
 
77
 
def extract_output_content(output):
    """Extract readable content from a single notebook output dict.

    Returns:
        - stream: the captured text as one string
        - execute_result: text/plain, else text/html, else ""
        - display_data: {"type": "image", "data": <base64 png>} when a PNG is
          present, otherwise text/plain (or "")
        - error: {"ename", "evalue", "traceback"}
        - anything else: str(output)
    """

    def _as_text(value):
        # Notebook JSON may store multi-line strings as a list of lines.
        # Join them so every text branch returns a plain string.
        return "".join(value) if isinstance(value, list) else value

    # .get() so an output without "output_type" reaches the str() fallback
    # instead of raising KeyError.
    output_type = output.get("output_type")

    if output_type == "stream":
        return _as_text(output.get("text", []))

    if output_type == "execute_result":
        data = output.get("data", {})
        return _as_text(data.get("text/plain", data.get("text/html", "")))

    if output_type == "display_data":
        data = output.get("data", {})
        if "image/png" in data:
            return {"type": "image", "data": data["image/png"]}
        return _as_text(data.get("text/plain", ""))

    if output_type == "error":
        return {
            "ename": output.get("ename"),
            "evalue": output.get("evalue"),
            "traceback": output.get("traceback", []),
        }

    return str(output)
97
 

Scheduled Notebook Execution

python
1
import schedule
2
import time
3
from hopx import Sandbox
4
import json
5
from datetime import datetime
6
 
class NotebookScheduler:
    """Run notebooks in sandboxes on a daily schedule and record each run."""

    def __init__(self):
        # One dict per finished run: timestamp, notebook, success, output.
        self.results = []

    def run_notebook(self, notebook_path: str, params: dict):
        """Execute a notebook and store results.

        Uploads ``notebook_path`` to a fresh sandbox, runs it with papermill
        (each ``params`` item becomes a ``-p key value`` argument), appends a
        record to ``self.results`` and saves the executed notebook under
        ``outputs/``.

        Returns:
            True when papermill exited with status 0.
        """
        import os
        import shlex

        sandbox = Sandbox.create(template="code-interpreter")

        try:
            # Upload notebook
            with open(notebook_path, "rb") as f:
                sandbox.files.write("/app/notebook.ipynb", f.read())

            # Build the command. Keys and values are shell-quoted so spaces,
            # quotes or shell metacharacters in a parameter cannot break or
            # inject into the command line.
            command = ["papermill", "/app/notebook.ipynb", "/app/output.ipynb"]
            for key, value in params.items():
                command += ["-p", shlex.quote(str(key)), shlex.quote(str(value))]

            # Execute
            result = sandbox.commands.run(" ".join(command))
            succeeded = result.exit_code == 0

            # Take one timestamp so the stored record and the output file name
            # always refer to the same instant.
            finished_at = datetime.now()

            # Store result
            self.results.append({
                "timestamp": finished_at.isoformat(),
                "notebook": notebook_path,
                "success": succeeded,
                "output": result.stdout,
            })

            # Download output notebook
            output_name = f"output_{finished_at.strftime('%Y%m%d_%H%M%S')}.ipynb"
            output_data = sandbox.files.read("/app/output.ipynb")
            # NOTE(review): the return type of files.read is not visible here.
            # The file is opened in binary mode, so encode if it is text.
            if isinstance(output_data, str):
                output_data = output_data.encode("utf-8")

            # The first run would otherwise fail if outputs/ does not exist.
            os.makedirs("outputs", exist_ok=True)
            with open(f"outputs/{output_name}", "wb") as f:
                f.write(output_data)

            return succeeded

        finally:
            sandbox.kill()

    def schedule_daily(self, notebook_path: str, params: dict, time_str: str):
        """Schedule notebook to run daily at ``time_str`` (e.g. "06:00")."""
        schedule.every().day.at(time_str).do(
            self.run_notebook, notebook_path, params
        )

    def run(self):
        """Start the scheduler; blocks forever, checking for due jobs once a minute."""
        while True:
            schedule.run_pending()
            time.sleep(60)
56
 
57
 
58
# Usage
59
scheduler = NotebookScheduler()
60
scheduler.schedule_daily(
61
    "reports/daily_metrics.ipynb",
62
    {"date": "today"},
63
    "06:00"
64
)
65
scheduler.run()
66
 

Handling Notebook Outputs

Extracting Visualizations

python
from hopx import Sandbox
import json
import base64

sandbox = Sandbox.create(template="code-interpreter")

# Notebook with matplotlib output.
# The nbformat v4 schema requires "execution_count" and "outputs" on every
# code cell, so they are spelled out even though they start empty.
notebook = {
    "cells": [
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": [
                "import matplotlib.pyplot as plt\n",
                "import numpy as np\n",
                "\n",
                "x = np.linspace(0, 10, 100)\n",
                "plt.figure(figsize=(10, 6))\n",
                "plt.plot(x, np.sin(x))\n",
                "plt.title('Sine Wave')\n",
                "plt.show()"
            ]
        }
    ],
    "metadata": {"kernelspec": {"name": "python3"}},
    "nbformat": 4,
    "nbformat_minor": 4
}

try:
    sandbox.files.write("/app/viz.ipynb", json.dumps(notebook))

    # Execute
    result = sandbox.commands.run(
        "jupyter nbconvert --to notebook --execute viz.ipynb --output executed.ipynb",
        cwd="/app"
    )
    # Without this check a failed run only surfaces later, as a confusing
    # error while reading executed.ipynb.
    if result.exit_code != 0:
        raise RuntimeError(f"Notebook execution failed: {result.stderr}")

    # Fetch the executed notebook before the sandbox goes away
    executed = json.loads(sandbox.files.read("/app/executed.ipynb"))
finally:
    # Everything needed has been downloaded (or the run failed): release it.
    sandbox.kill()

# Extract images from output: PNGs are stored base64-encoded under "image/png"
images = [
    base64.b64decode(output["data"]["image/png"])
    for cell in executed["cells"]
    for output in cell.get("outputs", [])
    if "image/png" in output.get("data", {})
]

# Save extracted images
for i, img in enumerate(images):
    with open(f"figure_{i}.png", "wb") as f:
        f.write(img)

print(f"Extracted {len(images)} images")

Capturing DataFrames

python
1
# Notebook cell that outputs a DataFrame
2
notebook_code = '''
3
import pandas as pd
4
import json
5
 
6
df = pd.DataFrame({
7
    'product': ['A', 'B', 'C'],
8
    'sales': [100, 150, 80],
9
    'profit': [20, 45, 15]
10
})
11
 
12
# Output as JSON for programmatic access
13
print("__DATA_START__")
14
print(df.to_json(orient='records'))
15
print("__DATA_END__")
16
 
17
# Also display nicely
18
display(df)
19
'''
20
 
# After execution, parse the output
for cell in executed_nb["cells"]:
    for output in cell.get("outputs", []):
        if output["output_type"] != "stream":
            continue

        text = "".join(output["text"])
        # partition() never raises: if either marker is missing from this
        # chunk its separator comes back empty and the chunk is skipped,
        # where text.index() would have raised ValueError.
        _, start_marker, remainder = text.partition("__DATA_START__")
        json_data, end_marker, _ = remainder.partition("__DATA_END__")
        if not (start_marker and end_marker):
            continue

        data = json.loads(json_data.strip())
        print("Extracted DataFrame:", data)

Error Handling

Graceful Notebook Failures

python
1
from hopx import Sandbox
2
import json
3
 
def execute_notebook_safely(notebook_content: bytes, params: dict = None):
    """Execute notebook with comprehensive error handling.

    Runs the notebook with papermill when ``params`` is non-empty, otherwise
    with ``jupyter nbconvert``. Never raises: every failure is reported in the
    returned dict.

    Returns:
        On a completed run: ``success``, ``exit_code``, ``stdout``,
        ``stderr``, ``notebook`` (parsed output notebook or None) and
        ``errors`` (one entry per error output found in a cell).
        On timeout or any other exception: ``success`` (False), ``error`` and
        ``errors``.
    """
    # Created inside the try so a failure to start the sandbox is reported
    # like every other error instead of escaping to the caller.
    sandbox = None

    try:
        sandbox = Sandbox.create(template="code-interpreter")
        sandbox.files.write("/app/notebook.ipynb", notebook_content)

        if params:
            sandbox.files.write("/app/params.json", json.dumps(params))
            cmd = "papermill /app/notebook.ipynb /app/output.ipynb -f /app/params.json"
        else:
            cmd = "jupyter nbconvert --execute --to notebook /app/notebook.ipynb --output output.ipynb"

        result = sandbox.commands.run(cmd, timeout=300)

        # Read output notebook even if execution failed. It may be missing or
        # unparsable in that case, which is not itself an error here.
        # `except Exception` rather than a bare `except:` so KeyboardInterrupt
        # and SystemExit still propagate.
        try:
            output_nb = json.loads(sandbox.files.read("/app/output.ipynb"))
        except Exception:
            output_nb = None

        # Check for cell errors
        errors = []
        if output_nb:
            errors = [
                {
                    "cell": i,
                    "ename": output.get("ename"),
                    "evalue": output.get("evalue"),
                    "traceback": output.get("traceback", []),
                }
                for i, cell in enumerate(output_nb.get("cells", []))
                for output in cell.get("outputs", [])
                if output.get("output_type") == "error"
            ]

        return {
            "success": result.exit_code == 0 and not errors,
            "exit_code": result.exit_code,
            "stdout": result.stdout,
            "stderr": result.stderr,
            "notebook": output_nb,
            "errors": errors,
        }

    except TimeoutError:
        # NOTE(review): assumes the SDK signals a timeout with the builtin
        # TimeoutError; confirm against the HopX SDK.
        return {
            "success": False,
            "error": "Notebook execution timed out",
            "errors": [{"type": "timeout"}],
        }

    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "errors": [{"type": "exception", "message": str(e)}],
        }

    finally:
        if sandbox is not None:
            sandbox.kill()
63
 
64
 
65
# Usage
66
with open("analysis.ipynb", "rb") as f:
67
    result = execute_notebook_safely(f.read(), {"threshold": 0.5})
68
 
69
if not result["success"]:
70
    print("Execution failed!")
71
    for error in result["errors"]:
72
        print(f"  Cell {error.get('cell', '?')}: {error.get('evalue', error)}")
73
 

Parallel Notebook Execution

Run multiple notebooks simultaneously:

python
1
from hopx import Sandbox
2
import concurrent.futures
3
import json
4
 
def run_single_notebook(notebook_path: str, params: dict):
    """Run one notebook in its own sandbox.

    Uploads the notebook and its parameters, executes it with papermill
    (300 timeout passed to the sandbox) and returns a dict with the notebook
    path, the params, a success flag (exit code 0) and the raw contents of
    the executed notebook. The sandbox is killed in every case.
    """
    sandbox = Sandbox.create(template="code-interpreter")

    try:
        with open(notebook_path, "rb") as source:
            notebook_bytes = source.read()

        sandbox.files.write("/app/notebook.ipynb", notebook_bytes)
        sandbox.files.write("/app/params.json", json.dumps(params))

        run = sandbox.commands.run(
            "papermill /app/notebook.ipynb /app/output.ipynb -f /app/params.json",
            timeout=300,
        )
        executed = sandbox.files.read("/app/output.ipynb")

        return dict(
            notebook=notebook_path,
            params=params,
            success=(run.exit_code == 0),
            output=executed,
        )
    finally:
        sandbox.kill()
31
 
32
 
def run_notebooks_parallel(jobs: list[tuple[str, dict]], max_workers: int = 5):
    """Run multiple notebooks in parallel.

    Each ``(path, params)`` job is handed to ``run_single_notebook`` on a
    thread pool of ``max_workers`` threads. Results are collected in
    completion order; a job that raises is recorded as a failure dict with
    the exception text under "error".
    """
    collected = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Map each future back to the job that produced it.
        pending = {}
        for nb_path, nb_params in jobs:
            handle = pool.submit(run_single_notebook, nb_path, nb_params)
            pending[handle] = (nb_path, nb_params)

        for done in concurrent.futures.as_completed(pending):
            nb_path, nb_params = pending[done]
            try:
                outcome = done.result()
                collected.append(outcome)
                print(f"✓ Completed: {nb_path}")
            except Exception as exc:
                failure = {
                    "notebook": nb_path,
                    "params": nb_params,
                    "success": False,
                    "error": str(exc),
                }
                collected.append(failure)
                print(f"✗ Failed: {nb_path} - {exc}")

    return collected
59
 
60
 
61
# Run the same notebook once per region (5 jobs), each with different parameters
62
jobs = [
63
    ("analysis.ipynb", {"region": region})
64
    for region in ["US", "EU", "APAC", "LATAM", "MEA"]
65
]
66
 
67
results = run_notebooks_parallel(jobs, max_workers=5)
68
print(f"Completed: {sum(1 for r in results if r['success'])}/{len(results)}")
69
 

Best Practices

1. Version Your Notebooks

python
# Add version metadata
notebook["metadata"]["version"] = "1.2.0"
notebook["metadata"]["last_modified"] = "2024-11-15"


def _version_key(version: str) -> tuple:
    """Turn "1.10.0" into (1, 10, 0) so versions compare numerically.

    Raises ValueError for non-numeric components such as "1.0.0rc1".
    """
    return tuple(int(part) for part in version.split("."))


# Check version before execution.
# Compare numerically: as plain strings "10.0.0" < "2.0.0" (because "1" < "2"),
# which would wrongly reject newer notebooks.
if _version_key(notebook["metadata"].get("version", "0.0.0")) < _version_key("1.0.0"):
    raise ValueError("Notebook version too old")

2. Validate Inputs

python
def validate_notebook(content: bytes) -> bool:
    """Validate notebook before execution.

    Checks that ``content`` is a JSON object in notebook format 4 or newer,
    that it has at least one code cell, and that no cell contains one of a
    few obviously risky substrings.

    Returns:
        True when every check passes.

    Raises:
        ValueError: for every rejection, including structurally malformed
            input (missing "cells", non-object root, and so on).
    """
    try:
        nb = json.loads(content)
    except json.JSONDecodeError as exc:
        raise ValueError("Invalid JSON") from exc

    # Valid JSON is not necessarily a notebook: guard the shape explicitly so
    # callers only ever have to handle ValueError.
    if not isinstance(nb, dict):
        raise ValueError("Notebook must be a JSON object")

    # Check format
    nbformat_major = nb.get("nbformat", 0)
    if not isinstance(nbformat_major, int) or nbformat_major < 4:
        raise ValueError("Notebook format too old")

    cells = nb.get("cells")
    if not isinstance(cells, list):
        raise ValueError("Notebook has no cells")
    cells = [cell for cell in cells if isinstance(cell, dict)]

    # Check for required cells
    if not any(cell.get("cell_type") == "code" for cell in cells):
        raise ValueError("Notebook has no code cells")

    # Check for dangerous imports.
    # NOTE: a substring screen is only a cheap first filter and is easy to
    # bypass; it does not replace running the notebook in an isolated sandbox.
    dangerous = ["os.system", "subprocess", "eval(", "exec("]
    for cell in cells:
        source = cell.get("source", [])
        # "source" may be a single string or a list of lines.
        if not isinstance(source, str):
            source = "".join(source)
        for d in dangerous:
            if d in source:
                raise ValueError(f"Potentially dangerous code: {d}")

    return True

3. Resource Limits

python
1
# Set memory and time limits
2
sandbox.commands.run(
3
    "timeout 300 jupyter nbconvert --execute notebook.ipynb",
4
    memory_limit="4G"
5
)
6
 

4. Clean Output Notebooks

python
def clean_notebook_output(notebook: dict) -> dict:
    """Remove outputs for storage/version control.

    Returns a new notebook dict in which every code cell has an empty
    ``outputs`` list and ``execution_count`` set to None. The notebook passed
    in is left untouched.
    """
    import copy

    # dict.copy() is shallow: the copy would share the same cell dicts, so
    # clearing them would also wipe the caller's notebook. Deep-copy instead.
    cleaned = copy.deepcopy(notebook)

    for cell in cleaned.get("cells", []):
        if cell.get("cell_type") == "code":
            cell["outputs"] = []
            cell["execution_count"] = None

    return cleaned

Conclusion

Running Jupyter notebooks in HopX sandboxes enables:

  • Safe execution of untrusted notebooks
  • Scalable automation with parallel execution
  • Consistent environments for reproducibility
  • Programmatic access to notebook outputs

Whether you're building notebook-powered APIs, scheduling data science jobs, or processing user-submitted notebooks, sandboxed execution is the secure, scalable solution.

Resources