Cross-Language Web Scraping & Automation
This advanced program demonstrates how Python and JavaScript work together, with Python orchestrating workflows and JavaScript handling dynamic web content.
#!/usr/bin/env python3
"""
Advanced Tier: Python + JavaScript Integration
Demonstrates Python controlling JavaScript execution via Node.js
"""
import json
import os
import subprocess
import tempfile
from typing import Dict, List, Any
# Node.js script template executed by PythonJavaScriptIntegration.execute_javascript.
# It reads target URLs from process.argv, builds a stub "scraped" record for each
# (no real network access happens here), and prints the combined results as JSON
# on stdout so the Python side can parse them with json.loads.
JS_SCRAPER_CODE = """
const scrapeData = async (url) => {
return {
url: url,
title: `Page Title from ${url}`,
content: 'Dynamic content loaded by JavaScript',
metadata: {
timestamp: new Date().toISOString(),
engine: 'Node.js + JavaScript'
}
};
};
(async () => {
const urls = process.argv.slice(2);
const results = await Promise.all(
urls.map(url => scrapeData(url))
);
console.log(JSON.stringify(results, null, 2));
})();
"""
class PythonJavaScriptIntegration:
    """Bridge that runs an embedded Node.js scraper and post-processes its output.

    Python writes the JavaScript source (``JS_SCRAPER_CODE``) to a temporary
    file, executes it with ``node``, and parses the JSON the script prints.
    """

    def execute_javascript(self, urls: List[str]) -> List[Dict]:
        """Execute the embedded JavaScript via Node.js and return its parsed output.

        Args:
            urls: URLs passed to the Node.js script as command-line arguments.

        Returns:
            The list of result dicts the script printed as JSON on stdout.

        Raises:
            RuntimeError: if the node process exits with a nonzero status.
            json.JSONDecodeError: if stdout is not valid JSON.
        """
        # Use a private temp file instead of a fixed /tmp path: a hard-coded
        # world-writable path is race-prone and a symlink-attack risk.
        tmp = tempfile.NamedTemporaryFile(
            mode='w', suffix='.js', delete=False
        )
        try:
            tmp.write(JS_SCRAPER_CODE)
            tmp.close()  # flush before node reads the file
            result = subprocess.run(
                ['node', tmp.name] + urls,
                capture_output=True,
                text=True,
            )
            # Fail loudly with node's stderr rather than letting json.loads
            # choke on empty stdout.
            if result.returncode != 0:
                raise RuntimeError(
                    f'node exited with {result.returncode}: {result.stderr}'
                )
            return json.loads(result.stdout)
        finally:
            # Always remove the temp script, even on failure.
            os.unlink(tmp.name)

    def process_results(self, raw_results: List[Dict]) -> Dict:
        """Summarize the raw JavaScript results on the Python side.

        Args:
            raw_results: list of per-page dicts; each must have a ``'url'`` key.

        Returns:
            A summary dict with the page count, the page URLs, and static
            metadata about the integration pipeline.
        """
        return {
            'total_pages': len(raw_results),
            'pages': [page['url'] for page in raw_results],
            'summary': {
                'engines_used': ['Python', 'JavaScript', 'Node.js'],
                'integration_type': 'Python → JavaScript → Python'
            }
        }
def main() -> None:
    """Demo entry point: scrape two example URLs via Node.js and print a summary."""
    integration = PythonJavaScriptIntegration()
    urls = ['https://example.com/1', 'https://example.com/2']
    raw_results = integration.execute_javascript(urls)
    processed = integration.process_results(raw_results)
    print(json.dumps(processed, indent=2))


# Guard the demo so importing this module does not spawn a node subprocess.
if __name__ == "__main__":
    main()
This pattern extends naturally to several real-world use cases:

- Python scripts controlling browser automation via Node.js and Puppeteer.
- JavaScript extracting data from dynamic pages, with Python processing and storing it.
- Combining Python's data-science libraries with JavaScript's async capabilities.
- Python test runners executing JavaScript test scenarios.