Overview

This guide covers advanced patterns and best practices for working with content and segments in HyperFlow Python transforms. It includes techniques for efficient processing, metadata preservation, and integration with HyperFlow's content management system.

Understanding Content vs Segments

Content

Segments

Content Processing Patterns

Extracting and Transforming Content

# Summarize each content item: images are downloaded over HTTP and described
# by byte size, items with extracted text are reduced to their first lines,
# and everything else gets a generic placeholder.
contents = input1  # Content input type

# Bound every download so one unresponsive host cannot stall the transform.
REQUEST_TIMEOUT_SECONDS = 30
# Number of leading lines kept when summarizing text content.
SUMMARY_LINE_LIMIT = 5

processed_contents = []
for content in contents:
    # Access content data
    mimetype = content["mimetype"]
    text = content.get("text", "")  # Extracted text if available

    # Handle different content types
    if mimetype.startswith("image/"):
        # Only this branch needs the URL, so it is read here rather than for
        # every item (text-only items may not carry one).
        response = requests.get(content["url"], timeout=REQUEST_TIMEOUT_SECONDS)
        # Fail loudly on 4xx/5xx instead of measuring an error page.
        response.raise_for_status()
        image_data = response.content
        # Process image and create summary
        summary = f"Image analysis: {len(image_data)} bytes, type: {mimetype}"
    elif text:
        # Use extracted text for text-based content. splitlines() handles
        # "\n", "\r\n" and "\r" endings alike.
        lines = text.splitlines()
        summary = "\n".join(lines[:SUMMARY_LINE_LIMIT])
    else:
        summary = f"Content type: {mimetype}, size: available via URL"

    # Create new content with summary
    processed_contents.append({
        "dataType": "text",
        "mimetype": "text/plain",
        "data": f"SUMMARY:\n{summary}",
    })

# Output as new content
output = processed_contents

Content Type Detection and Routing

# Route content by mimetype, then transform each group: text/* items are
# upper-cased, application/json items are parsed and tagged, and everything
# else is set aside untouched.
contents = input1

# Bound every download so one unresponsive host cannot stall the transform.
REQUEST_TIMEOUT_SECONDS = 30

text_contents = []
json_contents = []
other_contents = []  # Collected for completeness; not processed below.

for content in contents:
    mimetype = content["mimetype"]

    if mimetype.startswith("text/"):
        text_contents.append(content)
    elif mimetype == "application/json":
        json_contents.append(content)
    else:
        other_contents.append(content)

# Process each type differently
processed = []

# Process text files
for tc in text_contents:
    # Prefer the already-extracted text; fall back to the URL if needed.
    text = tc.get("text", "")
    if not text and tc.get("url"):
        response = requests.get(tc["url"], timeout=REQUEST_TIMEOUT_SECONDS)
        # Fail loudly on 4xx/5xx instead of transforming an error page.
        response.raise_for_status()
        text = response.text

    processed.append({
        "dataType": "text",
        "mimetype": "text/plain",
        "data": text.upper(),  # Example transformation
    })

# Process JSON files
for jc in json_contents:
    # Prefer the already-extracted text; fall back to the URL if needed.
    text = jc.get("text", "")
    if not text and jc.get("url"):
        response = requests.get(jc["url"], timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        text = response.text

    if not text:
        # No inline text and no URL: json.loads("") would raise, so there is
        # nothing to transform for this item.
        continue

    data = json.loads(text)
    # Transform JSON data. Only objects can carry the flag; arrays and
    # scalars are passed through re-serialized but otherwise unchanged.
    if isinstance(data, dict):
        data["processed"] = True
    processed.append({
        "dataType": "text",
        "mimetype": "application/json",
        "data": json.dumps(data, indent=2),
    })

output = processed

Merging Multiple Contents

# Merge multiple documents into one per mimetype. Only text/* content is
# merged; each group's texts are joined with a visible separator line.
contents = input1

# Bound every download so one unresponsive host cannot stall the transform.
REQUEST_TIMEOUT_SECONDS = 30

# Group by mimetype and access content data
grouped = {}
for content in contents:
    mimetype = content["mimetype"]
    # Non-text groups are never emitted below, so skip them before paying
    # for a download.
    if not mimetype.startswith("text/"):
        continue

    # Prefer the already-extracted text; fall back to HTTP if needed.
    text = content.get("text", "")
    if not text and content.get("url"):
        response = requests.get(content["url"], timeout=REQUEST_TIMEOUT_SECONDS)
        # Fail loudly on 4xx/5xx instead of merging an error page.
        response.raise_for_status()
        text = response.text

    grouped.setdefault(mimetype, []).append(text)

# Blank line, a rule of 50 "=" characters, blank line between documents.
separator = "\n\n" + "=" * 50 + "\n\n"

# Create merged documents
merged_contents = []
for mimetype, texts in grouped.items():
    merged_contents.append({
        "dataType": "text",
        "mimetype": mimetype,
        "data": separator.join(texts),
    })

output = merged_contents

Content with Rich Metadata

# Generate content with detailed metadata
# Import the class, not just the module: `import datetime` followed by
# `datetime.now()` raises AttributeError because the module has no `now`.
from datetime import datetime, timezone

# Process input and generate analysis.
# NOTE(review): analyze_text is not defined in this snippet; it is presumably
# a user-supplied helper returning a JSON-serializable dict — confirm.
analysis_result = analyze_text(input1)

# Create content with metadata. Each metadata entry is a {"key", "value"}
# pair; the confidence is stringified so every value is a string.
output = {
    "dataType": "text",
    "mimetype": "application/json",
    "data": json.dumps(analysis_result, indent=2),
    "metadata": [
        {"key": "analysis_type", "value": "sentiment"},
        # Timezone-aware UTC timestamp so the value is unambiguous regardless
        # of where the transform runs.
        {"key": "processed_date", "value": datetime.now(timezone.utc).isoformat()},
        {"key": "confidence", "value": str(analysis_result.get("confidence", 0))},
        {"key": "version", "value": "1.0"},
    ],
}