This guide covers advanced patterns and best practices for working with content and segments in HyperFlow Python transforms. It includes techniques for efficient processing, metadata preservation, and integration with HyperFlow's content management system.
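The examples below treat each item arriving on `input1` as a dictionary exposing at least `mimetype`, `url`, and, when text extraction has run, `text`, and each produced item as a dictionary with `dataType`, `mimetype`, and `data` keys. These shapes are inferred from the snippets in this section rather than from a formal schema, so the sketch below is illustrative only.

```python
# Illustrative shapes only, inferred from the examples in this section
example_input_item = {
    "mimetype": "text/plain",                  # MIME type of the stored content
    "url": "https://example.com/content/123",  # placeholder HTTP location
    "text": "Extracted text, if available"     # optional, present after extraction
}

example_output_item = {
    "dataType": "text",
    "mimetype": "text/plain",
    "data": "Transformed payload"
}
```

With those shapes in mind, the first example summarizes every input item, downloading image bytes over HTTP and falling back to extracted text where it is available.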
```python
import requests

# Process multiple content items with HTTP access
contents = input1  # Content-typed input
processed_contents = []

for content in contents:
    # Access content data
    mimetype = content["mimetype"]
    url = content["url"]
    text = content.get("text", "")  # Extracted text, if available

    # Handle different content types
    if mimetype.startswith("image/"):
        # Download and process images
        response = requests.get(url)
        image_data = response.content
        # Process image and create summary
        summary = f"Image analysis: {len(image_data)} bytes, type: {mimetype}"
    elif text:
        # Use extracted text for text-based content
        lines = text.split('\n')
        summary = '\n'.join(lines[:5])  # First 5 lines
    else:
        summary = f"Content type: {mimetype}, size: available via URL"

    # Create new content with summary
    processed_contents.append({
        "dataType": "text",
        "mimetype": "text/plain",
        "data": f"SUMMARY:\n{summary}"
    })

# Output as new content
output = processed_contents
```
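The download above assumes the request always succeeds. In practice it is worth guarding HTTP access with a timeout and a status check; the helper below is a minimal sketch that uses only standard `requests` features (the 30-second timeout is an arbitrary choice, not a HyperFlow requirement).

```python
import requests

def fetch_bytes(url, timeout=30):
    """Download content bytes, surfacing timeouts and HTTP errors as exceptions."""
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # raise on 4xx/5xx responses
    return response.content
```

Swapping this helper in for the bare `requests.get(url)` call leaves the summary loop unchanged while making failures visible. The next example routes items by MIME type before transforming them.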
```python
import json
import requests

# Route content based on mimetype and process via HTTP access
contents = input1
text_contents = []
json_contents = []
other_contents = []

for content in contents:
    mimetype = content["mimetype"]
    if mimetype.startswith("text/"):
        text_contents.append(content)
    elif mimetype == "application/json":
        json_contents.append(content)
    else:
        other_contents.append(content)

# Process each type differently
processed = []

# Process text files
for tc in text_contents:
    # Access content data via URL if needed
    text = tc.get("text", "")
    if not text and tc.get("url"):
        response = requests.get(tc["url"])
        text = response.text
    processed.append({
        "dataType": "text",
        "mimetype": "text/plain",
        "data": text.upper()  # Example transformation
    })

# Process JSON files
for jc in json_contents:
    # Access content data via URL if needed
    text = jc.get("text", "")
    if not text and jc.get("url"):
        response = requests.get(jc["url"])
        text = response.text
    data = json.loads(text)
    # Transform JSON data
    data["processed"] = True
    processed.append({
        "dataType": "text",
        "mimetype": "application/json",
        "data": json.dumps(data, indent=2)
    })

output = processed
```
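The text-or-URL fallback appears in both loops above. Factoring it into a small helper keeps the routing logic focused; this is only a refactoring sketch of the pattern already shown, not a HyperFlow API.

```python
import requests

def get_content_text(content):
    """Return extracted text if present, otherwise fetch the content over HTTP."""
    text = content.get("text", "")
    if not text and content.get("url"):
        text = requests.get(content["url"]).text
    return text
```

With the helper in place, each loop body reduces to `text = get_content_text(tc)` (or `jc`). The next example merges multiple documents of the same type into one.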
```python
import requests

# Merge multiple documents into one with HTTP access
contents = input1

# Group by mimetype and access content data
grouped = {}
for content in contents:
    mimetype = content["mimetype"]
    if mimetype not in grouped:
        grouped[mimetype] = []
    # Access content data via HTTP if needed
    text = content.get("text", "")
    if not text and content.get("url"):
        response = requests.get(content["url"])
        text = response.text
    grouped[mimetype].append(text)

# Create merged documents
merged_contents = []
for mimetype, texts in grouped.items():
    if mimetype.startswith("text/"):
        separator = "\n\n" + "=" * 50 + "\n\n"
        merged_text = separator.join(texts)
        merged_contents.append({
            "dataType": "text",
            "mimetype": mimetype,
            "data": merged_text
        })

output = merged_contents
```
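Merging discards any per-document context. If downstream steps need to know what went into a merged document, the key/value `metadata` list shown in the next example can record it; the helper below is a sketch, and the `merged_count` key is an illustrative name rather than a HyperFlow convention.

```python
def merged_item(mimetype, texts):
    """Build a merged text content item that records how many parts it contains."""
    separator = "\n\n" + "=" * 50 + "\n\n"
    return {
        "dataType": "text",
        "mimetype": mimetype,
        "data": separator.join(texts),
        "metadata": [
            {"key": "merged_count", "value": str(len(texts))}  # illustrative key name
        ]
    }
```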
```python
import json
from datetime import datetime

# Generate content with detailed metadata
# Process input and generate analysis (analyze_text is assumed to be
# defined elsewhere in the transform)
analysis_result = analyze_text(input1)

# Create content with metadata
output = {
    "dataType": "text",
    "mimetype": "application/json",
    "data": json.dumps(analysis_result, indent=2),
    "metadata": [
        {"key": "analysis_type", "value": "sentiment"},
        {"key": "processed_date", "value": datetime.now().isoformat()},
        {"key": "confidence", "value": str(analysis_result.get("confidence", 0))},
        {"key": "version", "value": "1.0"}
    ]
}
```
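Note that every `value` in the metadata list is a string (hence the explicit `str()` around the confidence score). If a transform accumulates metadata in a plain dictionary first, a small helper can enforce that convention; this is a convenience sketch, not part of the HyperFlow API.

```python
def to_metadata(values):
    """Convert a plain dict into the key/value metadata list, stringifying every value."""
    return [{"key": str(k), "value": str(v)} for k, v in values.items()]

# Example:
# to_metadata({"analysis_type": "sentiment", "version": "1.0"})
# -> [{"key": "analysis_type", "value": "sentiment"},
#     {"key": "version", "value": "1.0"}]
```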