{
  "title": "Scene Director Schema",
  "description": "Schema for describing camera, actors, and audio in a structured way for T2V/I2V prompt pipelines.",
  "type": "object",
  "additionalProperties": false,
  "required": ["camera", "actors", "audio"],
  "properties": {
    "camera": { "$ref": "#/$defs/camera" },
    "actors": {
      "type": "array",
      "minItems": 1,
      "items": { "$ref": "#/$defs/actor" }
    },
    "audio": { "$ref": "#/$defs/audio" }
  },
  "$defs": {
    "camera": {
      "type": "object",
      "additionalProperties": false,
      "required": ["composition", "movement", "position", "angle", "framing", "focus", "lens", "notes"],
      "properties": {
        "composition": { "$ref": "#/$defs/composition" },
        "movement": { "$ref": "#/$defs/movement" },
        "position": { "$ref": "#/$defs/position" },
        "angle": { "$ref": "#/$defs/angle" },
        "framing": { "$ref": "#/$defs/framing" },
        "focus": { "$ref": "#/$defs/focus" },
        "lens": { "$ref": "#/$defs/lens" },
        "notes": {
          "type": "string",
          "description": "Director notes for the camera; use an empty string when there are none."
        }
      }
    },
    "position": {
      "type": "object",
      "additionalProperties": false,
      "required": ["description"],
      "properties": {
        "description": {
          "type": "string",
          "minLength": 1,
          "description": "Natural language description of physical camera placement (e.g., 'Eye level, 2m from chef, slightly camera-left')."
        }
      }
    },
    "actor": {
      "type": "object",
      "additionalProperties": false,
      "required": ["id", "description", "action"],
      "properties": {
        "id": {
          "type": "string",
          "pattern": "^[a-z][a-z0-9_]*$",
          "description": "Stable identifier referenced by audio.dialogue[].actorId (lowercase letters, digits, and underscores; must start with a letter)."
        },
        "description": {
          "type": "string",
          "minLength": 1,
          "description": "Who/what the actor is (appearance, age range, vibe, clothing)."
        },
        "action": {
          "type": "string",
          "minLength": 1,
          "description": "What the actor is doing in the shot."
        }
      }
    },
    "audio": {
      "type": "object",
      "additionalProperties": false,
      "required": ["dialogue", "music", "sfx"],
      "properties": {
        "dialogue": {
          "type": "array",
          "items": { "$ref": "#/$defs/dialogueLine" },
          "description": "Ordered spoken lines. Empty array allowed if no dialogue."
        },
        "music": {
          "type": "string",
          "description": "Music direction (genre, mood, instrumentation); use an empty string when there is no music."
        },
        "sfx": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Sound effects directions; use an empty array when there are none."
        }
      }
    },
    "dialogueLine": {
      "type": "object",
      "additionalProperties": false,
      "required": ["actorId", "line"],
      "properties": {
        "actorId": {
          "type": "string",
          "pattern": "^[a-z][a-z0-9_]*$",
          "description": "Must match an actors[].id."
        },
        "line": {
          "type": "string",
          "minLength": 1,
          "description": "Spoken line."
        }
      }
    },
    "composition": {
      "type": "string",
      "description": "Primary shot size / composition label.",
      "enum": [
        "extreme_wide_shot",
        "wide_shot",
        "full_shot",
        "medium_long_shot",
        "medium_shot",
        "medium_close_up",
        "close_up",
        "extreme_close_up",
        "two_shot",
        "three_shot",
        "group_shot",
        "over_the_shoulder",
        "point_of_view",
        "insert_shot",
        "cutaway",
        "reaction_shot",
        "establishing_shot",
        "centered",
        "rule_of_thirds_left",
        "rule_of_thirds_right",
        "symmetrical",
        "asymmetrical",
        "negative_space_heavy",
        "foreground_framing",
        "silhouette",
        "profile_shot"
      ]
    },
    "movement": {
      "type": "string",
      "description": "Primary camera movement style (single label).",
      "enum": [
        "static",
        "pan_left",
        "pan_right",
        "tilt_up",
        "tilt_down",
        "roll",
        "push_in",
        "pull_out",
        "slow_push_in",
        "slow_pull_out",
        "fast_push_in",
        "fast_pull_out",
        "zoom_in",
        "zoom_out",
        "slow_zoom_in",
        "slow_zoom_out",
        "snap_zoom_in",
        "snap_zoom_out",
        "digital_zoom",
        "dolly_in",
        "dolly_out",
        "dolly_left",
        "dolly_right",
        "tracking_forward",
        "tracking_backward",
        "tracking_left",
        "tracking_right",
        "follow_shot",
        "lead_shot",
        "crane_up",
        "crane_down",
        "jib_up",
        "jib_down",
        "boom_up",
        "boom_down",
        "handheld",
        "subtle_handheld",
        "shaky_handheld",
        "documentary_handheld",
        "steadicam",
        "orbit",
        "arc_left",
        "arc_right",
        "spiral_in",
        "spiral_out",
        "parallax_move",
        "whip_pan",
        "whip_tilt",
        "drone_static",
        "drone_push_in",
        "drone_pull_out",
        "drone_orbit",
        "drone_flyover",
        "drone_drop_down",
        "speed_ramp_in",
        "speed_ramp_out",
        "invisible_cut_push",
        "motion_blur_transition"
      ]
    },
    "angle": {
      "type": "string",
      "description": "Camera angle.",
      "enum": [
        "eye_level",
        "low_angle",
        "high_angle",
        "bird_eye_view",
        "worm_eye_view",
        "dutch_angle"
      ]
    },
    "framing": {
      "type": "string",
      "description": "Subject framing emphasis.",
      "enum": [
        "centered",
        "off_center",
        "symmetrical",
        "asymmetrical",
        "negative_space_left",
        "negative_space_right",
        "foreground_framing",
        "background_dominant"
      ]
    },
    "focus": {
      "type": "string",
      "description": "Focus / depth-of-field behavior.",
      "enum": [
        "deep_focus",
        "shallow_focus",
        "soft_focus",
        "macro_focus",
        "rack_focus_foreground_to_background",
        "rack_focus_background_to_foreground",
        "tilt_shift"
      ]
    },
    "lens": {
      "type": "object",
      "description": "Lens metadata. All three fields are required; notes may be an empty string.",
      "additionalProperties": false,
      "required": ["focalLengthMm", "aperture", "notes"],
      "properties": {
        "focalLengthMm": {
          "type": "number",
          "minimum": 1,
          "description": "Focal length in millimeters."
        },
        "aperture": {
          "type": "number",
          "minimum": 0.1,
          "description": "F-number (e.g., 1.2, 2.8, 5.6)."
        },
        "notes": {
          "type": "string",
          "description": "Free-form lens notes (e.g., 'anamorphic', 'vintage', 'soft edges')."
        }
      }
    }
  }
}