# How can we generate the cost for this system?
#
# Reference:
# https://cookbook.openai.com/examples/gpt_with_vision_for_video_understanding
#
# NOTE: `cv2`, `base64`, and `client` are used below but are not defined in
# this snippet; they must already be in scope before it runs.

# Read the video frame by frame and keep each frame as a base64-encoded JPEG.
video = cv2.VideoCapture("data/bison.mp4")
base64Frames = []
try:
    while video.isOpened():
        success, frame = video.read()
        if not success:
            # No further frame could be read; stop collecting.
            break
        _, buffer = cv2.imencode(".jpg", frame)
        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
finally:
    # Release the capture handle even if encoding a frame raises.
    video.release()
print(len(base64Frames), "frames read.")

# Build a single user message: the instruction text followed by every 50th
# frame, so only a sample of the video is sent with the request.
PROMPT_MESSAGES = [
    {
        "role": "user",
        "content": [
            "These are frames from a video that I want to upload. Generate a compelling description that I can upload along with the video.",
            *(
                {"image": encoded_frame, "resize": 768}
                for encoded_frame in base64Frames[0::50]
            ),
        ],
    },
]

# Request parameters; `max_tokens` limits the length of the generated reply.
params = {
    "model": "gpt-4o",
    "messages": PROMPT_MESSAGES,
    "max_tokens": 200,
}

result = client.chat.completions.create(**params)
print(result.choices[0].message.content)