163 lines
6.5 KiB
Python
163 lines
6.5 KiB
Python
from fastapi import BackgroundTasks
|
|
from sqlalchemy.orm import Session
|
|
import tempfile
|
|
import zipfile
|
|
import time
|
|
from fastapi.responses import FileResponse
|
|
import os
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
|
from schemas.ResponseSchemas import CallTranscriptsResponse
|
|
from database import get_db
|
|
from models.CallTranscripts import CallTranscripts
|
|
from exceptions.business_exception import BusinessValidationException
|
|
from services.s3Service import get_signed_url
|
|
from interface.common_response import CommonResponse
|
|
from loguru import logger
|
|
|
|
from exceptions.db_exceptions import DBExceptionHandler
|
|
|
|
class CallTranscriptServices:
|
|
def __init__(self):
|
|
self.db:Session = next(get_db())
|
|
self.logger = logger
|
|
|
|
async def get_call_transcripts(self, limit:int, offset:int):
|
|
try:
|
|
call_transcripts = self.db.query(CallTranscripts).limit(limit).offset(offset).all()
|
|
|
|
total = self.db.query(CallTranscripts).count()
|
|
|
|
response = [CallTranscriptsResponse(**call_transcript.__dict__.copy()) for call_transcript in call_transcripts]
|
|
|
|
for call_transcript in response:
|
|
call_transcript.transcript_key_id = get_signed_url(call_transcript.transcript_key_id)
|
|
|
|
return_response = CommonResponse(data=response, total=total)
|
|
|
|
return return_response
|
|
except Exception as e:
|
|
DBExceptionHandler.handle_exception(e, context="getting call transcripts")
|
|
finally:
|
|
self.db.close()
|
|
|
|
def download_call_transcript(self, key_id: str):
|
|
try:
|
|
call_transcript = self.db.query(CallTranscripts).filter(CallTranscripts.transcript_key_id == key_id).first()
|
|
|
|
if not call_transcript:
|
|
raise BusinessValidationException("Call transcript not found!")
|
|
|
|
return get_signed_url(call_transcript.transcript_key_id)
|
|
except Exception as e:
|
|
DBExceptionHandler.handle_exception(e, context="downloading call transcript")
|
|
finally:
|
|
self.db.close()
|
|
|
|
|
|
def download_file(self, url: str, file_path: str) -> None:
|
|
"""
|
|
Download a file from a signed URL to a local path.
|
|
|
|
Args:
|
|
url: The pre-signed URL to download from
|
|
file_path: The local path to save the file to
|
|
"""
|
|
try:
|
|
import requests
|
|
response = requests.get(url)
|
|
if response.status_code == 200:
|
|
with open(file_path, 'wb') as f:
|
|
f.write(response.content)
|
|
else:
|
|
print(f"Failed to download file: {response.status_code}")
|
|
except Exception as e:
|
|
print(f"Error downloading file: {e}")
|
|
|
|
def cleanup_temp_files(self, temp_dir: str, zip_path: str) -> None:
|
|
"""
|
|
Clean up temporary files after sending the zip.
|
|
|
|
Args:
|
|
temp_dir: Directory containing temporary files
|
|
zip_path: Path to the zip file
|
|
"""
|
|
try:
|
|
# Wait a short time to ensure the file has been sent
|
|
time.sleep(5)
|
|
|
|
# Remove the zip file
|
|
if os.path.exists(zip_path):
|
|
os.remove(zip_path)
|
|
|
|
# Remove the temp directory and all its contents
|
|
if os.path.exists(temp_dir):
|
|
for file in os.listdir(temp_dir):
|
|
os.remove(os.path.join(temp_dir, file))
|
|
os.rmdir(temp_dir)
|
|
except Exception as e:
|
|
print(f"Error during cleanup: {e}")
|
|
|
|
async def bulk_download_call_transcripts(self, key_ids: list[int], background_tasks: BackgroundTasks):
|
|
try:
|
|
transcript_ids = self.db.query(CallTranscripts).filter(CallTranscripts.id.in_(key_ids)).all()
|
|
|
|
keys = [transcript.transcript_key_id for transcript in transcript_ids]
|
|
|
|
if len(keys) < 1:
|
|
raise BusinessValidationException("No call transcripts found!")
|
|
|
|
|
|
temp_dir = tempfile.mkdtemp(prefix="call_transcripts_")
|
|
zip_path = os.path.join(temp_dir, "call_transcripts.zip")
|
|
|
|
# Prepare download information
|
|
download_info = []
|
|
for key in keys:
|
|
# Generate signed URL for each key
|
|
url = get_signed_url(key)
|
|
|
|
# Determine filename (using key's basename or a formatted name)
|
|
filename = os.path.basename(key)
|
|
file_path = os.path.join(temp_dir, filename)
|
|
download_info.append((url, file_path, filename))
|
|
|
|
# Use ThreadPoolExecutor for concurrent downloads
|
|
# Adjust max_workers based on your system capabilities and S3 rate limits
|
|
max_workers = min(32, len(download_info)) # Cap at 32 threads or number of files, whichever is smaller
|
|
|
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
# Submit all download tasks
|
|
future_to_file = {executor.submit(self.download_file, url, file_path): (file_path, filename)
|
|
for url, file_path, filename in download_info}
|
|
|
|
# Collect results as they complete
|
|
file_paths = []
|
|
for future in as_completed(future_to_file):
|
|
file_path, filename = future_to_file[future]
|
|
try:
|
|
future.result() # Get the result to catch any exceptions
|
|
file_paths.append((file_path, filename))
|
|
except Exception as e:
|
|
print(f"Error downloading {filename}: {e}")
|
|
|
|
# Create zip file from downloaded files
|
|
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
|
|
for file_path, arcname in file_paths:
|
|
if os.path.exists(file_path):
|
|
zip_file.write(file_path, arcname=arcname)
|
|
|
|
# Add cleanup task to run after response is sent
|
|
background_tasks.add_task(self.cleanup_temp_files, temp_dir, zip_path)
|
|
|
|
# Return the zip file as a response
|
|
return FileResponse(
|
|
path=zip_path,
|
|
media_type="application/zip",
|
|
filename="call_transcripts.zip",
|
|
background=background_tasks
|
|
)
|
|
except Exception as e:
|
|
DBExceptionHandler.handle_exception(e, context="bulk downloading call transcripts")
|
|
finally:
|
|
self.db.close() |