Initial release: DictIA v0.8.14-alpha (fork de Speakr, AGPL-3.0)

This commit is contained in:
InnovA AI
2026-03-16 21:47:37 +00:00
commit 42772a31ed
365 changed files with 103572 additions and 0 deletions

73
src/models/__init__.py Normal file
View File

@@ -0,0 +1,73 @@
"""
Database models package for the Speakr application.
This package contains all database models organized by domain:
- User and authentication models
- Recording and transcript models
- Sharing models (public and internal)
- Organization models (groups and tags)
- Event, template, and search session models
- System configuration models
"""
# Import database instance
from src.database import db
# Import all models
from .user import User, Speaker
from .api_token import APIToken
from .speaker_snippet import SpeakerSnippet
from .recording import Recording, TranscriptChunk
from .sharing import Share, InternalShare, SharedRecordingState
from .organization import Group, GroupMembership, Tag, RecordingTag, Folder
from .events import Event
from .templates import TranscriptTemplate
from .naming_template import NamingTemplate
from .export_template import ExportTemplate
from .inquire import InquireSession
from .system import SystemSetting
from .audit import ShareAuditLog
from .access_log import AccessLog
from .auth_log import AuthLog
from .push_subscription import PushSubscription
from .processing_job import ProcessingJob
from .token_usage import TokenUsage
from .transcription_usage import TranscriptionUsage
# Public names of the models package. `__all__` controls what
# `from src.models import *` exposes; every entry is imported above.
__all__ = [
# Database instance
'db',
# User models
'User',
'Speaker',
'APIToken',
'SpeakerSnippet',
# Recording models
'Recording',
'TranscriptChunk',
# Sharing models
'Share',
'InternalShare',
'SharedRecordingState',
# Audit log models (share, data-access and authentication trails)
'ShareAuditLog',
'AccessLog',
'AuthLog',
# Organization models
'Group',
'GroupMembership',
'Tag',
'RecordingTag',
'Folder',
# Other models
'Event',
'TranscriptTemplate',
'NamingTemplate',
'ExportTemplate',
'InquireSession',
'SystemSetting',
'PushSubscription',
'ProcessingJob',
'TokenUsage',
'TranscriptionUsage',
]

97
src/models/access_log.py Normal file
View File

@@ -0,0 +1,97 @@
"""
Access audit log model for tracking data access operations.
Provides traceability for Loi 25 compliance: who accessed what, when, from where.
"""
from datetime import datetime
from src.database import db
class AccessLog(db.Model):
    """One row per data-access event: who did what to which resource.

    Entries are created through the ``log_*`` static helpers. Those helpers
    only add the new row to ``db.session``; they never commit.
    """

    __tablename__ = 'access_log'

    id = db.Column(db.Integer, primary_key=True)

    # Actor. NULL is allowed so anonymous/public access can be recorded.
    user_id = db.Column(
        db.Integer,
        db.ForeignKey('user.id'),
        nullable=True,
    )
    user = db.relationship(
        'User',
        backref=db.backref('access_logs', lazy='dynamic'),
    )

    # Operation performed, e.g. 'view', 'download', 'edit', 'delete',
    # 'export', 'share'.
    action = db.Column(db.String(30), nullable=False)

    # Target of the operation, e.g. 'recording', 'audio', 'transcript',
    # 'user', 'summary', plus its optional primary key.
    resource_type = db.Column(db.String(50), nullable=False)
    resource_id = db.Column(db.Integer, nullable=True)

    # Moment of the access (naive UTC from datetime.utcnow), indexed.
    timestamp = db.Column(
        db.DateTime,
        default=datetime.utcnow,
        nullable=False,
        index=True,
    )

    # Request origin.
    ip_address = db.Column(db.String(45), nullable=True)
    user_agent = db.Column(db.String(500), nullable=True)

    # Outcome: 'success', 'denied' or 'error'.
    status = db.Column(db.String(20), default='success', nullable=False)

    # Free-form JSON context.
    details = db.Column(db.JSON, nullable=True)

    def to_dict(self):
        """Return a JSON-friendly dict describing this entry."""
        actor = self.user
        moment = self.timestamp
        payload = {
            'id': self.id,
            'user_id': self.user_id,
            'username': actor.username if actor else None,
            'action': self.action,
            'resource_type': self.resource_type,
            'resource_id': self.resource_id,
            'timestamp': moment.isoformat() if moment else None,
            'ip_address': self.ip_address,
            'user_agent': self.user_agent,
            'status': self.status,
            'details': self.details,
        }
        return payload

    @staticmethod
    def log_access(
        action,
        resource_type,
        resource_id=None,
        user_id=None,
        status='success',
        details=None,
        ip_address=None,
        user_agent=None,
    ):
        """Create an entry, add it to the session and return it (no commit)."""
        fields = {
            'user_id': user_id,
            'action': action,
            'resource_type': resource_type,
            'resource_id': resource_id,
            'status': status,
            'details': details,
            'ip_address': ip_address,
            'user_agent': user_agent,
        }
        entry = AccessLog(**fields)
        db.session.add(entry)
        return entry

    @staticmethod
    def log_view(resource_type, resource_id, user_id=None, ip_address=None, user_agent=None, details=None):
        """Record a 'view' access on the given resource."""
        return AccessLog.log_access(
            action='view',
            resource_type=resource_type,
            resource_id=resource_id,
            user_id=user_id,
            ip_address=ip_address,
            user_agent=user_agent,
            details=details,
        )

    @staticmethod
    def log_download(resource_type, resource_id, user_id=None, ip_address=None, user_agent=None, details=None):
        """Record a 'download' access on the given resource."""
        return AccessLog.log_access(
            action='download',
            resource_type=resource_type,
            resource_id=resource_id,
            user_id=user_id,
            ip_address=ip_address,
            user_agent=user_agent,
            details=details,
        )

    @staticmethod
    def log_edit(resource_type, resource_id, user_id=None, ip_address=None, user_agent=None, details=None):
        """Record an 'edit' access on the given resource."""
        return AccessLog.log_access(
            action='edit',
            resource_type=resource_type,
            resource_id=resource_id,
            user_id=user_id,
            ip_address=ip_address,
            user_agent=user_agent,
            details=details,
        )

    @staticmethod
    def log_delete(resource_type, resource_id, user_id=None, ip_address=None, user_agent=None, details=None):
        """Record a 'delete' access on the given resource."""
        return AccessLog.log_access(
            action='delete',
            resource_type=resource_type,
            resource_id=resource_id,
            user_id=user_id,
            ip_address=ip_address,
            user_agent=user_agent,
            details=details,
        )

    @staticmethod
    def log_export(resource_type, resource_id, user_id=None, ip_address=None, user_agent=None, details=None):
        """Record an 'export' access on the given resource."""
        return AccessLog.log_access(
            action='export',
            resource_type=resource_type,
            resource_id=resource_id,
            user_id=user_id,
            ip_address=ip_address,
            user_agent=user_agent,
            details=details,
        )

51
src/models/api_token.py Normal file
View File

@@ -0,0 +1,51 @@
"""
API Token database model.
This module defines the APIToken model for managing user API tokens
that allow authentication via Bearer tokens for automation tools.
"""
from datetime import datetime
from src.database import db
class APIToken(db.Model):
    """A user's API token record, identified by the ``token_hash`` column.

    A token is usable while it is neither revoked nor past ``expires_at``;
    a NULL ``expires_at`` means it never expires.
    """

    __tablename__ = 'api_token'

    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
    token_hash = db.Column(db.String(64), unique=True, nullable=False, index=True)
    # Optional human-readable label, e.g. "n8n" or "CLI".
    name = db.Column(db.String(100), nullable=True)
    created_at = db.Column(db.DateTime, default=datetime.utcnow, nullable=False)
    last_used_at = db.Column(db.DateTime, nullable=True)
    expires_at = db.Column(db.DateTime, nullable=True)
    revoked = db.Column(db.Boolean, default=False, nullable=False, index=True)

    # Owning user; the backref cascade removes tokens along with the user.
    user = db.relationship(
        'User',
        backref=db.backref('api_tokens', lazy=True, cascade='all, delete-orphan'),
    )

    def __repr__(self):
        return f"APIToken(name='{self.name}', user_id={self.user_id}, revoked={self.revoked})"

    def to_dict(self):
        """Return the token's metadata; the hash and owner id are left out."""

        def _iso(moment):
            return moment.isoformat() if moment else None

        return {
            'id': self.id,
            'name': self.name,
            'created_at': _iso(self.created_at),
            'last_used_at': _iso(self.last_used_at),
            'expires_at': _iso(self.expires_at),
            'revoked': self.revoked,
        }

    def is_expired(self):
        """Return True when ``expires_at`` is set and lies before the current UTC time."""
        if self.expires_at:
            return self.expires_at < datetime.utcnow()
        return False

    def is_valid(self):
        """Return True when the token is neither revoked nor expired."""
        if self.revoked:
            return False
        return not self.is_expired()

109
src/models/audit.py Normal file
View File

@@ -0,0 +1,109 @@
"""
Audit logging models for tracking share operations.
Provides comprehensive audit trail for security and compliance.
"""
from datetime import datetime
from src.database import db
class ShareAuditLog(db.Model):
    """History of share operations performed on recordings.

    The ``log_share_*`` helpers build an entry and add it to ``db.session``
    without committing.
    """

    __tablename__ = 'share_audit_log'

    id = db.Column(db.Integer, primary_key=True)

    # Kind of operation: 'created', 'modified', 'revoked', 'cascade_revoked'.
    action = db.Column(db.String(20), nullable=False)
    recording_id = db.Column(
        db.Integer,
        db.ForeignKey('recording.id', ondelete='CASCADE'),
        nullable=False,
    )

    # User who performed the operation.
    actor_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
    actor = db.relationship(
        'User',
        foreign_keys=[actor_id],
        backref='audit_actions_performed',
    )

    # User affected by the operation (optional).
    target_user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=True)
    target_user = db.relationship('User', foreign_keys=[target_user_id])

    # Permission snapshots taken when the operation happened.
    permissions_granted = db.Column(db.JSON, nullable=True)  # what was granted/revoked
    actor_permissions = db.Column(db.JSON, nullable=True)  # what the actor held

    timestamp = db.Column(db.DateTime, default=datetime.utcnow, nullable=False)
    # Plain integer reference to the share, when there is one (no foreign key).
    share_id = db.Column(db.Integer, nullable=True)

    # System-generated notes, e.g. "Permission constrained".
    notes = db.Column(db.Text, nullable=True)
    # IP address of the actor.
    ip_address = db.Column(db.String(45), nullable=True)

    recording = db.relationship(
        'Recording',
        backref=db.backref('share_audit_logs', cascade='all, delete-orphan'),
    )

    def to_dict(self):
        """Return a JSON-friendly dict describing this entry."""
        performer = self.actor
        affected = self.target_user
        moment = self.timestamp
        return {
            'id': self.id,
            'action': self.action,
            'recording_id': self.recording_id,
            'actor_id': self.actor_id,
            'actor_username': performer.username if performer else None,
            'target_user_id': self.target_user_id,
            'target_username': affected.username if affected else None,
            'permissions_granted': self.permissions_granted,
            'actor_permissions': self.actor_permissions,
            'timestamp': moment.isoformat() if moment else None,
            'share_id': self.share_id,
            'notes': self.notes,
            'ip_address': self.ip_address,
        }

    @staticmethod
    def log_share_created(
        recording_id,
        actor_id,
        target_user_id,
        permissions,
        actor_permissions=None,
        notes=None,
        ip_address=None,
    ):
        """Add and return a 'created' entry (``share_id`` is not set)."""
        fields = {
            'action': 'created',
            'recording_id': recording_id,
            'actor_id': actor_id,
            'target_user_id': target_user_id,
            'permissions_granted': permissions,
            'actor_permissions': actor_permissions,
            'notes': notes,
            'ip_address': ip_address,
        }
        entry = ShareAuditLog(**fields)
        db.session.add(entry)
        return entry

    @staticmethod
    def log_share_modified(
        share_id,
        recording_id,
        actor_id,
        target_user_id,
        old_permissions,
        new_permissions,
        notes=None,
        ip_address=None,
    ):
        """Add and return a 'modified' entry storing old and new permissions."""
        permission_change = {'old': old_permissions, 'new': new_permissions}
        fields = {
            'action': 'modified',
            'recording_id': recording_id,
            'actor_id': actor_id,
            'target_user_id': target_user_id,
            'permissions_granted': permission_change,
            'share_id': share_id,
            'notes': notes,
            'ip_address': ip_address,
        }
        entry = ShareAuditLog(**fields)
        db.session.add(entry)
        return entry

    @staticmethod
    def log_share_revoked(
        share_id,
        recording_id,
        actor_id,
        target_user_id,
        was_cascade=False,
        notes=None,
        ip_address=None,
    ):
        """Add and return a 'revoked' (or 'cascade_revoked') entry."""
        if was_cascade:
            action = 'cascade_revoked'
        else:
            action = 'revoked'
        # The permission columns are deliberately not passed here, exactly as
        # before, so they stay unset on the new row.
        fields = {
            'action': action,
            'recording_id': recording_id,
            'actor_id': actor_id,
            'target_user_id': target_user_id,
            'share_id': share_id,
            'notes': notes,
            'ip_address': ip_address,
        }
        entry = ShareAuditLog(**fields)
        db.session.add(entry)
        return entry

94
src/models/auth_log.py Normal file
View File

@@ -0,0 +1,94 @@
"""
Authentication audit log model for tracking auth events.
Provides traceability for Loi 25 compliance: login/logout history, failed attempts.
"""
from datetime import datetime
from src.database import db
class AuthLog(db.Model):
    """History of authentication events (logins, logouts, failures, ...).

    All ``log_*`` helpers funnel into ``log_auth``, which adds the new row to
    ``db.session`` and returns it without committing.
    """

    __tablename__ = 'auth_log'

    id = db.Column(db.Integer, primary_key=True)

    # Account concerned; NULL for failed logins where no user exists.
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=True)
    user = db.relationship(
        'User',
        backref=db.backref('auth_logs', lazy='dynamic'),
    )

    # Event kind: 'login', 'logout', 'failed_login', 'register',
    # 'password_change', 'password_reset', 'sso_login'.
    action = db.Column(db.String(30), nullable=False)

    # Moment of the event (naive UTC from datetime.utcnow), indexed.
    timestamp = db.Column(
        db.DateTime,
        default=datetime.utcnow,
        nullable=False,
        index=True,
    )

    # Request origin.
    ip_address = db.Column(db.String(45), nullable=True)
    user_agent = db.Column(db.String(500), nullable=True)

    # Free-form JSON context, e.g. attempted email, SSO provider, failure reason.
    details = db.Column(db.JSON, nullable=True)

    def to_dict(self):
        """Return a JSON-friendly dict describing this entry."""
        account = self.user
        moment = self.timestamp
        return {
            'id': self.id,
            'user_id': self.user_id,
            'username': account.username if account else None,
            'action': self.action,
            'timestamp': moment.isoformat() if moment else None,
            'ip_address': self.ip_address,
            'user_agent': self.user_agent,
            'details': self.details,
        }

    @staticmethod
    def log_auth(action, user_id=None, ip_address=None, user_agent=None, details=None):
        """Create an entry for ``action``, add it to the session and return it."""
        fields = {
            'user_id': user_id,
            'action': action,
            'ip_address': ip_address,
            'user_agent': user_agent,
            'details': details,
        }
        entry = AuthLog(**fields)
        db.session.add(entry)
        return entry

    @staticmethod
    def log_login(user_id, ip_address=None, user_agent=None, details=None):
        """Record a successful login."""
        return AuthLog.log_auth(
            'login',
            user_id=user_id,
            ip_address=ip_address,
            user_agent=user_agent,
            details=details,
        )

    @staticmethod
    def log_logout(user_id, ip_address=None, user_agent=None):
        """Record a logout."""
        return AuthLog.log_auth(
            'logout',
            user_id=user_id,
            ip_address=ip_address,
            user_agent=user_agent,
        )

    @staticmethod
    def log_failed_login(ip_address=None, user_agent=None, details=None):
        """Record a failed login attempt (no user id is attached)."""
        return AuthLog.log_auth(
            'failed_login',
            ip_address=ip_address,
            user_agent=user_agent,
            details=details,
        )

    @staticmethod
    def log_register(user_id, ip_address=None, user_agent=None):
        """Record a new user registration."""
        return AuthLog.log_auth(
            'register',
            user_id=user_id,
            ip_address=ip_address,
            user_agent=user_agent,
        )

    @staticmethod
    def log_password_change(user_id, ip_address=None, user_agent=None, details=None):
        """Record a password change."""
        return AuthLog.log_auth(
            'password_change',
            user_id=user_id,
            ip_address=ip_address,
            user_agent=user_agent,
            details=details,
        )

    @staticmethod
    def log_password_reset(user_id, ip_address=None, user_agent=None):
        """Record a password reset."""
        return AuthLog.log_auth(
            'password_reset',
            user_id=user_id,
            ip_address=ip_address,
            user_agent=user_agent,
        )

    @staticmethod
    def log_sso_login(user_id, ip_address=None, user_agent=None, details=None):
        """Record a login performed through SSO."""
        return AuthLog.log_auth(
            'sso_login',
            user_id=user_id,
            ip_address=ip_address,
            user_agent=user_agent,
            details=details,
        )

43
src/models/events.py Normal file
View File

@@ -0,0 +1,43 @@
"""
Event model for calendar events extracted from transcripts.
This module defines the Event model for storing calendar events
that are extracted from transcriptions.
"""
import json
from datetime import datetime
from src.database import db
class Event(db.Model):
    """A calendar event attached to a recording."""

    id = db.Column(db.Integer, primary_key=True)
    recording_id = db.Column(db.Integer, db.ForeignKey('recording.id'), nullable=False)
    title = db.Column(db.String(200), nullable=False)
    description = db.Column(db.Text, nullable=True)
    start_datetime = db.Column(db.DateTime, nullable=False)
    end_datetime = db.Column(db.DateTime, nullable=True)
    location = db.Column(db.String(500), nullable=True)
    # Attendee list serialized as JSON text.
    attendees = db.Column(db.Text, nullable=True)
    reminder_minutes = db.Column(db.Integer, nullable=True, default=15)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    # Parent recording; the backref cascade removes events with the recording.
    recording = db.relationship(
        'Recording',
        backref=db.backref('events', lazy=True, cascade='all, delete-orphan'),
    )

    def to_dict(self):
        """Return a JSON-friendly dict; ``attendees`` is decoded into a list."""

        def _iso(moment):
            return moment.isoformat() if moment else None

        if self.attendees:
            attendee_list = json.loads(self.attendees)
        else:
            attendee_list = []

        return {
            'id': self.id,
            'recording_id': self.recording_id,
            'title': self.title,
            'description': self.description,
            'start_datetime': _iso(self.start_datetime),
            'end_datetime': _iso(self.end_datetime),
            'location': self.location,
            'attendees': attendee_list,
            'reminder_minutes': self.reminder_minutes,
            'created_at': _iso(self.created_at),
        }

View File

@@ -0,0 +1,37 @@
"""
ExportTemplate model for user-defined export formatting.
This module defines the ExportTemplate model for storing
custom templates for markdown export formatting.
"""
from datetime import datetime
from src.database import db
class ExportTemplate(db.Model):
    """A per-user template controlling how markdown exports are formatted."""

    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
    name = db.Column(db.String(100), nullable=False)
    template = db.Column(db.Text, nullable=False)
    description = db.Column(db.String(500), nullable=True)
    is_default = db.Column(db.Boolean, default=False)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(
        db.DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Owning user; the backref cascade removes templates along with the user.
    user = db.relationship(
        'User',
        backref=db.backref('export_templates', lazy=True, cascade='all, delete-orphan'),
    )

    def to_dict(self):
        """Return a JSON-friendly dict (the owner id is not included)."""
        created, updated = self.created_at, self.updated_at
        return {
            'id': self.id,
            'name': self.name,
            'template': self.template,
            'description': self.description,
            'is_default': self.is_default,
            'created_at': created.isoformat() if created else None,
            'updated_at': updated.isoformat() if updated else None,
        }

45
src/models/inquire.py Normal file
View File

@@ -0,0 +1,45 @@
"""
InquireSession model for semantic search sessions.
This module defines the InquireSession model for tracking
inquire mode sessions and their filtering criteria.
"""
import json
from datetime import datetime
from src.database import db
class InquireSession(db.Model):
    """An inquire-mode session of a user, together with its filter criteria."""

    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
    # Optional name chosen by the user.
    session_name = db.Column(db.String(200), nullable=True)

    # Filters. The list-valued ones are kept as JSON text.
    filter_tags = db.Column(db.Text, nullable=True)  # JSON array of tag IDs
    filter_speakers = db.Column(db.Text, nullable=True)  # JSON array of speaker names
    filter_date_from = db.Column(db.Date, nullable=True)
    filter_date_to = db.Column(db.Date, nullable=True)
    filter_recording_ids = db.Column(db.Text, nullable=True)  # JSON array of recording IDs

    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    last_used = db.Column(db.DateTime, default=datetime.utcnow)

    # Owning user; the backref cascade removes sessions along with the user.
    user = db.relationship(
        'User',
        backref=db.backref('inquire_sessions', lazy=True, cascade='all, delete-orphan'),
    )

    def to_dict(self):
        """Return a JSON-friendly dict; JSON-text filters are decoded to lists."""

        def _decoded(raw):
            return json.loads(raw) if raw else []

        def _iso(value):
            return value.isoformat() if value else None

        return {
            'id': self.id,
            'session_name': self.session_name,
            'filter_tags': _decoded(self.filter_tags),
            'filter_speakers': _decoded(self.filter_speakers),
            'filter_date_from': _iso(self.filter_date_from),
            'filter_date_to': _iso(self.filter_date_to),
            'filter_recording_ids': _decoded(self.filter_recording_ids),
            'created_at': _iso(self.created_at),
            'last_used': _iso(self.last_used),
        }

View File

@@ -0,0 +1,114 @@
"""
NamingTemplate model for user-defined recording title formatting.
This module defines the NamingTemplate model for storing
custom templates for generating recording titles from filenames,
metadata, and AI-generated content.
"""
import json
import re
import os
from datetime import datetime
from src.database import db
class NamingTemplate(db.Model):
    """User-defined template used to generate a recording title.

    ``template`` is text containing ``{{name}}`` placeholders. Built-in
    placeholders are filled from the original filename, the meeting date and
    an optional AI-generated title. Additional placeholders are extracted
    from the filename using the regexes stored (as a JSON object mapping
    placeholder name to pattern) in ``regex_patterns``.
    """

    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
    name = db.Column(db.String(100), nullable=False)
    template = db.Column(db.Text, nullable=False)  # e.g., "{{phone}} - {{date}} {{ai_title}}"
    description = db.Column(db.String(500), nullable=True)
    regex_patterns = db.Column(db.Text, nullable=True)  # JSON: {"phone": "\\d{10}", "caller": "^([^-]+)"}
    is_default = db.Column(db.Boolean, default=False)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    user = db.relationship('User', foreign_keys=[user_id], backref=db.backref('naming_templates', lazy=True, cascade='all, delete-orphan'))

    def to_dict(self):
        """Convert model to dictionary representation.

        ``regex_patterns`` goes through ``get_regex_patterns`` so stored text
        that is not a valid JSON object yields ``{}`` instead of raising.
        """
        return {
            'id': self.id,
            'name': self.name,
            'template': self.template,
            'description': self.description,
            'regex_patterns': self.get_regex_patterns(),
            'is_default': self.is_default,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None
        }

    def get_regex_patterns(self):
        """Parse and return the regex patterns as a dictionary.

        Returns:
            The decoded JSON object, or ``{}`` when nothing is stored, the
            text cannot be decoded, or it decodes to something other than a
            JSON object (callers iterate the result with ``.items()``).
        """
        if not self.regex_patterns:
            return {}
        try:
            parsed = json.loads(self.regex_patterns)
        except (json.JSONDecodeError, TypeError):
            return {}
        return parsed if isinstance(parsed, dict) else {}

    def needs_ai_title(self):
        """Check if the template contains the ``{{ai_title}}`` placeholder."""
        return '{{ai_title}}' in (self.template or '')

    def apply(self, original_filename, meeting_date=None, ai_title=None):
        """
        Apply this template to generate a recording title.

        Args:
            original_filename: The original filename of the recording
            meeting_date: Optional datetime of the recording
            ai_title: Optional AI-generated title

        Returns:
            Generated title string, or None if template produces empty result
        """
        # Start with template (an unset template behaves like an empty one)
        result = self.template or ''

        # Get filename without extension for {{filename}}
        filename_no_ext = os.path.splitext(original_filename)[0] if original_filename else ''

        # Build built-in variables
        variables = {
            'ai_title': ai_title or '',
            'filename': filename_no_ext,
            'filename_full': original_filename or '',
            'date': meeting_date.strftime('%Y-%m-%d') if meeting_date else '',
            'datetime': meeting_date.strftime('%Y-%m-%d %H:%M') if meeting_date else '',
            'time': meeting_date.strftime('%H:%M') if meeting_date else '',
            'year': meeting_date.strftime('%Y') if meeting_date else '',
            'month': meeting_date.strftime('%m') if meeting_date else '',
            'day': meeting_date.strftime('%d') if meeting_date else '',
        }

        # Extract custom variables from filename using regex patterns.
        # A custom name equal to a built-in one replaces the built-in value.
        # NOTE(review): patterns are user-supplied and run unbounded against
        # the filename; consider guarding against pathological regexes.
        for var_name, pattern in self.get_regex_patterns().items():
            try:
                match = re.search(pattern, filename_no_ext)
            except (re.error, TypeError, OverflowError):
                # Pattern cannot be compiled or is not a string: treat as empty
                variables[var_name] = ''
                continue

            if match is None:
                variables[var_name] = ''
                continue

            # Use first capture group if exists, else full match
            captured = match.group(1) if match.groups() else match.group(0)
            # An optional group that did not participate yields None, which
            # str.replace() below would reject.
            variables[var_name] = captured if captured is not None else ''

        # Replace all variables in template
        for var_name, value in variables.items():
            result = result.replace('{{' + var_name + '}}', value)

        # Clean up result; an empty or whitespace-only title becomes None
        result = result.strip()
        return result or None

240
src/models/organization.py Normal file
View File

@@ -0,0 +1,240 @@
"""
Organization models for groups, tags, and related structures.
This module defines models for organizing users into groups and tagging recordings.
"""
from datetime import datetime
from src.database import db
class Group(db.Model):
    """A named group of users, linked to its members via GroupMembership."""

    __tablename__ = 'group'

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(100), nullable=False, unique=True)
    description = db.Column(db.Text, nullable=True)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(
        db.DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Membership rows; they are removed together with the group.
    memberships = db.relationship(
        'GroupMembership',
        back_populates='group',
        cascade='all, delete-orphan',
    )

    def to_dict(self):
        """Return a JSON-friendly dict including the current member count."""
        created, updated = self.created_at, self.updated_at
        return {
            'id': self.id,
            'name': self.name,
            'description': self.description,
            'member_count': len(self.memberships),
            'created_at': created.isoformat() if created else None,
            'updated_at': updated.isoformat() if updated else None,
        }
class GroupMembership(db.Model):
    """Link between a user and a group, carrying the user's role in it."""

    __tablename__ = 'group_membership'

    id = db.Column(db.Integer, primary_key=True)
    group_id = db.Column(
        db.Integer,
        db.ForeignKey('group.id', ondelete='CASCADE'),
        nullable=False,
    )
    user_id = db.Column(
        db.Integer,
        db.ForeignKey('user.id', ondelete='CASCADE'),
        nullable=False,
    )
    # Either 'admin' or 'member'.
    role = db.Column(db.String(20), default='member')
    joined_at = db.Column(db.DateTime, default=datetime.utcnow)

    group = db.relationship('Group', back_populates='memberships')
    user = db.relationship('User', backref=db.backref('group_memberships', lazy=True))

    # A given user may appear at most once per group.
    __table_args__ = (
        db.UniqueConstraint('group_id', 'user_id', name='unique_group_membership'),
    )

    def to_dict(self):
        """Return a JSON-friendly dict with group and user names resolved."""
        parent_group = self.group
        member = self.user
        joined = self.joined_at
        return {
            'id': self.id,
            'group_id': self.group_id,
            'group_name': parent_group.name if parent_group else None,
            'user_id': self.user_id,
            'username': member.username if member else None,
            'role': self.role,
            'joined_at': joined.isoformat() if joined else None,
        }
class RecordingTag(db.Model):
    """Association row linking one recording to one tag.

    The composite primary key is (recording_id, tag_id); ``order`` and
    ``added_at`` are extra data carried by the link itself.
    """

    __tablename__ = 'recording_tags'

    recording_id = db.Column(
        db.Integer,
        db.ForeignKey('recording.id'),
        primary_key=True,
    )
    tag_id = db.Column(
        db.Integer,
        db.ForeignKey('tag.id'),
        primary_key=True,
    )
    added_at = db.Column(db.DateTime, default=datetime.utcnow, nullable=True)
    order = db.Column(db.Integer, nullable=False, default=0)

    # Both ends of the association.
    recording = db.relationship('Recording', back_populates='tag_associations')
    tag = db.relationship('Tag', back_populates='recording_associations')
class Folder(db.Model):
    """A folder holding recordings (each folder has many recordings).

    A folder belongs to a user and may additionally be scoped to a group.
    It also carries per-folder defaults for transcription, summarization,
    retention, sharing and naming/export templates.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(50), nullable=False)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
    # Set when the folder is scoped to a group.
    group_id = db.Column(
        db.Integer,
        db.ForeignKey('group.id', ondelete='CASCADE'),
        nullable=True,
    )
    # Hex color used by the UI (green by default, to tell folders from tags).
    color = db.Column(db.String(7), default='#10B981')

    # Per-folder processing settings.
    custom_prompt = db.Column(db.Text, nullable=True)  # summarization prompt
    default_language = db.Column(db.String(10), nullable=True)  # transcription language
    default_min_speakers = db.Column(db.Integer, nullable=True)  # min speakers for ASR
    default_max_speakers = db.Column(db.Integer, nullable=True)  # max speakers for ASR

    # Transcription hints.
    default_hotwords = db.Column(db.Text, nullable=True)  # comma-separated bias words
    default_initial_prompt = db.Column(db.Text, nullable=True)  # prompt steering transcription

    # Retention and deletion.
    protect_from_deletion = db.Column(db.Boolean, default=False)  # exempt from auto-deletion
    retention_days = db.Column(db.Integer, nullable=True)  # folder-specific retention override

    # Group folder behaviour when a recording is moved into the folder.
    auto_share_on_apply = db.Column(db.Boolean, default=True)  # share with the group
    share_with_group_lead = db.Column(db.Boolean, default=True)  # share with group admins

    # Optional naming / export templates for recordings in this folder.
    naming_template_id = db.Column(
        db.Integer,
        db.ForeignKey('naming_template.id', ondelete='SET NULL'),
        nullable=True,
    )
    export_template_id = db.Column(
        db.Integer,
        db.ForeignKey('export_template.id', ondelete='SET NULL'),
        nullable=True,
    )

    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(
        db.DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Owner, optional group and optional templates.
    user = db.relationship(
        'User',
        backref=db.backref('folders', lazy=True, cascade='all, delete-orphan'),
    )
    group = db.relationship('Group', backref=db.backref('folders', lazy=True))
    naming_template = db.relationship('NamingTemplate', foreign_keys=[naming_template_id])
    export_template = db.relationship('ExportTemplate', foreign_keys=[export_template_id])

    # Recordings stored in this folder.
    recordings = db.relationship('Recording', back_populates='folder', lazy=True)

    # A user cannot own two folders with the same name.
    __table_args__ = (
        db.UniqueConstraint('name', 'user_id', name='_user_folder_uc'),
    )

    @property
    def is_group_folder(self):
        """True when the folder has a ``group_id``."""
        return self.group_id is not None

    def to_dict(self):
        """Return a JSON-friendly dict with related names and recording count."""
        owner_group = self.group
        naming = self.naming_template
        export = self.export_template
        created = self.created_at
        contained = self.recordings

        data = {
            'id': self.id,
            'name': self.name,
            'color': self.color,
            'group_id': self.group_id,
            'is_group_folder': self.is_group_folder,
            'group_name': owner_group.name if owner_group else None,
        }
        data.update({
            'custom_prompt': self.custom_prompt,
            'default_language': self.default_language,
            'default_min_speakers': self.default_min_speakers,
            'default_max_speakers': self.default_max_speakers,
            'default_hotwords': self.default_hotwords,
            'default_initial_prompt': self.default_initial_prompt,
            'protect_from_deletion': self.protect_from_deletion,
            'retention_days': self.retention_days,
            'auto_share_on_apply': self.auto_share_on_apply,
            'share_with_group_lead': self.share_with_group_lead,
        })
        data.update({
            'naming_template_id': self.naming_template_id,
            'naming_template_name': naming.name if naming else None,
            'export_template_id': self.export_template_id,
            'export_template_name': export.name if export else None,
            'created_at': created.isoformat() if created else None,
            'recording_count': len(contained) if contained else 0,
        })
        return data
class Tag(db.Model):
"""Tags for organizing and categorizing recordings."""
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String(50), nullable=False)
user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
group_id = db.Column(db.Integer, db.ForeignKey('group.id', ondelete='CASCADE'), nullable=True) # Group-scoped tag
color = db.Column(db.String(7), default='#3B82F6') # Hex color for UI
# Custom settings for this tag
custom_prompt = db.Column(db.Text, nullable=True) # Custom summarization prompt
default_language = db.Column(db.String(10), nullable=True) # Default transcription language
default_min_speakers = db.Column(db.Integer, nullable=True) # Default min speakers for ASR
default_max_speakers = db.Column(db.Integer, nullable=True) # Default max speakers for ASR
# Transcription hints
default_hotwords = db.Column(db.Text, nullable=True) # Comma-separated words to bias recognition
default_initial_prompt = db.Column(db.Text, nullable=True) # Initial prompt to steer transcription
# Retention and deletion settings
protect_from_deletion = db.Column(db.Boolean, default=False) # Exempt tagged recordings from auto-deletion
retention_days = db.Column(db.Integer, nullable=True) # Group-specific retention override (overrides global)
# Group tag settings
auto_share_on_apply = db.Column(db.Boolean, default=True) # Auto-share recording with group when this tag is applied
share_with_group_lead = db.Column(db.Boolean, default=True) # Share with group admins when this tag is applied
# Naming template for recordings with this tag
naming_template_id = db.Column(db.Integer, db.ForeignKey('naming_template.id', ondelete='SET NULL'), nullable=True)
# Export template for recordings with this tag
export_template_id = db.Column(db.Integer, db.ForeignKey('export_template.id', ondelete='SET NULL'), nullable=True)
created_at = db.Column(db.DateTime, default=datetime.utcnow)
updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
# Relationships
user = db.relationship('User', backref=db.backref('tags', lazy=True, cascade='all, delete-orphan'))
group = db.relationship('Group', backref=db.backref('tags', lazy=True))
naming_template = db.relationship('NamingTemplate', foreign_keys=[naming_template_id])
export_template = db.relationship('ExportTemplate', foreign_keys=[export_template_id])
# Use association object for many-to-many with order tracking
recording_associations = db.relationship('RecordingTag', back_populates='tag', cascade='all, delete-orphan')
# Unique constraint: tag name must be unique per user (or per group if group_id is set)
__table_args__ = (db.UniqueConstraint('name', 'user_id', name='_user_tag_uc'),)
@property
def is_group_tag(self):
    """Return True when this tag is scoped to a group (``group_id`` is set)."""
    owning_group_id = self.group_id
    return owning_group_id is not None
def to_dict(self):
    """Serialize the tag to a plain dictionary.

    Besides the tag's own columns, the result carries the names of the
    related group and naming/export templates (``None`` when the relation
    is unset), the ISO-formatted creation time, and the number of
    recording associations.
    """
    # Resolve related objects once so each relationship is read a single time.
    group = self.group
    naming = self.naming_template
    export = self.export_template
    created = self.created_at

    payload = {
        'id': self.id,
        'name': self.name,
        'color': self.color,
        'group_id': self.group_id,
        'is_group_tag': self.is_group_tag,
        'group_name': group.name if group else None,
        'custom_prompt': self.custom_prompt,
        'default_language': self.default_language,
        'default_min_speakers': self.default_min_speakers,
        'default_max_speakers': self.default_max_speakers,
        'default_hotwords': self.default_hotwords,
        'default_initial_prompt': self.default_initial_prompt,
        'protect_from_deletion': self.protect_from_deletion,
        'retention_days': self.retention_days,
        'auto_share_on_apply': self.auto_share_on_apply,
        'share_with_group_lead': self.share_with_group_lead,
        'naming_template_id': self.naming_template_id,
        'naming_template_name': naming.name if naming else None,
        'export_template_id': self.export_template_id,
        'export_template_name': export.name if export else None,
        'created_at': created.isoformat() if created else None,
        'recording_count': len(self.recording_associations),
    }
    return payload

View File

@@ -0,0 +1,63 @@
"""
ProcessingJob database model for persistent job queue.
This model stores background processing jobs in the database to ensure
they survive application restarts and support fair scheduling across users.
"""
from datetime import datetime
from src.database import db
class ProcessingJob(db.Model):
    """Persistent record of one background processing job for a recording."""

    __tablename__ = 'processing_job'

    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(
        db.Integer, db.ForeignKey('user.id'), nullable=False, index=True
    )
    recording_id = db.Column(
        db.Integer,
        db.ForeignKey('recording.id', ondelete='CASCADE'),
        nullable=False,
        index=True,
    )

    # One of: transcribe, summarize, reprocess_transcription, reprocess_summary
    job_type = db.Column(db.String(50), nullable=False)

    # One of: queued, processing, completed, failed
    status = db.Column(db.String(20), default='queued', nullable=False, index=True)

    # JSON-encoded, job-specific parameters (language, min_speakers, custom_prompt, ...)
    params = db.Column(db.Text, nullable=True)

    # Failure bookkeeping
    error_message = db.Column(db.Text, nullable=True)
    retry_count = db.Column(db.Integer, default=0, nullable=False)

    # True for a fresh upload (as opposed to reprocessing) - used for cleanup on failure
    is_new_upload = db.Column(db.Boolean, default=False, nullable=False)

    # Lifecycle timestamps
    created_at = db.Column(
        db.DateTime, default=datetime.utcnow, nullable=False, index=True
    )
    started_at = db.Column(db.DateTime, nullable=True)
    completed_at = db.Column(db.DateTime, nullable=True)

    # Relationships
    user = db.relationship(
        'User', backref=db.backref('processing_jobs', lazy='dynamic')
    )
    recording = db.relationship(
        'Recording',
        backref=db.backref(
            'processing_jobs', lazy='dynamic', cascade='all, delete-orphan'
        ),
    )

    def __repr__(self):
        """Return a short debug representation with id, type and status."""
        job_id, kind, state = self.id, self.job_type, self.status
        return f'<ProcessingJob {job_id} type={kind} status={state}>'

    def to_dict(self):
        """Serialize the job for API responses.

        Timestamps are emitted as ISO-8601 strings, or ``None`` when unset.
        The ``params`` column is not part of the result.
        """
        def _iso(moment):
            return moment.isoformat() if moment else None

        return {
            'id': self.id,
            'user_id': self.user_id,
            'recording_id': self.recording_id,
            'job_type': self.job_type,
            'status': self.status,
            'retry_count': self.retry_count,
            'is_new_upload': self.is_new_upload,
            'created_at': _iso(self.created_at),
            'started_at': _iso(self.started_at),
            'completed_at': _iso(self.completed_at),
            'error_message': self.error_message,
        }

View File

@@ -0,0 +1,37 @@
"""
Push Subscription Model
Stores web push notification subscriptions for users
"""
from datetime import datetime
from src.database import db
class PushSubscription(db.Model):
    """A Web Push notification subscription belonging to a user."""

    __tablename__ = 'push_subscriptions'

    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)

    # Push service endpoint; unique, so one row per browser/device subscription
    endpoint = db.Column(db.String(500), nullable=False, unique=True)

    # Key material needed to encrypt outgoing push messages
    p256dh_key = db.Column(db.String(200), nullable=False)
    auth_key = db.Column(db.String(100), nullable=False)

    # Timestamps
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(
        db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow
    )

    def __repr__(self):
        """Return a short debug representation with id and owning user."""
        sub_id, owner_id = self.id, self.user_id
        return f'<PushSubscription {sub_id} user={owner_id}>'

    def to_dict(self):
        """Serialize the subscription; the encryption keys are not included."""
        created = self.created_at
        result = {
            'id': self.id,
            'user_id': self.user_id,
            'endpoint': self.endpoint,
        }
        result['created_at'] = created.isoformat() if created else None
        return result

338
src/models/recording.py Normal file
View File

@@ -0,0 +1,338 @@
"""
Recording and TranscriptChunk database models.
This module defines models for audio recordings and their chunked transcriptions.
"""
import logging
import os
from datetime import datetime
from sqlalchemy import func
from src.database import db
from src.utils import local_datetime_filter, md_to_html
logger = logging.getLogger(__name__)
class Recording(db.Model):
    """Main recording model storing audio files and their metadata."""

    # Owning user (nullable foreign key)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=True)
    id = db.Column(db.Integer, primary_key=True)
    # Title is often AI-generated, so it may be NULL initially
    title = db.Column(db.String(200), nullable=True)
    participants = db.Column(db.String(500))
    notes = db.Column(db.Text)
    transcription = db.Column(db.Text, nullable=True)
    summary = db.Column(db.Text, nullable=True)
    status = db.Column(db.String(50), default='PENDING')  # PENDING, PROCESSING, SUMMARIZING, COMPLETED, FAILED
    audio_path = db.Column(db.String(500))
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    meeting_date = db.Column(db.DateTime, nullable=True)
    file_size = db.Column(db.Integer)  # File size in bytes
    original_filename = db.Column(db.String(500), nullable=True)  # Original uploaded filename
    is_inbox = db.Column(db.Boolean, default=True)  # New recordings are marked as inbox by default
    is_highlighted = db.Column(db.Boolean, default=False)  # Recordings can be highlighted by the user
    mime_type = db.Column(db.String(100), nullable=True)
    completed_at = db.Column(db.DateTime, nullable=True)
    processing_time_seconds = db.Column(db.Integer, nullable=True)
    transcription_duration_seconds = db.Column(db.Integer, nullable=True)  # Time taken for transcription
    summarization_duration_seconds = db.Column(db.Integer, nullable=True)  # Time taken for summarization
    processing_source = db.Column(db.String(50), default='upload')  # upload, auto_process, recording
    error_message = db.Column(db.Text, nullable=True)  # Detailed error messages
    file_hash = db.Column(db.String(64), nullable=True)  # SHA-256 hash for duplicate detection

    # Auto-deletion and archival fields
    audio_deleted_at = db.Column(db.DateTime, nullable=True)  # When audio file was deleted (null = not deleted)
    deletion_exempt = db.Column(db.Boolean, default=False)  # Manual exemption from auto-deletion

    # Speaker embeddings from diarization (JSON dict mapping speaker IDs to 256-dimensional vectors)
    speaker_embeddings = db.Column(db.JSON, nullable=True)

    # Folder relationship (one-to-many: a recording belongs to at most one folder)
    folder_id = db.Column(db.Integer, db.ForeignKey('folder.id', ondelete='SET NULL'), nullable=True, index=True)

    # Relationships
    folder = db.relationship('Folder', back_populates='recordings')
    tag_associations = db.relationship('RecordingTag', back_populates='recording', cascade='all, delete-orphan', order_by='RecordingTag.order')

    @property
    def tags(self):
        """Get tags ordered by the order they were added to this recording."""
        return [assoc.tag for assoc in sorted(self.tag_associations, key=lambda x: x.order)]

    def get_visible_tags(self, viewer_user):
        """
        Get tags that are visible to a specific user viewing this recording.

        Visibility rules:
        - Group tags: visible if viewer is a member of the tag's group
        - Personal tags: visible only to the tag creator

        Note: These rules apply to ALL users, including the recording owner.
        Personal tags are private to their creator regardless of recording ownership.

        Args:
            viewer_user: User object viewing the recording (or None for backward compatibility)

        Returns:
            List of Tag objects visible to the viewer, in tag order
        """
        # If no viewer specified, return all tags (backward compatibility)
        if viewer_user is None:
            return self.tags

        # The `tags` property re-sorts the associations on every access,
        # so evaluate it exactly once.
        ordered_tags = self.tags
        if not ordered_tags:
            return []

        # Import here to avoid circular dependencies
        from src.models.organization import GroupMembership

        # Resolve the viewer's membership for every referenced group with a
        # single query instead of issuing one query per group tag.
        group_ids = {tag.group_id for tag in ordered_tags if tag.group_id}
        member_group_ids = set()
        if group_ids:
            rows = db.session.query(GroupMembership.group_id).filter(
                GroupMembership.user_id == viewer_user.id,
                GroupMembership.group_id.in_(list(group_ids)),
            ).all()
            member_group_ids = {row[0] for row in rows}

        visible_tags = []
        for tag in ordered_tags:
            if tag.group_id:
                # Group tags: visible if viewer is a member of the group
                if tag.group_id in member_group_ids:
                    visible_tags.append(tag)
            elif tag.user_id == viewer_user.id:
                # Personal tags: visible only to tag creator
                visible_tags.append(tag)
        return visible_tags

    def get_user_notes(self, user):
        """
        Get notes from user's perspective (owner or shared recipient).

        - Recording owner sees Recording.notes
        - Shared users see their personal_notes from SharedRecordingState

        Args:
            user: User object viewing the recording (None returns Recording.notes)

        Returns:
            String notes content or None
        """
        if user is None:
            return self.notes
        if self.user_id == user.id:
            return self.notes  # Owner sees Recording.notes

        # Shared user sees their personal notes
        from src.models.sharing import SharedRecordingState
        state = SharedRecordingState.query.filter_by(
            recording_id=self.id,
            user_id=user.id
        ).first()
        return state.personal_notes if state else None

    def get_audio_duration(self):
        """
        Get the audio duration in seconds using ffprobe.

        Returns:
            Float duration in seconds, or None if the audio was deleted, the
            file is missing, or probing fails
        """
        if self.audio_deleted_at is not None:
            return None
        if not self.audio_path or not os.path.exists(self.audio_path):
            return None
        try:
            from src.utils.ffprobe import get_duration
            # Allow longer timeout for packet scanning fallback on files without duration metadata
            return get_duration(self.audio_path, timeout=30)
        except Exception as e:
            # Best effort: a probing failure must not break serialization.
            logger.warning(f"Failed to get duration for recording {self.id}: {e}")
            return None

    def get_duplicate_info(self):
        """Check if other recordings share the same file_hash for this user.

        Returns:
            Dict with total copy count and list of copies (oldest first),
            or None when there is no hash or no other copy.
        """
        if not self.file_hash:
            return None
        dupes = Recording.query.filter(
            Recording.user_id == self.user_id,
            Recording.file_hash == self.file_hash,
        ).with_entities(
            Recording.id, Recording.title, Recording.created_at
        ).order_by(Recording.created_at).all()
        if len(dupes) > 1:
            return {
                'total_copies': len(dupes),
                'copies': [
                    {
                        'id': d.id,
                        'title': d.title or f'#{d.id}',
                        'created_at': local_datetime_filter(d.created_at),
                        'is_self': d.id == self.id
                    }
                    for d in dupes
                ]
            }
        return None

    def _share_counts(self):
        """Return ``(internal_share_count, public_share_count)`` for this recording."""
        # Import here to avoid circular dependencies
        from src.models.sharing import InternalShare, Share

        # Internal (user-to-user) shares
        shared_with_count = db.session.query(func.count(InternalShare.id)).filter(
            InternalShare.recording_id == self.id
        ).scalar() or 0
        # Public (link) shares
        public_share_count = db.session.query(func.count(Share.id)).filter(
            Share.recording_id == self.id
        ).scalar() or 0
        return shared_with_count, public_share_count

    def to_list_dict(self, viewer_user=None):
        """
        Lightweight dict for list views - excludes expensive HTML conversions.

        Args:
            viewer_user: User viewing the recording (for tag visibility filtering)

        Returns:
            Dict of list-view fields, including share counts and visible tags
        """
        shared_with_count, public_share_count = self._share_counts()
        # Get visible tags for this viewer
        visible_tags = self.get_visible_tags(viewer_user)
        return {
            'id': self.id,
            'title': self.title,
            'participants': self.participants,
            'status': self.status,
            'created_at': local_datetime_filter(self.created_at),
            'completed_at': local_datetime_filter(self.completed_at),
            'meeting_date': local_datetime_filter(self.meeting_date),
            'file_size': self.file_size,
            'original_filename': self.original_filename,
            'is_inbox': self.is_inbox,
            'is_highlighted': self.is_highlighted,
            'audio_deleted_at': local_datetime_filter(self.audio_deleted_at),
            'audio_available': self.audio_deleted_at is None,
            'deletion_exempt': self.deletion_exempt,
            'folder_id': self.folder_id,
            'folder': self.folder.to_dict() if self.folder else None,
            'tags': [tag.to_dict() for tag in visible_tags] if visible_tags else [],
            'duplicate_info': self.get_duplicate_info(),
            'shared_with_count': shared_with_count,
            'public_share_count': public_share_count
        }

    def to_dict(self, include_html=True, viewer_user=None):
        """
        Full dict with optional HTML conversion for notes/summary.

        Args:
            include_html: Whether to include HTML-rendered markdown fields
            viewer_user: User viewing the recording (for tag visibility
                filtering and per-user notes)

        Returns:
            Dict of all serialized fields; ``notes_html`` and ``summary_html``
            are empty strings when ``include_html`` is false
        """
        shared_with_count, public_share_count = self._share_counts()
        # Get visible tags for this viewer
        visible_tags = self.get_visible_tags(viewer_user)
        # Get user-specific notes
        user_notes = self.get_user_notes(viewer_user)
        data = {
            'id': self.id,
            'title': self.title,
            'participants': self.participants,
            'notes': user_notes,
            'transcription': self.transcription,
            'summary': self.summary,
            'status': self.status,
            'created_at': local_datetime_filter(self.created_at),
            'completed_at': local_datetime_filter(self.completed_at),
            'processing_time_seconds': self.processing_time_seconds,
            'transcription_duration_seconds': self.transcription_duration_seconds,
            'summarization_duration_seconds': self.summarization_duration_seconds,
            'meeting_date': local_datetime_filter(self.meeting_date),
            'file_size': self.file_size,
            'original_filename': self.original_filename,
            'user_id': self.user_id,
            'is_inbox': self.is_inbox,
            'is_highlighted': self.is_highlighted,
            'mime_type': self.mime_type,
            'audio_deleted_at': local_datetime_filter(self.audio_deleted_at),
            'audio_available': self.audio_deleted_at is None,
            'audio_duration': self.get_audio_duration(),
            'deletion_exempt': self.deletion_exempt,
            'folder_id': self.folder_id,
            'folder': self.folder.to_dict() if self.folder else None,
            'tags': [tag.to_dict() for tag in visible_tags] if visible_tags else [],
            'events': [event.to_dict() for event in self.events] if self.events else [],
            'duplicate_info': self.get_duplicate_info(),
            'shared_with_count': shared_with_count,
            'public_share_count': public_share_count
        }
        # Only compute expensive HTML conversions when explicitly requested
        if include_html:
            data['notes_html'] = md_to_html(user_notes) if user_notes else ""
            data['summary_html'] = md_to_html(self.summary) if self.summary else ""
        else:
            data['notes_html'] = ""
            data['summary_html'] = ""
        return data
class TranscriptChunk(db.Model):
    """One chunk of a recording's transcription, kept for retrieval and embedding."""

    id = db.Column(db.Integer, primary_key=True)
    recording_id = db.Column(db.Integer, db.ForeignKey('recording.id'), nullable=False)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
    chunk_index = db.Column(db.Integer, nullable=False)  # Position within the recording
    content = db.Column(db.Text, nullable=False)  # Text of this chunk
    start_time = db.Column(db.Float, nullable=True)  # Start time in seconds (if available)
    end_time = db.Column(db.Float, nullable=True)  # End time in seconds (if available)
    # Speaker for this chunk (indexed for speaker rename operations)
    speaker_name = db.Column(db.String(100), nullable=True, index=True)
    embedding = db.Column(db.LargeBinary, nullable=True)  # Vector stored as binary
    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    # Composite index for speaker-name lookups scoped to a user
    __table_args__ = (
        db.Index('idx_user_speaker_name', 'user_id', 'speaker_name'),
    )

    # Relationships
    recording = db.relationship(
        'Recording',
        backref=db.backref('chunks', lazy=True, cascade='all, delete-orphan'),
    )
    user = db.relationship(
        'User',
        backref=db.backref('transcript_chunks', lazy=True, cascade='all, delete-orphan'),
    )

    def to_dict(self):
        """Serialize the chunk; ``user_id`` and the binary embedding are not included."""
        created = self.created_at
        payload = {
            'id': self.id,
            'recording_id': self.recording_id,
            'chunk_index': self.chunk_index,
            'content': self.content,
            'start_time': self.start_time,
            'end_time': self.end_time,
            'speaker_name': self.speaker_name,
            'created_at': created.isoformat() if created else None,
        }
        return payload

236
src/models/sharing.py Normal file
View File

@@ -0,0 +1,236 @@
"""
Sharing models for public and internal recording shares.
This module defines models for sharing recordings both publicly (via links)
and internally (between users).
"""
import os
import secrets
from datetime import datetime
from src.database import db
from src.utils import local_datetime_filter
# Sharing configuration read from the environment at import time: the flag is
# on only when SHOW_USERNAMES_IN_UI equals "true" (case-insensitive).
SHOW_USERNAMES_IN_UI = os.getenv('SHOW_USERNAMES_IN_UI', 'false').lower() == 'true'
class Share(db.Model):
    """A public share of a recording, reachable through a shareable link."""

    id = db.Column(db.Integer, primary_key=True)
    # Random URL-safe identifier generated when the row is created
    public_id = db.Column(
        db.String(32),
        unique=True,
        nullable=False,
        default=lambda: secrets.token_urlsafe(16),
    )
    recording_id = db.Column(db.Integer, db.ForeignKey('recording.id'), nullable=False)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    share_summary = db.Column(db.Boolean, default=True)
    share_notes = db.Column(db.Boolean, default=True)

    user = db.relationship(
        'User',
        backref=db.backref('shares', lazy=True, cascade='all, delete-orphan'),
    )
    recording = db.relationship(
        'Recording',
        backref=db.backref('shares', lazy=True, cascade='all, delete-orphan'),
    )

    def to_dict(self):
        """Serialize the share.

        When no recording is attached, the title falls back to ``"N/A"`` and
        ``audio_available`` is reported as ``True``.
        """
        rec = self.recording
        if rec:
            title = rec.title
            audio_available = rec.audio_deleted_at is None
        else:
            title = "N/A"
            audio_available = True
        return {
            'id': self.id,
            'public_id': self.public_id,
            'recording_id': self.recording_id,
            'created_at': local_datetime_filter(self.created_at),
            'share_summary': self.share_summary,
            'share_notes': self.share_notes,
            'recording_title': title,
            'audio_available': audio_available,
        }
class InternalShare(db.Model):
    """Internal share of a recording from one user to another (independent of teams)."""

    __tablename__ = 'internal_share'

    id = db.Column(db.Integer, primary_key=True)
    recording_id = db.Column(
        db.Integer, db.ForeignKey('recording.id', ondelete='CASCADE'), nullable=False
    )
    # User who created the share
    owner_id = db.Column(
        db.Integer, db.ForeignKey('user.id', ondelete='CASCADE'), nullable=False
    )
    # User who received the share
    shared_with_user_id = db.Column(
        db.Integer, db.ForeignKey('user.id', ondelete='CASCADE'), nullable=False
    )

    # Permissions
    can_edit = db.Column(db.Boolean, default=False)  # May edit notes/metadata
    can_reshare = db.Column(db.Boolean, default=False)  # May share with others

    # Where the share came from, used for share cleanup
    source_type = db.Column(db.String(20), default='manual')  # 'manual' or 'group_tag'
    source_tag_id = db.Column(
        db.Integer, db.ForeignKey('tag.id', ondelete='SET NULL'), nullable=True
    )
    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    # Tag that produced this share, if any
    source_tag = db.relationship(
        'Tag',
        foreign_keys=[source_tag_id],
        backref=db.backref('created_shares', lazy=True),
    )

    # Relationships
    recording = db.relationship(
        'Recording',
        backref=db.backref('internal_shares', lazy=True, cascade='all, delete-orphan'),
    )
    owner = db.relationship(
        'User',
        foreign_keys=[owner_id],
        backref=db.backref('shared_recordings', lazy=True),
    )
    shared_with = db.relationship(
        'User',
        foreign_keys=[shared_with_user_id],
        backref=db.backref('received_shares', lazy=True),
    )

    # A recording can be shared with a given user only once
    __table_args__ = (
        db.UniqueConstraint('recording_id', 'shared_with_user_id', name='unique_recording_share'),
    )

    def to_dict(self):
        """Serialize the share.

        The owner's username is exposed only when ``SHOW_USERNAMES_IN_UI`` is
        enabled; the recipient's username is always included.
        """
        created = self.created_at
        owner_username = self.owner.username if SHOW_USERNAMES_IN_UI else None
        return {
            'id': self.id,
            'recording_id': self.recording_id,
            'owner_id': self.owner_id,
            'owner_username': owner_username,
            'user_id': self.shared_with_user_id,  # For frontend compatibility
            'username': self.shared_with.username,  # Always include username
            'can_edit': self.can_edit,
            'can_reshare': self.can_reshare,
            'source_type': self.source_type,
            'source_tag_id': self.source_tag_id,
            'created_at': created.isoformat() if created else None,
        }

    @staticmethod
    def get_user_max_permissions(recording, user):
        """
        Report the strongest permissions a user may pass on for a recording.

        Args:
            recording: Recording object
            user: User object attempting to share

        Returns:
            Dict with 'can_edit' and 'can_reshare' flags: both True for the
            recording owner, both False when the user holds no share,
            otherwise the flags of the user's own share.
        """
        # The owner is not limited by any share
        if recording.user_id == user.id:
            return {'can_edit': True, 'can_reshare': True}

        own_share = InternalShare.query.filter_by(
            recording_id=recording.id,
            shared_with_user_id=user.id
        ).first()

        if own_share:
            # A recipient can grant at most what they were granted
            return {
                'can_edit': own_share.can_edit,
                'can_reshare': own_share.can_reshare
            }
        # No share means no access
        return {'can_edit': False, 'can_reshare': False}

    @staticmethod
    def validate_reshare_permissions(recording, grantor_user, requested_permissions):
        """
        Check that the grantor may hand out the requested permissions.

        Only the requested flags are compared against the grantor's own flags.
        NOTE(review): a request asking for neither flag passes for any
        grantor; presumably the caller checks reshare access separately.

        Args:
            recording: Recording object being shared
            grantor_user: User attempting to share (current_user)
            requested_permissions: Dict with 'can_edit' and 'can_reshare' flags

        Returns:
            Tuple of (is_valid: bool, error_message: str or None)
        """
        # The owner may grant anything
        if recording.user_id == grantor_user.id:
            return True, None

        granted = InternalShare.get_user_max_permissions(recording, grantor_user)

        # Checked in order: edit first, then reshare
        checks = (
            ('can_edit', "You cannot grant edit permission because you do not have edit access"),
            ('can_reshare', "You cannot grant reshare permission because you do not have reshare access"),
        )
        for flag, message in checks:
            if requested_permissions.get(flag, False) and not granted[flag]:
                return False, message
        return True, None

    @staticmethod
    def find_downstream_shares(recording_id, user_id):
        """
        Return every share of a recording that was created by the given user.
        Used for cascade revocation.

        Args:
            recording_id: ID of the recording
            user_id: ID of the user whose downstream shares to find

        Returns:
            List of InternalShare objects
        """
        downstream = InternalShare.query.filter(
            InternalShare.recording_id == recording_id,
            InternalShare.owner_id == user_id,
        )
        return downstream.all()

    @staticmethod
    def has_alternate_access_path(recording_id, user_id, excluding_grantor_id=None):
        """
        Tell whether a user still reaches a recording through some other share.
        Used to avoid cascade revocation when the user has several access
        paths (diamond pattern).

        Args:
            recording_id: ID of the recording
            user_id: ID of the user to check
            excluding_grantor_id: Ignore shares created by this grantor (the one being revoked)

        Returns:
            Boolean - True if at least one matching share exists
        """
        criteria = [
            InternalShare.recording_id == recording_id,
            InternalShare.shared_with_user_id == user_id,
        ]
        if excluding_grantor_id is not None:
            criteria.append(InternalShare.owner_id != excluding_grantor_id)
        remaining = InternalShare.query.filter(*criteria).count()
        return remaining > 0
class SharedRecordingState(db.Model):
    """Per-user state attached to a shared recording (notes, inbox flag, highlight)."""

    __tablename__ = 'shared_recording_state'

    id = db.Column(db.Integer, primary_key=True)
    recording_id = db.Column(
        db.Integer, db.ForeignKey('recording.id', ondelete='CASCADE'), nullable=False
    )
    user_id = db.Column(
        db.Integer, db.ForeignKey('user.id', ondelete='CASCADE'), nullable=False
    )

    # State that belongs to this user only
    personal_notes = db.Column(db.Text, nullable=True)  # Private notes only this user can see
    is_inbox = db.Column(db.Boolean, default=True)  # User's personal inbox status
    is_highlighted = db.Column(db.Boolean, default=False)  # User's personal highlight/favorite status
    last_viewed = db.Column(db.DateTime, default=datetime.utcnow)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(
        db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow
    )

    # Relationships
    recording = db.relationship(
        'Recording',
        backref=db.backref('user_states', lazy=True, cascade='all, delete-orphan'),
    )
    user = db.relationship('User', backref=db.backref('recording_states', lazy=True))

    # At most one state row per (recording, user) pair
    __table_args__ = (
        db.UniqueConstraint('recording_id', 'user_id', name='unique_user_recording_state'),
    )

    def to_dict(self):
        """Serialize the state; timestamps become ISO strings or ``None``."""
        def _iso(moment):
            return moment.isoformat() if moment else None

        return {
            'id': self.id,
            'recording_id': self.recording_id,
            'user_id': self.user_id,
            'personal_notes': self.personal_notes,
            'is_inbox': self.is_inbox,
            'is_highlighted': self.is_highlighted,
            'last_viewed': _iso(self.last_viewed),
            'created_at': _iso(self.created_at),
            'updated_at': _iso(self.updated_at),
        }

View File

@@ -0,0 +1,43 @@
"""
SpeakerSnippet database model.
This module defines the SpeakerSnippet model for storing example quotes/snippets
from recordings that feature specific speakers. These snippets provide context
when viewing speaker profiles and help users verify speaker identifications.
"""
from datetime import datetime
from src.database import db
class SpeakerSnippet(db.Model):
    """A representative quote spoken by a speaker in a particular recording."""

    __tablename__ = 'speaker_snippet'

    id = db.Column(db.Integer, primary_key=True)
    speaker_id = db.Column(
        db.Integer, db.ForeignKey('speaker.id', ondelete='CASCADE'), nullable=False
    )
    recording_id = db.Column(
        db.Integer, db.ForeignKey('recording.id', ondelete='CASCADE'), nullable=False
    )
    segment_index = db.Column(db.Integer, nullable=False)  # Index in the transcript
    text_snippet = db.Column(db.String(200), nullable=False)  # The quote itself
    timestamp = db.Column(db.Float, nullable=True)  # Seconds into the recording
    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    # Relationships
    speaker = db.relationship(
        'Speaker',
        backref=db.backref('snippets', lazy=True, cascade='all, delete-orphan'),
    )
    recording = db.relationship(
        'Recording',
        backref=db.backref('speaker_snippets', lazy=True, cascade='all, delete-orphan'),
    )

    def to_dict(self):
        """Serialize the snippet; the quote is exposed under the ``text`` key.

        ``recording_title`` falls back to ``'Unknown'`` when no recording is
        attached.
        """
        rec = self.recording
        created = self.created_at
        return {
            'id': self.id,
            'speaker_id': self.speaker_id,
            'recording_id': self.recording_id,
            'text': self.text_snippet,
            'timestamp': self.timestamp,
            'recording_title': rec.title if rec else 'Unknown',
            'created_at': created.isoformat() if created else None,
        }

    def __repr__(self):
        """Return a debug representation showing the first 30 characters of the quote."""
        preview = self.text_snippet[:30]
        return f"SpeakerSnippet(speaker_id={self.speaker_id}, recording_id={self.recording_id}, text='{preview}...')"

77
src/models/system.py Normal file
View File

@@ -0,0 +1,77 @@
"""
SystemSetting model for application configuration.
This module defines the SystemSetting model for storing
dynamic system configuration in the database.
"""
from datetime import datetime
from src.database import db
class SystemSetting(db.Model):
    """Key/value store for system-wide configuration, with a declared value type."""

    id = db.Column(db.Integer, primary_key=True)
    key = db.Column(db.String(100), unique=True, nullable=False)
    value = db.Column(db.Text, nullable=True)
    description = db.Column(db.Text, nullable=True)
    # One of: string, integer, boolean, float
    setting_type = db.Column(db.String(50), nullable=False, default='string')
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(
        db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow
    )

    def to_dict(self):
        """Serialize the setting.

        ``created_at`` and ``updated_at`` are returned as stored (datetime
        objects), not as ISO strings.
        """
        fields = (
            'id', 'key', 'value', 'description',
            'setting_type', 'created_at', 'updated_at',
        )
        return {name: getattr(self, name) for name in fields}

    @staticmethod
    def get_setting(key, default_value=None):
        """Look up a setting by key and convert it according to its type.

        Returns ``default_value`` when the key is missing, the stored value is
        unset, or an integer/float conversion fails. Booleans are true for
        'true', '1' or 'yes' (case-insensitive); an empty boolean value also
        yields the default.
        """
        record = SystemSetting.query.filter_by(key=key).first()
        if not record:
            return default_value

        raw, kind = record.value, record.setting_type

        if kind == 'boolean':
            # Empty string and None both fall back to the default
            if not raw:
                return default_value
            return raw.lower() in ('true', '1', 'yes')

        if raw is None:
            return default_value

        if kind in ('integer', 'float'):
            caster = int if kind == 'integer' else float
            try:
                return caster(raw)
            except (ValueError, TypeError):
                return default_value

        # Plain string (or any other type name): return the stored text
        return raw

    @staticmethod
    def set_setting(key, value, description=None, setting_type='string'):
        """Create or update a setting and commit the session.

        The value is stored as ``str(value)`` (``None`` stays ``None``). On
        update, ``description`` and ``setting_type`` are overwritten only when
        truthy. Returns the SystemSetting row.
        """
        record = SystemSetting.query.filter_by(key=key).first()
        stored = None if value is None else str(value)
        if not record:
            record = SystemSetting(
                key=key,
                value=stored,
                description=description,
                setting_type=setting_type
            )
            db.session.add(record)
        else:
            record.value = stored
            record.updated_at = datetime.utcnow()
            if description:
                record.description = description
            if setting_type:
                record.setting_type = setting_type
        db.session.commit()
        return record

37
src/models/templates.py Normal file
View File

@@ -0,0 +1,37 @@
"""
TranscriptTemplate model for user-defined transcript formatting.
This module defines the TranscriptTemplate model for storing
custom templates for transcript formatting.
"""
from datetime import datetime
from src.database import db
class TranscriptTemplate(db.Model):
    """A user-owned template describing how transcripts are formatted."""

    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
    name = db.Column(db.String(100), nullable=False)
    template = db.Column(db.Text, nullable=False)
    description = db.Column(db.String(500), nullable=True)
    is_default = db.Column(db.Boolean, default=False)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(
        db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow
    )

    # Relationships
    user = db.relationship(
        'User',
        backref=db.backref('transcript_templates', lazy=True, cascade='all, delete-orphan'),
    )

    def to_dict(self):
        """Serialize the template; ``user_id`` is not included."""
        created, updated = self.created_at, self.updated_at
        return {
            'id': self.id,
            'name': self.name,
            'template': self.template,
            'description': self.description,
            'is_default': self.is_default,
            'created_at': created.isoformat() if created else None,
            'updated_at': updated.isoformat() if updated else None,
        }

44
src/models/token_usage.py Normal file
View File

@@ -0,0 +1,44 @@
"""
Token usage tracking model for monitoring LLM API consumption.
"""
from datetime import datetime, date
from src.database import db
class TokenUsage(db.Model):
    """Token usage aggregated per user, per day and per operation type."""

    __tablename__ = 'token_usage'

    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
    # Defaults to date.today at insert time
    date = db.Column(db.Date, nullable=False, default=date.today)
    operation_type = db.Column(db.String(50), nullable=False)

    # Token counts (from API response.usage)
    prompt_tokens = db.Column(db.Integer, default=0)
    completion_tokens = db.Column(db.Integer, default=0)
    total_tokens = db.Column(db.Integer, default=0)

    # Cost tracking (OpenRouter provides this)
    cost = db.Column(db.Float, default=0.0)

    # Number of requests folded into this day/operation row
    request_count = db.Column(db.Integer, default=0)

    # Model info
    model_name = db.Column(db.String(100), nullable=True)

    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(
        db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow
    )

    # Relationships
    user = db.relationship('User', backref=db.backref('token_usage', lazy='dynamic'))

    # One row per (user, date, operation_type), plus an index for per-user date lookups
    __table_args__ = (
        db.UniqueConstraint('user_id', 'date', 'operation_type', name='uq_user_date_op'),
        db.Index('idx_token_user_date', 'user_id', 'date'),
    )

    def __repr__(self):
        """Return a debug representation: user, date, operation and total tokens."""
        return '<TokenUsage {} {} {}: {}>'.format(
            self.user_id, self.date, self.operation_type, self.total_tokens
        )

View File

@@ -0,0 +1,42 @@
"""
Transcription usage tracking model for monitoring audio transcription consumption.
"""
from datetime import datetime, date
from src.database import db
class TranscriptionUsage(db.Model):
    """Aggregated audio transcription consumption for one user, day and connector.

    The unique constraint on ``(user_id, date, connector_type)`` allows at
    most one row per user, calendar day and connector type.
    """

    __tablename__ = 'transcription_usage'
    __table_args__ = (
        # One aggregate row per user / day / connector type.
        db.UniqueConstraint(
            'user_id', 'date', 'connector_type',
            name='uq_user_date_connector',
        ),
        # Composite index over the owning user and the day.
        db.Index('idx_transcription_user_date', 'user_id', 'date'),
    )

    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(
        db.Integer,
        db.ForeignKey('user.id'),
        nullable=False,
    )
    # The right-hand side is evaluated before this class attribute is bound,
    # so ``date.today`` still resolves to the imported ``datetime.date``.
    # NOTE(review): ``date.today`` yields the server-local date while the
    # timestamps below use UTC -- confirm the mismatch is intended.
    date = db.Column(db.Date, default=date.today, nullable=False)
    # e.g. 'openai_whisper', 'openai_transcribe', 'asr_endpoint'
    connector_type = db.Column(db.String(50), nullable=False)

    # Audio duration tracking, stored as whole seconds
    audio_duration_seconds = db.Column(db.Integer, default=0)

    # Cost tracking ($0 for self-hosted ASR)
    estimated_cost = db.Column(db.Float, default=0.0)

    # Number of requests folded into this day/connector row
    request_count = db.Column(db.Integer, default=0)

    # Model info (e.g., 'whisper-1', 'gpt-4o-transcribe', 'asr-endpoint')
    model_name = db.Column(db.String(100), nullable=True)

    # Naive UTC timestamps; ``updated_at`` is refreshed on every UPDATE.
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(
        db.DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Owning user; adds a dynamic ``transcription_usage`` backref on ``User``.
    user = db.relationship(
        'User',
        backref=db.backref('transcription_usage', lazy='dynamic'),
    )

    def __repr__(self):
        """Return a short debug string: user id, date, connector and seconds."""
        return f'<TranscriptionUsage {self.user_id} {self.date} {self.connector_type}: {self.audio_duration_seconds}s>'

98
src/models/user.py Normal file
View File

@@ -0,0 +1,98 @@
"""
User and Speaker database models.
This module defines the User model for authentication and user profiles,
and the Speaker model for tracking speaker profiles used in diarization.
"""
from datetime import datetime
from flask_login import UserMixin
from src.database import db
class User(db.Model, UserMixin):
    """Account record holding credentials, SSO identity, preferences and budgets.

    No ``__tablename__`` is declared here; sibling models reference this
    table through the ``'user.id'`` foreign key.
    """

    # --- Identity and credentials -------------------------------------
    id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String(20), nullable=False, unique=True)
    email = db.Column(db.String(120), nullable=False, unique=True)
    # NOTE(review): nullable, presumably for SSO-only accounts -- confirm.
    password = db.Column(db.String(60), nullable=True)
    sso_provider = db.Column(db.String(100), nullable=True)
    sso_subject = db.Column(db.String(255), nullable=True, unique=True)

    # --- Permissions --------------------------------------------------
    is_admin = db.Column(db.Boolean, default=False)
    # Permission to create public share links
    can_share_publicly = db.Column(db.Boolean, default=True)

    # Recordings owned by this user (each recording gets an ``owner`` backref)
    recordings = db.relationship('Recording', backref='owner', lazy=True)

    # --- Language and summarization preferences -----------------------
    # ISO 639-1 code
    transcription_language = db.Column(db.String(10), nullable=True)
    # Full language name such as "Spanish"
    output_language = db.Column(db.String(50), nullable=True)
    # UI language preference (en, es, fr, zh)
    ui_language = db.Column(db.String(10), default='en', nullable=True)
    summary_prompt = db.Column(db.Text, nullable=True)
    # Enable event extraction from transcripts
    extract_events = db.Column(db.Boolean, default=False)

    # --- Profile ------------------------------------------------------
    name = db.Column(db.String(100), nullable=True)
    job_title = db.Column(db.String(100), nullable=True)
    company = db.Column(db.String(100), nullable=True)
    diarize = db.Column(db.Boolean, default=False)

    # --- Default naming template for title generation -----------------
    # The reference is nulled out when the template row is deleted.
    default_naming_template_id = db.Column(
        db.Integer,
        db.ForeignKey('naming_template.id', ondelete='SET NULL'),
        nullable=True,
    )
    default_naming_template = db.relationship(
        'NamingTemplate',
        foreign_keys=[default_naming_template_id],
    )

    # --- Budgets (None = unlimited) -----------------------------------
    monthly_token_budget = db.Column(db.Integer, nullable=True)
    # Transcription budget is expressed in seconds
    monthly_transcription_budget = db.Column(db.Integer, nullable=True)

    # --- Email verification -------------------------------------------
    email_verified = db.Column(db.Boolean, default=False)
    email_verification_token = db.Column(
        db.String(200), index=True, nullable=True
    )
    email_verification_sent_at = db.Column(db.DateTime, nullable=True)

    # --- Password reset -----------------------------------------------
    password_reset_token = db.Column(
        db.String(200), index=True, nullable=True
    )
    password_reset_sent_at = db.Column(db.DateTime, nullable=True)

    # --- Auto speaker labelling ---------------------------------------
    # Enable auto-labelling when voice confidence exceeds the threshold
    auto_speaker_labelling = db.Column(db.Boolean, default=False)
    # One of 'low', 'medium', 'high'
    auto_speaker_labelling_threshold = db.Column(
        db.String(10), default='medium', nullable=True
    )

    # --- Auto summarization -------------------------------------------
    # User can disable this if the admin hasn't globally disabled it
    auto_summarization = db.Column(db.Boolean, default=True)

    # --- Transcription hints ------------------------------------------
    # Hotwords and initial prompt for improving ASR accuracy
    transcription_hotwords = db.Column(db.Text, nullable=True)
    transcription_initial_prompt = db.Column(db.Text, nullable=True)

    def __repr__(self):
        """Return a debug string showing the username and email."""
        return f"User('{self.username}', '{self.email}')"
class Speaker(db.Model):
    """Named speaker profile owned by a user, with optional voice-embedding data."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(100), nullable=False)
    user_id = db.Column(
        db.Integer,
        db.ForeignKey('user.id'),
        nullable=False,
    )

    # Usage bookkeeping; both timestamps default to the naive UTC "now".
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    last_used = db.Column(db.DateTime, default=datetime.utcnow)
    use_count = db.Column(db.Integer, default=1)

    # Voice embedding fields (256 dimensions from WhisperX)
    # Binary numpy array (256 × 4 bytes = 1024 bytes)
    average_embedding = db.Column(db.LargeBinary, nullable=True)
    # List of metadata: [{recording_id, timestamp, similarity}, ...]
    embeddings_history = db.Column(db.JSON, nullable=True)
    # Number of embeddings collected
    embedding_count = db.Column(db.Integer, default=0)
    # 0-1 score based on embedding consistency
    confidence_score = db.Column(db.Float, nullable=True)

    # Owning user; deleting or detaching from the user's ``speakers``
    # collection cascades to this row.
    user = db.relationship(
        'User',
        backref=db.backref(
            'speakers',
            lazy=True,
            cascade='all, delete-orphan',
        ),
    )

    def to_dict(self):
        """Return the speaker's summary fields as a dictionary.

        Timestamps are passed through unchanged (no string conversion).
        ``user_id`` and the embedding payloads (``average_embedding``,
        ``embeddings_history``) are not included.
        """
        exported = (
            'id',
            'name',
            'created_at',
            'last_used',
            'use_count',
            'embedding_count',
            'confidence_score',
        )
        return {field: getattr(self, field) for field in exported}