Initial release: DictIA v0.8.14-alpha (fork de Speakr, AGPL-3.0)

This commit is contained in:
InnovA AI
2026-03-16 21:47:37 +00:00
commit 42772a31ed
365 changed files with 103572 additions and 0 deletions

219
scripts/parse_asr_json.py Executable file
View File

@@ -0,0 +1,219 @@
#!/usr/bin/env python3
"""
ASR JSON Parser - Analyzes speaker information in ASR response JSON files
"""
import json
import sys
from collections import defaultdict, Counter
def analyze_asr_json(json_data):
"""
Analyze ASR JSON data to understand speaker distribution and identify issues
"""
if not isinstance(json_data, dict) or 'segments' not in json_data:
print("ERROR: Invalid JSON structure. Expected dict with 'segments' key.")
return
segments = json_data['segments']
if not isinstance(segments, list):
print("ERROR: 'segments' should be a list.")
return
print(f"=== ASR JSON Analysis ===")
print(f"Total segments: {len(segments)}")
print()
# Track segment-level speakers
segment_speakers = []
segments_with_speaker = 0
segments_without_speaker = 0
# Track word-level speakers
word_speakers = []
words_with_speaker = 0
words_without_speaker = 0
# Track segments with null speakers
null_speaker_segments = []
for i, segment in enumerate(segments):
# Analyze segment-level speaker
segment_speaker = segment.get('speaker')
if segment_speaker is not None:
segment_speakers.append(segment_speaker)
segments_with_speaker += 1
else:
segments_without_speaker += 1
null_speaker_segments.append(i)
# Analyze word-level speakers
words = segment.get('words', [])
for word_data in words:
word_speaker = word_data.get('speaker')
if word_speaker is not None:
word_speakers.append(word_speaker)
words_with_speaker += 1
else:
words_without_speaker += 1
# Print segment-level analysis
print("=== SEGMENT-LEVEL SPEAKERS ===")
print(f"Segments with speakers: {segments_with_speaker}")
print(f"Segments without speakers: {segments_without_speaker}")
if segment_speakers:
segment_speaker_counts = Counter(segment_speakers)
print(f"Unique segment speakers: {sorted(segment_speaker_counts.keys())}")
print("Segment speaker distribution:")
for speaker, count in segment_speaker_counts.most_common():
print(f" {speaker}: {count} segments")
else:
print("No segment-level speakers found!")
print()
# Print word-level analysis
print("=== WORD-LEVEL SPEAKERS ===")
print(f"Words with speakers: {words_with_speaker}")
print(f"Words without speakers: {words_without_speaker}")
if word_speakers:
word_speaker_counts = Counter(word_speakers)
print(f"Unique word speakers: {sorted(word_speaker_counts.keys())}")
print("Word speaker distribution:")
for speaker, count in word_speaker_counts.most_common():
print(f" {speaker}: {count} words")
else:
print("No word-level speakers found!")
print()
# Analyze segments without speakers
if null_speaker_segments:
print("=== SEGMENTS WITHOUT SPEAKERS ===")
print(f"Segment indices without speakers: {null_speaker_segments[:10]}{'...' if len(null_speaker_segments) > 10 else ''}")
print("\nFirst few segments without speakers:")
for i in null_speaker_segments[:5]:
segment = segments[i]
text = segment.get('text', '').strip()
start = segment.get('start')
end = segment.get('end')
words = segment.get('words', [])
print(f" Segment {i}: '{text}' ({start}-{end}s)")
print(f" Keys: {list(segment.keys())}")
# Check if words have speakers even when segment doesn't
word_speakers_in_segment = [w.get('speaker') for w in words if w.get('speaker')]
if word_speakers_in_segment:
word_speaker_counts = Counter(word_speakers_in_segment)
print(f" Word speakers: {dict(word_speaker_counts)}")
else:
print(f" No word speakers either")
print()
# Suggest speaker assignment strategy
print("=== SPEAKER ASSIGNMENT STRATEGY ===")
if segments_without_speaker > 0:
print(f"Found {segments_without_speaker} segments without speakers.")
if words_with_speaker > 0:
print("RECOMMENDATION: Use word-level speaker information to assign segment speakers.")
print("Strategy: For segments without speakers, find the most common speaker among their words.")
else:
print("RECOMMENDATION: Assign a default speaker label (e.g., 'UNKNOWN_SPEAKER') to segments without speakers.")
else:
print("All segments have speakers assigned. No action needed.")
def suggest_preprocessing_fix(json_data):
"""
Suggest how to fix the preprocessing based on the JSON structure
"""
print("\n=== PREPROCESSING FIX SUGGESTION ===")
segments = json_data.get('segments', [])
if not segments:
return
# Check if we can derive segment speakers from word speakers
segments_fixable = 0
for segment in segments:
if segment.get('speaker') is None:
words = segment.get('words', [])
word_speakers = [w.get('speaker') for w in words if w.get('speaker')]
if word_speakers:
segments_fixable += 1
if segments_fixable > 0:
print(f"{segments_fixable} segments can be fixed using word-level speaker information.")
print("\nSuggested code fix:")
print("""
# In the ASR processing function, replace the segment processing with:
for i, segment in enumerate(asr_response_data['segments']):
speaker = segment.get('speaker')
text = segment.get('text', '').strip()
# If segment doesn't have a speaker, try to derive from words
if speaker is None:
words = segment.get('words', [])
word_speakers = [w.get('speaker') for w in words if w.get('speaker')]
if word_speakers:
# Use the most common speaker among the words
from collections import Counter
speaker_counts = Counter(word_speakers)
speaker = speaker_counts.most_common(1)[0][0]
app.logger.info(f"Derived speaker '{speaker}' for segment {i} from word-level data")
else:
speaker = 'UNKNOWN_SPEAKER'
app.logger.warning(f"No speaker info available for segment {i}, using UNKNOWN_SPEAKER")
simplified_segments.append({
'speaker': speaker,
'sentence': text,
'start_time': segment.get('start'),
'end_time': segment.get('end')
})
""")
else:
print("❌ Segments cannot be fixed using word-level data.")
print("Recommendation: Assign 'UNKNOWN_SPEAKER' to segments without speakers.")
def main():
if len(sys.argv) != 2:
print("Usage: python parse_asr_json.py <json_file>")
print(" or: python parse_asr_json.py -")
print(" (use '-' to read from stdin)")
sys.exit(1)
filename = sys.argv[1]
try:
if filename == '-':
# Read from stdin
json_text = sys.stdin.read()
else:
# Read from file
with open(filename, 'r', encoding='utf-8') as f:
json_text = f.read()
# Parse JSON
json_data = json.loads(json_text)
# Analyze the data
analyze_asr_json(json_data)
suggest_preprocessing_fix(json_data)
except FileNotFoundError:
print(f"ERROR: File '{filename}' not found.")
sys.exit(1)
except json.JSONDecodeError as e:
print(f"ERROR: Invalid JSON - {e}")
sys.exit(1)
except Exception as e:
print(f"ERROR: {e}")
sys.exit(1)
if __name__ == "__main__":
main()