Skip to content

Referencia Core

Documentación autogenerada de los módulos core.

Reclist Parser module.

This module handles parsing of 7-Mora reclist files for VCV voicebanks. Based on Section 6, MÓDULO 1 specification.

ReclistParseError

Bases: Exception

Exception raised when reclist parsing fails.

Attributes:

Name Type Description
line_number

The line number where the error occurred

line_content

The content of the problematic line

message

Detailed error message

Source code in src/core/reclist_parser.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
class ReclistParseError(Exception):
    """Error raised when a reclist cannot be parsed.

    Attributes:
        message: Detailed error message
        line_number: Line where the problem was found (0 when unknown)
        line_content: Text of the offending line
    """

    def __init__(self, message: str, line_number: int = 0, line_content: str = ""):
        self.message = message
        self.line_number = line_number
        self.line_content = line_content
        rendered = self._format_message()
        super().__init__(rendered)

    def _format_message(self) -> str:
        """Build the text shown by ``str(error)``."""
        # Without a positive line number there is no location to report.
        if not self.line_number > 0:
            return self.message
        header = f"Error en línea {self.line_number}: {self.message}"
        detail = f"Contenido: '{self.line_content}'"
        return f"{header}\n{detail}"

ReclistParser

Parser for 7-Mora VCV reclist files.

Analyzes reclist text files and converts them into structured PhoneticLine objects with metadata for each line.

The parser identifies: - Pure vowels (VV): a_a_i_a_u_e_o - Basic consonants (CV): ba_be_bi_bo_bu_ba_b - Clusters (CCR/CCL): pra_pre_pri... - Diphthongs (DIP): kya_kyu_kyo... - Breaths (R): R or breath markers

Example

parser = ReclistParser(bpm=120) lines = parser.parse_file("reclist.txt") print(lines[0].segments) ['a', 'a', 'i', 'a', 'u', 'e', 'o']

Source code in src/core/reclist_parser.py
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
class ReclistParser:
    """Parser for 7-Mora VCV reclist files.

    Converts reclist text (one underscore-separated line per recording)
    into PhoneticLine objects carrying the per-segment phoneme types, the
    expected duration and the WAV filename.

    Segment categories produced by detect_phoneme_type:
    - Breaths (R): empty segments or entries of BREATH_MARKERS
    - Pure vowels (VV): a single vowel, e.g. a_a_i_a_u_e_o
    - Clusters (CCR): segments starting with a CLUSTERS entry, e.g. pra
    - Diphthongs (DIP): consonant(s) + y/w + vowel, e.g. kya
    - VCV / CV / VC: decided from the first and last character

    Example:
        >>> parser = ReclistParser(bpm=120)
        >>> lines = parser.parse_file("reclist.txt")
        >>> print(lines[0].segments)
        ['a', 'a', 'i', 'a', 'u', 'e', 'o']
    """

    # Spanish/Common vowels
    VOWELS: Set[str] = {"a", "e", "i", "o", "u"}

    # Common consonant patterns
    CONSONANTS: Set[str] = {
        "b", "c", "ch", "d", "f", "g", "h", "j", "k", "l", "ll",
        "m", "n", "ñ", "p", "q", "r", "rr", "s", "t", "v", "w",
        "x", "y", "z"
    }

    # Consonant clusters
    CLUSTERS: Set[str] = {
        "br", "bl", "cr", "cl", "dr", "fl", "fr", "gl", "gr",
        "kr", "pl", "pr", "tr", "tl"
    }

    # Breath/silence markers
    BREATH_MARKERS: Set[str] = {"R", "r", "breath", "br", "息"}

    # Consonant(s) + y/w + vowel; compiled once instead of on every call.
    _DIPHTHONG_PATTERN = re.compile(r'^[bcdfghjklmnpqrstvwxyz]+[yw][aeiou]$', re.IGNORECASE)

    def __init__(self, bpm: int = DEFAULT_BPM):
        """Initialize parser with BPM for duration calculations.

        Args:
            bpm: Beats per minute for timing calculations
        """
        self.bpm = bpm
        self._ms_per_mora = ms_per_beat(bpm)

    def parse_file(self, filepath: str) -> List[PhoneticLine]:
        """Parse a reclist file and return list of PhoneticLine objects.

        Args:
            filepath: Path to the reclist .txt file

        Returns:
            List of PhoneticLine objects with complete metadata

        Raises:
            ReclistParseError: If the file is not a .txt file or has no
                parseable lines
            FileNotFoundError: If the file doesn't exist
        """
        path = Path(filepath)

        if not path.exists():
            raise FileNotFoundError(f"Reclist file not found: {filepath}")

        if path.suffix.lower() != ".txt":
            raise ReclistParseError(
                "Reclist must be a .txt file",
                line_content=filepath
            )

        try:
            # "utf-8-sig" decodes plain UTF-8 too, but also drops a leading
            # BOM that would otherwise end up inside the first segment.
            content = path.read_text(encoding="utf-8-sig")
        except UnicodeDecodeError:
            # Try with other common encodings
            try:
                content = path.read_text(encoding="shift-jis")
            except UnicodeDecodeError:
                # latin-1 maps every byte, so this last resort cannot fail.
                content = path.read_text(encoding="latin-1")

        return self.parse_content(content)

    def parse_content(self, content: str) -> List[PhoneticLine]:
        """Parse reclist content string.

        Blank lines and lines starting with "#" or "//" are skipped. Line
        numbers (used for PhoneticLine.index and error reports) count every
        physical line of ``content``, including skipped ones.

        Args:
            content: Raw text content of reclist

        Returns:
            List of PhoneticLine objects

        Raises:
            ReclistParseError: If a line cannot be parsed or no valid line
                is found
        """
        phonetic_lines: List[PhoneticLine] = []

        # Do not strip the whole text before splitting: that would drop
        # leading blank lines and shift every reported line number.
        for line_num, raw_line in enumerate(content.split("\n"), start=1):
            # Per-line strip also removes the "\r" of Windows line endings.
            stripped = raw_line.strip()
            if not stripped or stripped.startswith(("#", "//")):
                continue

            try:
                phonetic_lines.append(self._parse_line(stripped, line_num))
            except ReclistParseError:
                raise
            except Exception as e:
                # Keep the original exception as __cause__ for debugging.
                raise ReclistParseError(
                    f"Error inesperado: {str(e)}",
                    line_number=line_num,
                    line_content=stripped
                ) from e

        if not phonetic_lines:
            raise ReclistParseError("El archivo reclist está vacío o no contiene líneas válidas")

        return phonetic_lines

    def _parse_line(self, line: str, line_number: int) -> PhoneticLine:
        """Parse a single reclist line.

        Args:
            line: Raw line text (e.g., "ba_be_bi_bo_bu_ba_b")
            line_number: Line number, stored as the PhoneticLine index

        Returns:
            PhoneticLine with parsed segments
        """
        # Split by underscore (standard reclist format)
        segments = line.split("_")

        return PhoneticLine(
            index=line_number,
            raw_text=line,
            segments=segments,
            phoneme_types=[self.detect_phoneme_type(seg) for seg in segments],
            # One mora duration per segment.
            expected_duration_ms=self._ms_per_mora * len(segments),
            filename=f"{line}.wav",
        )

    def validate_mora_count(self, line: str, expected: int = MORAS_PER_LINE) -> bool:
        """Verify that the line has the expected number of moras.

        Args:
            line: Raw line text
            expected: Expected mora count (default: MORAS_PER_LINE)

        Returns:
            True if the number of underscore-separated segments equals
            ``expected``
        """
        return len(line.split("_")) == expected

    def detect_phoneme_type(self, segment: str) -> PhonemeType:
        """Classify a phoneme segment into its type.

        Checks run in priority order: breath, pure vowel, cluster,
        diphthong, then vowel/consonant edges. Matching is
        case-insensitive.

        Args:
            segment: Individual segment (e.g., "ba", "a", "pra")

        Returns:
            PhonemeType classification
        """
        seg = segment.lower()

        # Empty segments and breath markers
        if not seg or seg in self.BREATH_MARKERS:
            return PhonemeType.R

        # Pure vowel (single vowel character)
        if seg in self.VOWELS:
            return PhonemeType.VV

        # Consonant cluster at the start.
        # NOTE(review): every cluster is reported as CCR; PhonemeType.CCL
        # is never produced here - confirm this is intended.
        if seg.startswith(tuple(self.CLUSTERS)):
            return PhonemeType.CCR

        # Diphthongs (consonant + y/w + vowel)
        if self._DIPHTHONG_PATTERN.match(seg):
            return PhonemeType.DIP

        starts_with_vowel = seg[0] in self.VOWELS

        if seg[-1] in self.VOWELS:
            # Vowel ... vowel with at least one character between them
            if len(seg) >= 3 and starts_with_vowel:
                return PhonemeType.VCV
            return PhonemeType.CV

        # Ends with a consonant: a coda (VC) when it starts with a vowel,
        # otherwise a standalone consonant such as the final "b" of
        # ba_be_bi_bo_bu_ba_b, which is reported as CV.
        return PhonemeType.VC if starts_with_vowel else PhonemeType.CV

    def get_line_summary(self, line: PhoneticLine) -> str:
        """Generate a human-readable summary of a phonetic line.

        Args:
            line: PhoneticLine to summarize

        Returns:
            Three-line string with the raw text, segment count/duration
            and the number of segments per phoneme type
        """
        # Plain dict keeps first-seen order of the types in the output.
        type_counts = {}
        for ptype in line.phoneme_types:
            type_counts[ptype.name] = type_counts.get(ptype.name, 0) + 1

        type_summary = ", ".join(f"{k}:{v}" for k, v in type_counts.items())

        return (
            f"Línea {line.index:03d}: {line.raw_text}\n"
            f"  Segmentos: {line.mora_count} | Duración: {line.expected_duration_ms:.1f}ms\n"
            f"  Tipos: {type_summary}"
        )

__init__(bpm=DEFAULT_BPM)

Initialize parser with BPM for duration calculations.

Parameters:

Name Type Description Default
bpm int

Beats per minute for timing calculations

DEFAULT_BPM
Source code in src/core/reclist_parser.py
75
76
77
78
79
80
81
82
def __init__(self, bpm: int = DEFAULT_BPM):
    """Create a parser bound to a tempo.

    Args:
        bpm: Tempo in beats per minute; the duration of one mora is
            derived from it through ms_per_beat.
    """
    self.bpm = bpm
    mora_ms = ms_per_beat(self.bpm)
    self._ms_per_mora = mora_ms

detect_phoneme_type(segment)

Classify a phoneme segment into its type.

Parameters:

Name Type Description Default
segment str

Individual segment (e.g., "ba", "a", "pra")

required

Returns:

Type Description
PhonemeType

PhonemeType classification

Source code in src/core/reclist_parser.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
def detect_phoneme_type(self, segment: str) -> PhonemeType:
    """Return the PhonemeType of one reclist segment.

    The checks run in priority order (breath, pure vowel, cluster,
    diphthong, vowel/consonant edges) on the lower-cased segment.

    Args:
        segment: Individual segment (e.g., "ba", "a", "pra")

    Returns:
        PhonemeType classification
    """
    seg = segment.lower()

    # Empty segments count as breaths, like the explicit markers.
    if not seg or seg in self.BREATH_MARKERS:
        return PhonemeType.R

    # A lone vowel.
    if seg in self.VOWELS:
        return PhonemeType.VV

    # Any known cluster prefix; all of them are reported as CCR.
    if seg.startswith(tuple(self.CLUSTERS)):
        return PhonemeType.CCR

    # Consonant(s) followed by y/w and a single vowel.
    if re.match(r'^[bcdfghjklmnpqrstvwxyz]+[yw][aeiou]$', seg, re.IGNORECASE):
        return PhonemeType.DIP

    opens_with_vowel = seg[0] in self.VOWELS
    closes_with_vowel = seg[-1] in self.VOWELS

    if closes_with_vowel:
        # vowel ... vowel with something in between is a VCV transition
        is_vcv = len(seg) >= 3 and opens_with_vowel
        return PhonemeType.VCV if is_vcv else PhonemeType.CV

    # Ends in a consonant: coda when it opens with a vowel, otherwise a
    # standalone consonant (like "b" at end of ba_be_bi_bo_bu_ba_b).
    return PhonemeType.VC if opens_with_vowel else PhonemeType.CV

get_line_summary(line)

Generate a human-readable summary of a phonetic line.

Parameters:

Name Type Description Default
line PhoneticLine

PhoneticLine to summarize

required

Returns:

Type Description
str

Formatted summary string

Source code in src/core/reclist_parser.py
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
def get_line_summary(self, line: PhoneticLine) -> str:
    """Describe a phonetic line in three human-readable rows.

    Args:
        line: PhoneticLine to summarize

    Returns:
        Text with the raw line, its segment count and duration, and how
        many segments fall into each phoneme type
    """
    # Tally type names in first-seen order.
    tally = {}
    for phoneme_type in line.phoneme_types:
        label = phoneme_type.name
        if label in tally:
            tally[label] += 1
        else:
            tally[label] = 1

    type_summary = ", ".join([f"{label}:{count}" for label, count in tally.items()])

    header = f"Línea {line.index:03d}: {line.raw_text}"
    stats = f"  Segmentos: {line.mora_count} | Duración: {line.expected_duration_ms:.1f}ms"
    kinds = f"  Tipos: {type_summary}"
    return "\n".join((header, stats, kinds))

parse_content(content)

Parse reclist content string.

Parameters:

Name Type Description Default
content str

Raw text content of reclist

required

Returns:

Type Description
List[PhoneticLine]

List of PhoneticLine objects

Raises:

Type Description
ReclistParseError

If content format is invalid

Source code in src/core/reclist_parser.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
def parse_content(self, content: str) -> List[PhoneticLine]:
    """Parse reclist content string.

    Blank lines and lines starting with "#" or "//" are skipped. Line
    numbers (passed to _parse_line and used in error reports) count every
    physical line of ``content``, including skipped ones.

    Args:
        content: Raw text content of reclist

    Returns:
        List of PhoneticLine objects

    Raises:
        ReclistParseError: If a line cannot be parsed or no valid line is
            found
    """
    phonetic_lines: List[PhoneticLine] = []

    # Do not strip the whole text before splitting: that would drop leading
    # blank lines and shift every reported line number.
    for line_num, raw_line in enumerate(content.split("\n"), start=1):
        # Per-line strip also removes the "\r" of Windows line endings.
        stripped = raw_line.strip()
        if not stripped or stripped.startswith(("#", "//")):
            continue

        try:
            phonetic_lines.append(self._parse_line(stripped, line_num))
        except ReclistParseError:
            raise
        except Exception as e:
            # Keep the original exception as __cause__ for debugging.
            raise ReclistParseError(
                f"Error inesperado: {str(e)}",
                line_number=line_num,
                line_content=stripped
            ) from e

    if not phonetic_lines:
        raise ReclistParseError("El archivo reclist está vacío o no contiene líneas válidas")

    return phonetic_lines

parse_file(filepath)

Parse a reclist file and return list of PhoneticLine objects.

Parameters:

Name Type Description Default
filepath str

Path to the reclist .txt file

required

Returns:

Type Description
List[PhoneticLine]

List of PhoneticLine objects with complete metadata

Raises:

Type Description
ReclistParseError

If the file format is invalid

FileNotFoundError

If the file doesn't exist

Source code in src/core/reclist_parser.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def parse_file(self, filepath: str) -> List[PhoneticLine]:
    """Parse a reclist file and return list of PhoneticLine objects.

    The file is decoded as UTF-8 (with or without BOM), falling back to
    Shift-JIS and finally Latin-1.

    Args:
        filepath: Path to the reclist .txt file

    Returns:
        List of PhoneticLine objects with complete metadata

    Raises:
        ReclistParseError: If the file is not a .txt file or its content
            cannot be parsed
        FileNotFoundError: If the file doesn't exist
    """
    path = Path(filepath)

    if not path.exists():
        raise FileNotFoundError(f"Reclist file not found: {filepath}")

    if path.suffix.lower() != ".txt":
        raise ReclistParseError(
            "Reclist must be a .txt file",
            line_content=filepath
        )

    try:
        # "utf-8-sig" decodes plain UTF-8 too, but also drops a leading BOM
        # that would otherwise end up inside the first segment.
        content = path.read_text(encoding="utf-8-sig")
    except UnicodeDecodeError:
        # Try with other common encodings
        try:
            content = path.read_text(encoding="shift-jis")
        except UnicodeDecodeError:
            # latin-1 maps every byte, so this last resort cannot fail.
            content = path.read_text(encoding="latin-1")

    return self.parse_content(content)

validate_mora_count(line, expected=MORAS_PER_LINE)

Verify that the line has the expected number of moras.

Parameters:

Name Type Description Default
line str

Raw line text

required
expected int

Expected mora count (default: 7)

MORAS_PER_LINE

Returns:

Type Description
bool

True if mora count matches expected

Source code in src/core/reclist_parser.py
188
189
190
191
192
193
194
195
196
197
198
199
def validate_mora_count(self, line: str, expected: int = MORAS_PER_LINE) -> bool:
    """Check whether a raw line contains the expected number of moras.

    Args:
        line: Raw line text
        expected: Expected mora count (default: MORAS_PER_LINE)

    Returns:
        True if the number of underscore-separated segments equals
        ``expected``
    """
    # n separators always delimit n + 1 segments (empty ones included).
    mora_total = line.count("_") + 1
    return mora_total == expected

Data models for VocalParam.

This module contains all dataclasses and enums used throughout the application. Following the specification in Section 6-7 of the design document.

OtoEntry dataclass

Represents a single entry in oto.ini file.

Format: filename.wav=alias,offset,consonant,cutoff,preutter,overlap

Attributes:

Name Type Description
filename str

WAV file name

alias str

Phonetic alias (e.g., "- ba" or "a be")

offset float

Start position in ms (cyan line)

consonant float

Fixed consonant region in ms (dark blue line)

cutoff float

End position in ms, negative = from end (pink/magenta line)

preutter float

Pre-utterance point in ms (red line)

overlap float

Overlap region in ms (green line)

Source code in src/core/models.py
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
@dataclass
class OtoEntry:
    """Represents a single entry in oto.ini file.

    Format: filename.wav=alias,offset,consonant,cutoff,preutter,overlap

    When ``comment`` is set it is written after the last field as
    " #comment" and read back by from_oto_line.

    Attributes:
        filename: WAV file name
        alias: Phonetic alias (e.g., "- ba" or "a be")
        offset: Start position in ms (cyan line)
        consonant: Fixed consonant region in ms (dark blue line)
        cutoff: End position in ms, negative = from end (pink/magenta line)
        preutter: Pre-utterance point in ms (red line)
        overlap: Overlap region in ms (green line)
        comment: Optional free-text note appended to the line
    """
    filename: str
    alias: str
    offset: float
    consonant: float
    cutoff: float
    preutter: float
    overlap: float
    comment: str = ""

    def to_oto_line(self) -> str:
        """Convert to oto.ini format string.

        Returns:
            The entry as "filename=alias,offset,consonant,cutoff,preutter,
            overlap" with one decimal per number, followed by " #comment"
            when a comment is set.
        """
        oto_line = (
            f"{self.filename}={self.alias},"
            f"{self.offset:.1f},{self.consonant:.1f},"
            f"{self.cutoff:.1f},{self.preutter:.1f},{self.overlap:.1f}"
        )
        # The suffix must be added separately: a trailing
        # `... if self.comment else ""` would apply to the whole
        # concatenated string and yield "" for entries without a comment.
        if self.comment:
            oto_line += f" #{self.comment}"
        return oto_line

    @classmethod
    def from_oto_line(cls, line: str) -> "OtoEntry":
        """Parse an oto.ini format line.

        Args:
            line: Text such as "a.wav=- ba,10.0,20.0,-30.0,5.0,2.0",
                optionally ending in " #comment".

        Returns:
            The parsed OtoEntry.

        Raises:
            ValueError: If "=" is missing or a numeric field is not a number.
            IndexError: If fewer than six comma-separated fields are present.
        """
        # Split filename from parameters
        filename_part, params_part = line.strip().split("=", 1)
        # Limit the split so commas inside a trailing comment stay intact.
        fields = params_part.split(",", 5)

        alias = fields[0]
        offset = float(fields[1])
        consonant = float(fields[2])
        cutoff = float(fields[3])
        preutter = float(fields[4])

        # The last chunk holds the overlap, optionally followed by
        # "#comment"; extra comma-separated fields before it are ignored.
        overlap_text, _, comment = fields[5].partition("#")
        overlap = float(overlap_text.split(",", 1)[0])

        return cls(
            filename=filename_part,
            alias=alias,
            offset=offset,
            consonant=consonant,
            cutoff=cutoff,
            preutter=preutter,
            overlap=overlap,
            comment=comment,
        )

    def validate(self) -> List[str]:
        """Validate OTO parameters.

        Checks:
        - Overlap <= Preutterance (golden rule)

        Returns:
            List of error messages, empty if valid.
        """
        errors = []
        if self.overlap > self.preutter:
            errors.append(f"Overlap ({self.overlap}) cannot be greater than Preutterance ({self.preutter})")
        return errors

from_oto_line(line) classmethod

Parse an oto.ini format line.

Source code in src/core/models.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
@classmethod
def from_oto_line(cls, line: str) -> "OtoEntry":
    """Parse an oto.ini format line.

    Args:
        line: Text such as "a.wav=- ba,10.0,20.0,-30.0,5.0,2.0",
            optionally ending in " #comment".

    Returns:
        The parsed OtoEntry.

    Raises:
        ValueError: If "=" is missing or a numeric field is not a number.
        IndexError: If fewer than six comma-separated fields are present.
    """
    # Split filename from parameters
    filename_part, params_part = line.strip().split("=", 1)
    # Limit the split so commas inside a trailing comment stay intact.
    fields = params_part.split(",", 5)

    alias = fields[0]
    offset = float(fields[1])
    consonant = float(fields[2])
    cutoff = float(fields[3])
    preutter = float(fields[4])

    # The last chunk holds the overlap, optionally followed by "#comment";
    # extra comma-separated fields before it are ignored.
    overlap_text, _, comment = fields[5].partition("#")
    overlap = float(overlap_text.split(",", 1)[0])

    return cls(
        filename=filename_part,
        alias=alias,
        offset=offset,
        consonant=consonant,
        cutoff=cutoff,
        preutter=preutter,
        overlap=overlap,
        comment=comment,
    )

to_oto_line()

Convert to oto.ini format string.

Source code in src/core/models.py
 94
 95
 96
 97
 98
 99
100
101
def to_oto_line(self) -> str:
    """Convert to oto.ini format string.

    Returns:
        The entry as "filename=alias,offset,consonant,cutoff,preutter,
        overlap" with one decimal per number, followed by " #comment" when
        a comment is set.
    """
    oto_line = (
        f"{self.filename}={self.alias},"
        f"{self.offset:.1f},{self.consonant:.1f},"
        f"{self.cutoff:.1f},{self.preutter:.1f},{self.overlap:.1f}"
    )
    # The suffix must be added separately: a trailing
    # `... if self.comment else ""` would apply to the whole concatenated
    # string and yield "" for entries without a comment.
    if self.comment:
        oto_line += f" #{self.comment}"
    return oto_line

validate()

Validate OTO parameters.

Checks: - Overlap <= Preutterance (Gold Rule)

Returns:

Type Description
List[str]

List of error messages.

Source code in src/core/models.py
121
122
123
124
125
126
127
128
129
130
131
132
133
def validate(self) -> List[str]:
    """Check the OTO parameters for consistency.

    The only rule enforced is that the overlap must not exceed the
    pre-utterance.

    Returns:
        List of error messages; empty when the entry is valid.
    """
    overlap_too_large = self.overlap > self.preutter
    return (
        [f"Overlap ({self.overlap}) cannot be greater than Preutterance ({self.preutter})"]
        if overlap_too_large
        else []
    )

PhonemeType

Bases: Enum

Classification of phoneme segments.

Based on RF-02.1 specification: - VV: Pure vowels - CV: Consonant + Vowel (basic) - VCV: Vowel-Consonant-Vowel transitions - VC: Vowel + Consonant (coda) - CCR: Consonant clusters (right) - CCL: Consonant clusters (left) - DIP: Diphthongs (palatalization/labialization) - R: Breaths/respirations

Source code in src/core/models.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
class PhonemeType(Enum):
    """Classification of phoneme segments.

    Based on RF-02.1 specification:
    - VV: Pure vowels
    - CV: Consonant + Vowel (basic)
    - VCV: Vowel-Consonant-Vowel transitions
    - VC: Vowel + Consonant (coda)
    - CCR: Consonant clusters (right)
    - CCL: Consonant clusters (left)
    - DIP: Diphthongs (palatalization/labialization)
    - R: Breaths/respirations

    Member values are generated with auto(), so they depend on the
    declaration order below.
    """
    VV = auto()   # Pure vowels (a_a_i_a_u_e_o)
    CV = auto()   # Consonant-Vowel (ba, ka, sa...)
    VCV = auto()  # Vowel-Consonant-Vowel transitions
    VC = auto()   # Vowel-Consonant codas
    CCR = auto()  # Consonant clusters (pr, tr, kr...)
    CCL = auto()  # Consonant clusters left
    DIP = auto()  # Diphthongs
    R = auto()    # Breaths/respirations

PhoneticLine dataclass

Represents a single line from the Reclist.

As specified in Section 6, MÓDULO 1.

Attributes:

Name Type Description
index int

Line number (001, 002...)

raw_text str

Original text from reclist (e.g., "ba_be_bi_bo_bu_ba_b")

segments List[str]

List of individual segments (e.g., ["ba", "be", "bi"...])

phoneme_types List[PhonemeType]

Classification of each segment

expected_duration_ms float

Calculated duration based on BPM

filename str

Generated WAV filename

Source code in src/core/models.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
@dataclass
class PhoneticLine:
    """Represents a single line from the Reclist.

    As specified in Section 6, MÓDULO 1.

    Attributes:
        index: Line number (001, 002...)
        raw_text: Original text from reclist (e.g., "ba_be_bi_bo_bu_ba_b")
        segments: List of individual segments (e.g., ["ba", "be", "bi"...])
        phoneme_types: Classification of each segment, parallel to
            ``segments``
        expected_duration_ms: Calculated duration based on BPM
        filename: Generated WAV filename
    """
    index: int
    raw_text: str
    segments: List[str]
    phoneme_types: List[PhonemeType]
    expected_duration_ms: float
    filename: str

    @property
    def mora_count(self) -> int:
        """Number of moras in this line, i.e. the length of ``segments``."""
        return len(self.segments)

mora_count property

Number of moras (segments) in this line.

ProjectData dataclass

Complete project state for serialization (JSON).

Based on Section 7 data model specification.

Source code in src/core/models.py
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
@dataclass
class ProjectData:
    """Serializable snapshot of a whole project (JSON round trip).

    Based on Section 7 data model specification. ``to_dict`` and
    ``from_dict`` convert to and from the nested-dict layout; ``validate``
    reports consistency problems without raising.
    """
    project_name: str
    bpm: int
    reclist_path: str
    output_directory: str
    recordings: List[Recording] = field(default_factory=list)
    created_at: datetime = field(default_factory=datetime.now)
    last_modified: datetime = field(default_factory=datetime.now)
    version: str = "1.0.0"

    def to_dict(self) -> dict:
        """Return the project as plain dicts/lists ready for JSON dumping."""
        payload = {
            "project_name": self.project_name,
            "bpm": self.bpm,
            "reclist_path": self.reclist_path,
            "output_directory": self.output_directory,
        }

        serialized_recordings = []
        for recording in self.recordings:
            item = {
                "line_index": recording.line_index,
                "filename": recording.filename,
                # Enum members are stored by value.
                "status": recording.status.value,
                "duration_ms": recording.duration_ms,
                "hash": recording.hash,
            }
            # The entry filename is not stored; from_dict restores it from
            # the owning recording.
            item["oto_entries"] = [
                {
                    "alias": entry.alias,
                    "offset": entry.offset,
                    "consonant": entry.consonant,
                    "cutoff": entry.cutoff,
                    "preutter": entry.preutter,
                    "overlap": entry.overlap,
                    "comment": entry.comment,
                }
                for entry in recording.oto_entries
            ]
            serialized_recordings.append(item)
        payload["recordings"] = serialized_recordings

        payload["metadata"] = {
            "created_at": self.created_at.isoformat(),
            "last_modified": self.last_modified.isoformat(),
            "version": self.version,
        }
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> "ProjectData":
        """Build a ProjectData from the layout produced by ``to_dict``.

        Raises:
             ValueError: If required fields are missing.
        """
        missing = [
            name
            for name in ("project_name", "bpm", "reclist_path", "output_directory")
            if name not in data
        ]
        if missing:
            raise ValueError(f"Missing required fields in project data: {', '.join(missing)}")

        recordings = []
        for raw in data.get("recordings", []):
            entries = []
            for item in raw.get("oto_entries", []):
                # Every entry inherits the filename of its recording.
                entries.append(OtoEntry(
                    filename=raw["filename"],
                    alias=item["alias"],
                    offset=item["offset"],
                    consonant=item["consonant"],
                    cutoff=item["cutoff"],
                    preutter=item["preutter"],
                    overlap=item["overlap"],
                    comment=item.get("comment", ""),
                ))
            recording = Recording(
                line_index=raw["line_index"],
                filename=raw["filename"],
                status=RecordingStatus(raw["status"]),
                duration_ms=raw.get("duration_ms", 0.0),
                hash=raw.get("hash"),
                oto_entries=entries,
            )
            recordings.append(recording)

        metadata = data.get("metadata", {})
        # Missing timestamps fall back to the current time.
        created_text = metadata.get("created_at", datetime.now().isoformat())
        modified_text = metadata.get("last_modified", datetime.now().isoformat())

        return cls(
            project_name=data["project_name"],
            bpm=data["bpm"],
            reclist_path=data["reclist_path"],
            output_directory=data["output_directory"],
            recordings=recordings,
            created_at=datetime.fromisoformat(created_text),
            last_modified=datetime.fromisoformat(modified_text),
            version=metadata.get("version", "1.0.0"),
        )

    def validate(self) -> List[str]:
        """Collect consistency problems of the project.

        Returns:
            List of error messages, empty if valid.
        """
        problems = []

        if not self.project_name:
            problems.append("Project name cannot be empty")

        bpm_out_of_range = self.bpm < 40 or self.bpm > 300
        if bpm_out_of_range:
            problems.append(f"Invalid BPM: {self.bpm}")

        # Per-recording checks: a filename must exist and the duration
        # cannot be negative.
        for position, recording in enumerate(self.recordings):
            if not recording.filename:
                problems.append(f"Recording at index {position} has no filename")
            if recording.duration_ms < 0:
                problems.append(f"Invalid duration for {recording.filename}")

        return problems

from_dict(data) classmethod

Create ProjectData from dictionary with validation.

Raises:

Type Description
ValueError

If required fields are missing.

Source code in src/core/models.py
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
@classmethod
def from_dict(cls, data: dict) -> "ProjectData":
    """Build a ProjectData from its dictionary representation.

    Raises:
         ValueError: If required fields are missing.
    """
    missing = [
        name
        for name in ("project_name", "bpm", "reclist_path", "output_directory")
        if name not in data
    ]
    if missing:
        raise ValueError(f"Missing required fields in project data: {', '.join(missing)}")

    recordings = []
    for raw in data.get("recordings", []):
        entries = []
        for item in raw.get("oto_entries", []):
            # Every entry inherits the filename of its recording.
            entries.append(OtoEntry(
                filename=raw["filename"],
                alias=item["alias"],
                offset=item["offset"],
                consonant=item["consonant"],
                cutoff=item["cutoff"],
                preutter=item["preutter"],
                overlap=item["overlap"],
                comment=item.get("comment", ""),
            ))
        recording = Recording(
            line_index=raw["line_index"],
            filename=raw["filename"],
            status=RecordingStatus(raw["status"]),
            duration_ms=raw.get("duration_ms", 0.0),
            hash=raw.get("hash"),
            oto_entries=entries,
        )
        recordings.append(recording)

    metadata = data.get("metadata", {})
    # Missing timestamps fall back to the current time.
    created_text = metadata.get("created_at", datetime.now().isoformat())
    modified_text = metadata.get("last_modified", datetime.now().isoformat())

    return cls(
        project_name=data["project_name"],
        bpm=data["bpm"],
        reclist_path=data["reclist_path"],
        output_directory=data["output_directory"],
        recordings=recordings,
        created_at=datetime.fromisoformat(created_text),
        last_modified=datetime.fromisoformat(modified_text),
        version=metadata.get("version", "1.0.0"),
    )

to_dict()

Convert to dictionary for JSON serialization.

Source code in src/core/models.py
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
def to_dict(self) -> dict:
    """Return the project as plain dicts/lists ready for JSON dumping."""
    payload = {
        "project_name": self.project_name,
        "bpm": self.bpm,
        "reclist_path": self.reclist_path,
        "output_directory": self.output_directory,
    }

    serialized_recordings = []
    for recording in self.recordings:
        item = {
            "line_index": recording.line_index,
            "filename": recording.filename,
            # Status objects are stored by their ``value``.
            "status": recording.status.value,
            "duration_ms": recording.duration_ms,
            "hash": recording.hash,
        }
        # Only the entry parameters are written; the filename already
        # lives on the recording itself.
        item["oto_entries"] = [
            {
                "alias": entry.alias,
                "offset": entry.offset,
                "consonant": entry.consonant,
                "cutoff": entry.cutoff,
                "preutter": entry.preutter,
                "overlap": entry.overlap,
                "comment": entry.comment,
            }
            for entry in recording.oto_entries
        ]
        serialized_recordings.append(item)
    payload["recordings"] = serialized_recordings

    payload["metadata"] = {
        "created_at": self.created_at.isoformat(),
        "last_modified": self.last_modified.isoformat(),
        "version": self.version,
    }
    return payload

validate()

Validate project data consistency.

Returns:

Type Description
List[str]

List of error messages, empty if valid.

Source code in src/core/models.py
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
def validate(self) -> List[str]:
    """Collect consistency problems of the project.

    Returns:
        List of error messages, empty if valid.
    """
    problems = []

    if not self.project_name:
        problems.append("Project name cannot be empty")

    bpm_out_of_range = self.bpm < 40 or self.bpm > 300
    if bpm_out_of_range:
        problems.append(f"Invalid BPM: {self.bpm}")

    # Per-recording checks: a filename must exist and the duration cannot
    # be negative.
    for position, recording in enumerate(self.recordings):
        if not recording.filename:
            problems.append(f"Recording at index {position} has no filename")
        if recording.duration_ms < 0:
            problems.append(f"Invalid duration for {recording.filename}")

    return problems

Recording dataclass

Represents a single recording with its oto entries.

Source code in src/core/models.py
136
137
138
139
140
141
142
143
144
@dataclass
class Recording:
    """Represents a single recording with its oto entries.

    Only ``line_index`` and ``filename`` are required; the remaining
    fields have defaults.
    """
    # Index of the reclist line this recording belongs to
    # (presumably PhoneticLine.index - TODO confirm).
    line_index: int
    # WAV file name; ProjectData.validate reports an empty value.
    filename: str
    # Stored in project dictionaries through the enum's string value.
    status: RecordingStatus = RecordingStatus.PENDING
    # ProjectData.validate reports negative values as invalid.
    duration_ms: float = 0.0
    # Optional hash string; what it is computed from is not visible here.
    hash: Optional[str] = None
    # Fresh list per instance (default_factory), never shared.
    oto_entries: List[OtoEntry] = field(default_factory=list)

RecordingStatus

Bases: Enum

Status of a recording line.

Source code in src/core/models.py
36
37
38
39
40
class RecordingStatus(Enum):
    """Status of a recording line.

    The string values are what ProjectData.to_dict writes and what
    ProjectData.from_dict passes back to ``RecordingStatus(...)``.
    """
    PENDING = "pending"      # default status of a new Recording
    RECORDED = "recorded"
    VALIDATED = "validated"