Referencia Core

Documentación autogenerada de los módulos core.

Reclist Parser module.

This module handles parsing of 7-Mora reclist files for VCV voicebanks. Based on Section 6, MÓDULO 1 specification.

`ReclistParseError`

Bases: Exception

Exception raised when reclist parsing fails.

Attributes:

Name	Type	Description
`line_number`		The line number where the error occurred
`line_content`		The content of the problematic line
`message`		Detailed error message

Source code in src/core/reclist_parser.py

class ReclistParseError(Exception):
    """Raised when a reclist cannot be parsed.

    The text passed to ``Exception`` is produced by ``_format_message``:
    when a positive ``line_number`` is given it includes the line number
    and the offending line content; otherwise it is just ``message``.

    Attributes:
        line_number: Line number where the error occurred (0 = not tied
            to a specific line).
        line_content: Content of the problematic line.
        message: Detailed error message, without the line prefix.
    """

    def __init__(self, message: str, line_number: int = 0, line_content: str = ""):
        # Attributes must exist before the text is built, because
        # _format_message reads them.
        self.message = message
        self.line_number = line_number
        self.line_content = line_content
        super().__init__(self._format_message())

    def _format_message(self) -> str:
        """Build the text shown when the exception is printed."""
        if self.line_number <= 0:
            # No line context available: report the bare message.
            return self.message
        header = f"Error en línea {self.line_number}: {self.message}"
        detail = f"Contenido: '{self.line_content}'"
        return "\n".join((header, detail))

`ReclistParser`

Parser for 7-Mora VCV reclist files.

Analyzes reclist text files and converts them into structured PhoneticLine objects with metadata for each line.

The parser identifies:

- Pure vowels (VV): a_a_i_a_u_e_o
- Basic consonants (CV): ba_be_bi_bo_bu_ba_b
- Clusters (CCR/CCL): pra_pre_pri...
- Diphthongs (DIP): kya_kyu_kyo...
- Breaths (R): R or breath markers

Example

    >>> parser = ReclistParser(bpm=120)
    >>> lines = parser.parse_file("reclist.txt")
    >>> print(lines[0].segments)
    ['a', 'a', 'i', 'a', 'u', 'e', 'o']

Source code in src/core/reclist_parser.py

class ReclistParser:
    """Parser for 7-Mora VCV reclist files.

    Reads reclist text (one underscore-separated line per recording) and
    turns every non-blank, non-comment line into a PhoneticLine holding
    its segments, the phoneme type of each segment, the expected duration
    and the WAV filename.

    The parser identifies:
    - Pure vowels (VV): a_a_i_a_u_e_o
    - Basic consonants (CV): ba_be_bi_bo_bu_ba_b
    - Clusters (CCR/CCL): pra_pre_pri...
    - Diphthongs (DIP): kya_kyu_kyo...
    - Breaths (R): R or breath markers

    Example:
        >>> parser = ReclistParser(bpm=120)
        >>> lines = parser.parse_file("reclist.txt")
        >>> print(lines[0].segments)
        ['a', 'a', 'i', 'a', 'u', 'e', 'o']
    """

    # Spanish/Common vowels
    VOWELS: Set[str] = {"a", "e", "i", "o", "u"}

    # Common consonant patterns
    CONSONANTS: Set[str] = {
        "b", "c", "ch", "d", "f", "g", "h", "j", "k", "l", "ll",
        "m", "n", "ñ", "p", "q", "r", "rr", "s", "t", "v", "w",
        "x", "y", "z"
    }

    # Consonant clusters
    CLUSTERS: Set[str] = {
        "br", "bl", "cr", "cl", "dr", "fl", "fr", "gl", "gr",
        "kr", "pl", "pr", "tr", "tl"
    }

    # Breath/silence markers
    BREATH_MARKERS: Set[str] = {"R", "r", "breath", "br", "息"}

    # Encodings tried in order by parse_file before the latin-1 fallback.
    # "utf-8-sig" decodes plain UTF-8 as well and drops a leading BOM, so
    # the BOM never ends up inside the first segment / generated filename.
    _TEXT_ENCODINGS = ("utf-8-sig", "shift-jis")

    # Consonant(s) + y/w + vowel; compiled once instead of on every call.
    _DIPHTHONG_PATTERN = re.compile(r'^[bcdfghjklmnpqrstvwxyz]+[yw][aeiou]$', re.IGNORECASE)

    def __init__(self, bpm: int = DEFAULT_BPM):
        """Initialize parser with BPM for duration calculations.

        Args:
            bpm: Beats per minute for timing calculations
        """
        self.bpm = bpm
        # One mora is timed as one beat: the per-mora duration is whatever
        # ms_per_beat returns for this BPM.
        self._ms_per_mora = ms_per_beat(bpm)

    def parse_file(self, filepath: str) -> List[PhoneticLine]:
        """Parse a reclist file and return list of PhoneticLine objects.

        The file is decoded as UTF-8 (with or without BOM), then
        Shift-JIS, and finally latin-1, which accepts any byte sequence.

        Args:
            filepath: Path to the reclist .txt file

        Returns:
            List of PhoneticLine objects with complete metadata

        Raises:
            ReclistParseError: If the extension is not .txt or the file
                contains no valid lines
            FileNotFoundError: If the file doesn't exist
        """
        path = Path(filepath)

        if not path.exists():
            raise FileNotFoundError(f"Reclist file not found: {filepath}")

        if path.suffix.lower() != ".txt":
            raise ReclistParseError(
                "Reclist must be a .txt file",
                line_content=filepath
            )

        for encoding in self._TEXT_ENCODINGS:
            try:
                content = path.read_text(encoding=encoding)
                break
            except UnicodeDecodeError:
                continue
        else:
            # latin-1 maps every byte to a character, so this cannot fail.
            content = path.read_text(encoding="latin-1")

        return self.parse_content(content)

    def parse_content(self, content: str) -> List[PhoneticLine]:
        """Parse reclist content string.

        Blank lines and lines starting with ``#`` or ``//`` are skipped.
        Line numbers (used for ``PhoneticLine.index`` and in error
        messages) are 1-based positions in ``content`` as given, so they
        match what an editor shows even when the text starts with blank
        lines.

        Args:
            content: Raw text content of reclist

        Returns:
            List of PhoneticLine objects

        Raises:
            ReclistParseError: If content format is invalid or no valid
                line is found
        """
        phonetic_lines: List[PhoneticLine] = []

        # Drop a BOM left by callers that decoded the text themselves; it
        # is not whitespace, so str.strip() would keep it in the segment.
        text = content.lstrip("\ufeff")

        # The text is deliberately NOT stripped as a whole: removing leading
        # blank lines would shift every reported line number.
        for line_num, raw_line in enumerate(text.splitlines(), start=1):
            # Skip empty lines and comments
            stripped = raw_line.strip()
            if not stripped or stripped.startswith(("#", "//")):
                continue

            try:
                phonetic_lines.append(self._parse_line(stripped, line_num))
            except ReclistParseError:
                raise
            except Exception as e:
                # Re-raise with line context, keeping the original cause
                # available for debugging.
                raise ReclistParseError(
                    f"Error inesperado: {str(e)}",
                    line_number=line_num,
                    line_content=stripped
                ) from e

        if not phonetic_lines:
            raise ReclistParseError("El archivo reclist está vacío o no contiene líneas válidas")

        return phonetic_lines

    def _parse_line(self, line: str, line_number: int) -> PhoneticLine:
        """Parse a single reclist line.

        Args:
            line: Raw line text (e.g., "ba_be_bi_bo_bu_ba_b")
            line_number: Line number, stored as the PhoneticLine index

        Returns:
            PhoneticLine with parsed segments
        """
        # Split by underscore (standard reclist format)
        segments = line.split("_")

        return PhoneticLine(
            index=line_number,
            raw_text=line,
            segments=segments,
            phoneme_types=[self.detect_phoneme_type(seg) for seg in segments],
            # Every segment counts as one mora.
            expected_duration_ms=self._ms_per_mora * len(segments),
            # The recording is named after the full line text.
            filename=f"{line}.wav",
        )

    def validate_mora_count(self, line: str, expected: int = MORAS_PER_LINE) -> bool:
        """Verify that the line has the expected number of moras.

        Args:
            line: Raw line text
            expected: Expected mora count (defaults to MORAS_PER_LINE)

        Returns:
            True if the number of underscore-separated segments equals
            ``expected``
        """
        return len(line.split("_")) == expected

    def detect_phoneme_type(self, segment: str) -> PhonemeType:
        """Classify a phoneme segment into its type.

        Checks run in this order; the first match wins:
        empty segment or breath marker -> R, single vowel -> VV,
        starts with a known cluster -> CCR, consonant(s)+y/w+vowel -> DIP,
        ends in a vowel -> VCV (3+ chars starting with a vowel) or CV,
        otherwise -> VC (starts with a vowel) or CV.

        Args:
            segment: Individual segment (e.g., "ba", "a", "pra")

        Returns:
            PhonemeType classification
        """
        segment_lower = segment.lower()

        # Breath markers are tested first, so the bare tokens "r" and "br"
        # count as breaths even though they are also a consonant / cluster.
        if segment_lower == "" or segment_lower in self.BREATH_MARKERS:
            return PhonemeType.R

        # Pure vowel (single vowel character)
        if segment_lower in self.VOWELS:
            return PhonemeType.VV

        # Consonant cluster at the start.
        # NOTE(review): every cluster maps to CCR; PhonemeType.CCL is never
        # produced here — confirm whether "l" clusters should map to CCL.
        if segment_lower.startswith(tuple(self.CLUSTERS)):
            return PhonemeType.CCR

        # Diphthongs (consonant + y/w + vowel)
        if self._DIPHTHONG_PATTERN.match(segment_lower):
            return PhonemeType.DIP

        # From here on the segment is non-empty, so indexing is safe.
        starts_with_vowel = segment_lower[0] in self.VOWELS

        if segment_lower[-1] in self.VOWELS:
            # Vowel ... vowel with something in between is treated as VCV.
            if starts_with_vowel and len(segment_lower) >= 3:
                return PhonemeType.VCV
            return PhonemeType.CV

        # Ends with a consonant: vowel-initial is a coda (VC); anything
        # else, e.g. the trailing "b" of ba_be_bi_bo_bu_ba_b, counts as CV.
        return PhonemeType.VC if starts_with_vowel else PhonemeType.CV

    def get_line_summary(self, line: PhoneticLine) -> str:
        """Generate a human-readable summary of a phonetic line.

        Args:
            line: PhoneticLine to summarize

        Returns:
            Three-line string: index and raw text, segment count and
            expected duration, and per-type segment counts in order of
            first appearance
        """
        type_counts = {}
        for ptype in line.phoneme_types:
            type_counts[ptype.name] = type_counts.get(ptype.name, 0) + 1

        type_summary = ", ".join(f"{k}:{v}" for k, v in type_counts.items())

        return (
            f"Línea {line.index:03d}: {line.raw_text}\n"
            f"  Segmentos: {line.mora_count} | Duración: {line.expected_duration_ms:.1f}ms\n"
            f"  Tipos: {type_summary}"
        )

`__init__(bpm=DEFAULT_BPM)`

Initialize parser with BPM for duration calculations.

Parameters:

Name	Type	Description	Default
`bpm`	`int`	Beats per minute for timing calculations	`DEFAULT_BPM`

Source code in src/core/reclist_parser.py

def __init__(self, bpm: int = DEFAULT_BPM):
    """Create a parser whose duration estimates are based on ``bpm``.

    Args:
        bpm: Beats per minute; stored as ``self.bpm`` and passed to
            ``ms_per_beat`` to obtain the per-mora duration.
    """
    self.bpm = bpm
    # Cached once here; _parse_line multiplies it by the segment count.
    mora_ms = ms_per_beat(bpm)
    self._ms_per_mora = mora_ms

`detect_phoneme_type(segment)`

Classify a phoneme segment into its type.

Parameters:

Name	Type	Description	Default
`segment`	`str`	Individual segment (e.g., "ba", "a", "pra")	required

Returns:

Type	Description
`PhonemeType`	PhonemeType classification

Source code in src/core/reclist_parser.py

def detect_phoneme_type(self, segment: str) -> PhonemeType:
    """Return the PhonemeType of one reclist segment.

    The checks run in a fixed order and the first match wins:
    empty segment or breath marker -> R, single vowel -> VV, known
    cluster prefix -> CCR, consonant(s)+y/w+vowel -> DIP, vowel-final ->
    VCV (3+ chars and vowel-initial) or CV, consonant-final -> VC
    (vowel-initial) or CV.

    Args:
        segment: Individual segment (e.g., "ba", "a", "pra")

    Returns:
        PhonemeType classification
    """
    text = segment.lower()

    # Breaths come first, so bare "r" / "br" are breaths, not consonants.
    if text == "" or text in self.BREATH_MARKERS:
        return PhonemeType.R

    if text in self.VOWELS:
        return PhonemeType.VV

    # NOTE(review): all clusters map to CCR; CCL is never returned here.
    if any(text.startswith(prefix) for prefix in self.CLUSTERS):
        return PhonemeType.CCR

    if re.match(r'^[bcdfghjklmnpqrstvwxyz]+[yw][aeiou]$', text, re.IGNORECASE):
        return PhonemeType.DIP

    # text is guaranteed non-empty at this point.
    vowel_initial = text[0] in self.VOWELS

    if text[-1] in self.VOWELS:
        return PhonemeType.VCV if vowel_initial and len(text) >= 3 else PhonemeType.CV

    # Consonant-final: a coda when vowel-initial, otherwise a standalone
    # consonant such as the trailing "b" of ba_be_bi_bo_bu_ba_b.
    return PhonemeType.VC if vowel_initial else PhonemeType.CV

`get_line_summary(line)`

Generate a human-readable summary of a phonetic line.

Parameters:

Name	Type	Description	Default
`line`	`PhoneticLine`	PhoneticLine to summarize	required

Returns:

Type	Description
`str`	Formatted summary string

Source code in src/core/reclist_parser.py

def get_line_summary(self, line: PhoneticLine) -> str:
    """Describe a phonetic line in three human-readable lines.

    Args:
        line: PhoneticLine to summarize

    Returns:
        Text with the zero-padded index and raw text, the segment count
        with the expected duration, and the number of segments per
        phoneme type (types listed in order of first appearance).
    """
    tally = {}
    for phoneme_type in line.phoneme_types:
        label = phoneme_type.name
        if label in tally:
            tally[label] += 1
        else:
            tally[label] = 1

    breakdown = ", ".join(f"{label}:{count}" for label, count in tally.items())

    header = f"Línea {line.index:03d}: {line.raw_text}\n"
    stats = f"  Segmentos: {line.mora_count} | Duración: {line.expected_duration_ms:.1f}ms\n"
    kinds = f"  Tipos: {breakdown}"
    return header + stats + kinds

`parse_content(content)`

Parse reclist content string.

Parameters:

Name	Type	Description	Default
`content`	`str`	Raw text content of reclist	required

Returns:

Type	Description
`List[PhoneticLine]`	List of PhoneticLine objects

Raises:

Type	Description
`ReclistParseError`	If content format is invalid

Source code in src/core/reclist_parser.py

def parse_content(self, content: str) -> List[PhoneticLine]:
    """Parse reclist content string.

    Blank lines and lines starting with ``#`` or ``//`` are skipped.
    Line numbers (used for ``PhoneticLine.index`` and in error messages)
    are 1-based positions in ``content`` as given, so they match what an
    editor shows even when the text starts with blank lines.

    Args:
        content: Raw text content of reclist

    Returns:
        List of PhoneticLine objects

    Raises:
        ReclistParseError: If content format is invalid or no valid line
            is found
    """
    phonetic_lines: List[PhoneticLine] = []

    # Drop a leading BOM: it is not whitespace, so str.strip() would
    # leave it inside the first segment and the generated filename.
    text = content.lstrip("\ufeff")

    # The text is deliberately NOT stripped as a whole: removing leading
    # blank lines would shift every reported line number.
    for line_num, raw_line in enumerate(text.splitlines(), start=1):
        # Skip empty lines and comments
        stripped = raw_line.strip()
        if not stripped or stripped.startswith(("#", "//")):
            continue

        try:
            phonetic_lines.append(self._parse_line(stripped, line_num))
        except ReclistParseError:
            raise
        except Exception as e:
            # Re-raise with line context, keeping the original cause.
            raise ReclistParseError(
                f"Error inesperado: {str(e)}",
                line_number=line_num,
                line_content=stripped
            ) from e

    if not phonetic_lines:
        raise ReclistParseError("El archivo reclist está vacío o no contiene líneas válidas")

    return phonetic_lines

`parse_file(filepath)`

Parse a reclist file and return list of PhoneticLine objects.

Parameters:

Name	Type	Description	Default
`filepath`	`str`	Path to the reclist .txt file	required

Returns:

Type	Description
`List[PhoneticLine]`	List of PhoneticLine objects with complete metadata

Raises:

Type	Description
`ReclistParseError`	If the file format is invalid
`FileNotFoundError`	If the file doesn't exist

Source code in src/core/reclist_parser.py

def parse_file(self, filepath: str) -> List[PhoneticLine]:
    """Parse a reclist file and return list of PhoneticLine objects.

    The file is decoded as UTF-8 (with or without BOM), then Shift-JIS,
    and finally latin-1, which accepts any byte sequence.

    Args:
        filepath: Path to the reclist .txt file

    Returns:
        List of PhoneticLine objects with complete metadata

    Raises:
        ReclistParseError: If the extension is not .txt, or if
            parse_content rejects the decoded text
        FileNotFoundError: If the file doesn't exist
    """
    path = Path(filepath)

    if not path.exists():
        raise FileNotFoundError(f"Reclist file not found: {filepath}")

    if path.suffix.lower() != ".txt":
        raise ReclistParseError(
            "Reclist must be a .txt file",
            line_content=filepath
        )

    # "utf-8-sig" also reads BOM-less UTF-8 and drops a leading BOM, so it
    # cannot leak into the first segment / generated filename.
    for encoding in ("utf-8-sig", "shift-jis"):
        try:
            content = path.read_text(encoding=encoding)
            break
        except UnicodeDecodeError:
            continue
    else:
        # latin-1 maps every byte to a character, so this cannot fail.
        content = path.read_text(encoding="latin-1")

    return self.parse_content(content)

`validate_mora_count(line, expected=MORAS_PER_LINE)`

Verify that the line has the expected number of moras.

Parameters:

Name	Type	Description	Default
`line`	`str`	Raw line text	required
`expected`	`int`	Expected mora count (default: 7)	`MORAS_PER_LINE`

Returns:

Type	Description
`bool`	True if mora count matches expected

Source code in src/core/reclist_parser.py

def validate_mora_count(self, line: str, expected: int = MORAS_PER_LINE) -> bool:
    """Check whether a raw line contains exactly ``expected`` segments.

    Args:
        line: Raw line text
        expected: Required number of segments (defaults to MORAS_PER_LINE)

    Returns:
        True if the underscore-separated segment count equals ``expected``
    """
    # N underscores separate N + 1 segments (an empty line has one, empty,
    # segment), which is exactly what len(line.split("_")) reports.
    segment_total = line.count("_") + 1
    return segment_total == expected

Data models for VocalParam.

This module contains all dataclasses and enums used throughout the application. Following the specification in Section 6-7 of the design document.

`OtoEntry` `dataclass`

Represents a single entry in oto.ini file.

Format: filename.wav=alias,offset,consonant,cutoff,preutter,overlap

Attributes:

Name	Type	Description
`filename`	`str`	WAV file name
`alias`	`str`	Phonetic alias (e.g., "- ba" or "a be")
`offset`	`float`	Start position in ms (cyan line)
`consonant`	`float`	Fixed consonant region in ms (dark blue line)
`cutoff`	`float`	End position in ms, negative = from end (pink/magenta line)
`preutter`	`float`	Pre-utterance point in ms (red line)
`overlap`	`float`	Overlap region in ms (green line)

Source code in src/core/models.py

@dataclass
class OtoEntry:
    """Represents a single entry in oto.ini file.

    Format: filename.wav=alias,offset,consonant,cutoff,preutter,overlap
    An optional comment is written after the last value as `` #comment``.

    Attributes:
        filename: WAV file name
        alias: Phonetic alias (e.g., "- ba" or "a be")
        offset: Start position in ms (cyan line)
        consonant: Fixed consonant region in ms (dark blue line)
        cutoff: End position in ms, negative = from end (pink/magenta line)
        preutter: Pre-utterance point in ms (red line)
        overlap: Overlap region in ms (green line)
        comment: Optional free text appended to the line; empty = none
    """
    filename: str
    alias: str
    offset: float
    consonant: float
    cutoff: float
    preutter: float
    overlap: float
    comment: str = ""

    def to_oto_line(self) -> str:
        """Convert to oto.ini format string.

        Numeric values are written with one decimal. The `` #comment``
        suffix is added only when ``comment`` is non-empty.
        """
        # Build the mandatory part first. Putting the conditional directly
        # after the adjacent f-strings would apply it to the whole
        # concatenation and return "" for entries without a comment.
        line = (
            f"{self.filename}={self.alias},"
            f"{self.offset:.1f},{self.consonant:.1f},"
            f"{self.cutoff:.1f},{self.preutter:.1f},{self.overlap:.1f}"
        )
        if self.comment:
            line += f" #{self.comment}"
        return line

    @classmethod
    def from_oto_line(cls, line: str) -> "OtoEntry":
        """Parse an oto.ini format line.

        Accepts the output of ``to_oto_line``, including the optional
        `` #comment`` suffix after the overlap value.

        Raises:
            ValueError: If there is no ``=`` or a numeric field is invalid.
            IndexError: If fewer than six comma-separated fields follow
                the ``=``.
        """
        # Split filename from parameters
        filename_part, params_part = line.strip().split("=", 1)

        # Split only the first five commas so a comment containing commas
        # stays intact in the last piece.
        parts = params_part.split(",", 5)

        # Last piece = overlap, optionally followed by "#comment". The
        # comment marker is only looked for here, so a "#" inside the
        # alias is left alone.
        overlap_text, _, comment = parts[5].partition("#")
        # Ignore any extra comma-separated fields after overlap, as before.
        overlap_text = overlap_text.split(",", 1)[0]

        return cls(
            filename=filename_part,
            alias=parts[0],
            offset=float(parts[1]),
            consonant=float(parts[2]),
            cutoff=float(parts[3]),
            preutter=float(parts[4]),
            overlap=float(overlap_text),
            comment=comment,
        )

    def validate(self) -> List[str]:
        """Validate OTO parameters.

        Checks:
        - Overlap <= Preutterance (Gold Rule)

        Returns:
            List of error messages, empty if the entry is valid.
        """
        errors = []
        if self.overlap > self.preutter:
            errors.append(f"Overlap ({self.overlap}) cannot be greater than Preutterance ({self.preutter})")
        return errors

`from_oto_line(line)` `classmethod`

Parse an oto.ini format line.

Source code in src/core/models.py

@classmethod
def from_oto_line(cls, line: str) -> "OtoEntry":
    """Parse an oto.ini format line.

    Expected shape: ``filename=alias,offset,consonant,cutoff,preutter,overlap``
    optionally followed by `` #comment`` after the overlap value.

    Raises:
        ValueError: If there is no ``=`` or a numeric field is invalid.
        IndexError: If fewer than six comma-separated fields follow the
            ``=``.
    """
    # Split filename from parameters
    filename_part, params_part = line.strip().split("=", 1)

    # Split only the first five commas so a comment containing commas
    # stays intact in the last piece.
    parts = params_part.split(",", 5)

    # Last piece = overlap, optionally followed by "#comment". The comment
    # marker is only looked for here, so a "#" inside the alias is left
    # alone.
    overlap_text, _, comment = parts[5].partition("#")
    # Ignore any extra comma-separated fields after overlap, as before.
    overlap_text = overlap_text.split(",", 1)[0]

    return cls(
        filename=filename_part,
        alias=parts[0],
        offset=float(parts[1]),
        consonant=float(parts[2]),
        cutoff=float(parts[3]),
        preutter=float(parts[4]),
        overlap=float(overlap_text),
        comment=comment,
    )

`to_oto_line()`

Convert to oto.ini format string.

Source code in src/core/models.py

def to_oto_line(self) -> str:
    """Convert to oto.ini format string.

    Produces ``filename=alias,offset,consonant,cutoff,preutter,overlap``
    with one decimal per numeric value, followed by `` #comment`` only
    when ``comment`` is non-empty.
    """
    # Build the mandatory part first. Putting the conditional directly
    # after the adjacent f-strings would apply it to the whole
    # concatenation and return "" for entries without a comment.
    line = (
        f"{self.filename}={self.alias},"
        f"{self.offset:.1f},{self.consonant:.1f},"
        f"{self.cutoff:.1f},{self.preutter:.1f},{self.overlap:.1f}"
    )
    if self.comment:
        line += f" #{self.comment}"
    return line

`validate()`

Validate OTO parameters.

Checks:

- Overlap <= Preutterance (Gold Rule)

Returns:

Type	Description
`List[str]`	List of error messages.

Source code in src/core/models.py

def validate(self) -> List[str]:
    """Check the entry against the OTO consistency rules.

    Rule applied:
    - Overlap must not exceed Preutterance (Gold Rule)

    Returns:
        A list with one message when the rule is broken, otherwise an
        empty list.
    """
    if self.overlap > self.preutter:
        return [f"Overlap ({self.overlap}) cannot be greater than Preutterance ({self.preutter})"]
    return []

`PhonemeType`

Bases: Enum

Classification of phoneme segments.

Based on RF-02.1 specification:

- VV: Pure vowels
- CV: Consonant + Vowel (basic)
- VCV: Vowel-Consonant-Vowel transitions
- VC: Vowel + Consonant (coda)
- CCR: Consonant clusters (right)
- CCL: Consonant clusters (left)
- DIP: Diphthongs (palatalization/labialization)
- R: Breaths/respirations

Source code in src/core/models.py

class PhonemeType(Enum):
    """Classification of phoneme segments.

    Based on RF-02.1 specification:
    - VV: Pure vowels
    - CV: Consonant + Vowel (basic)
    - VCV: Vowel-Consonant-Vowel transitions
    - VC: Vowel + Consonant (coda)
    - CCR: Consonant clusters (right)
    - CCL: Consonant clusters (left)
    - DIP: Diphthongs (palatalization/labialization)
    - R: Breaths/respirations

    Member values come from ``auto()``, so they follow declaration order;
    reordering members changes their numeric values.
    """
    VV = auto()   # Pure vowels (a_a_i_a_u_e_o)
    CV = auto()   # Consonant-Vowel (ba, ka, sa...)
    VCV = auto()  # Vowel-Consonant-Vowel transitions
    VC = auto()   # Vowel-Consonant codas
    CCR = auto()  # Consonant clusters (pr, tr, kr...)
    # NOTE(review): ReclistParser.detect_phoneme_type never returns CCL;
    # confirm where (or whether) this member is meant to be produced.
    CCL = auto()  # Consonant clusters left
    DIP = auto()  # Diphthongs
    R = auto()    # Breaths/respirations

`PhoneticLine` `dataclass`

Represents a single line from the Reclist.

As specified in Section 6, MÓDULO 1.

Attributes:

Name	Type	Description
`index`	`int`	Line number (001, 002...)
`raw_text`	`str`	Original text from reclist (e.g., "ba_be_bi_bo_bu_ba_b")
`segments`	`List[str]`	List of individual segments (e.g., ["ba", "be", "bi"...])
`phoneme_types`	`List[PhonemeType]`	Classification of each segment
`expected_duration_ms`	`float`	Calculated duration based on BPM
`filename`	`str`	Generated WAV filename

Source code in src/core/models.py

@dataclass
class PhoneticLine:
    """Represents a single line from the Reclist.

    As specified in Section 6, MÓDULO 1.

    Attributes:
        index: Line number (001, 002...)
        raw_text: Original text from reclist (e.g., "ba_be_bi_bo_bu_ba_b")
        segments: List of individual segments (e.g., ["ba", "be", "bi"...])
        phoneme_types: Classification of each segment
        expected_duration_ms: Calculated duration based on BPM
        filename: Generated WAV filename
    """
    index: int  # ReclistParser passes the line number of the line in the parsed text
    raw_text: str  # the stripped line exactly as it appeared in the reclist
    segments: List[str]  # raw_text split on "_"
    phoneme_types: List[PhonemeType]  # one classification per entry of segments
    expected_duration_ms: float  # ReclistParser sets this to per-mora ms * number of segments
    filename: str  # ReclistParser sets this to "<raw_text>.wav"

    @property
    def mora_count(self) -> int:
        """Number of moras (segments) in this line."""
        # Every segment is counted as exactly one mora.
        return len(self.segments)

`mora_count` `property`

Number of moras (segments) in this line.

`ProjectData` `dataclass`

Complete project state for serialization (JSON).

Based on Section 7 data model specification.

Source code in src/core/models.py

@dataclass
class ProjectData:
    """Complete project state for serialization (JSON).

    Based on Section 7 data model specification.

    ``to_dict`` and ``from_dict`` are inverses: timestamps and the version
    travel inside a nested "metadata" mapping, and each OTO entry takes
    its filename from the recording that owns it.
    """
    project_name: str
    bpm: int
    reclist_path: str
    output_directory: str
    recordings: List[Recording] = field(default_factory=list)
    created_at: datetime = field(default_factory=datetime.now)
    last_modified: datetime = field(default_factory=datetime.now)
    version: str = "1.0.0"

    def to_dict(self) -> dict:
        """Return a JSON-serializable dictionary describing the project."""
        recordings_payload = []
        for recording in self.recordings:
            entries_payload = []
            for entry in recording.oto_entries:
                # The entry's filename is not stored; from_dict restores
                # it from the enclosing recording.
                entries_payload.append({
                    "alias": entry.alias,
                    "offset": entry.offset,
                    "consonant": entry.consonant,
                    "cutoff": entry.cutoff,
                    "preutter": entry.preutter,
                    "overlap": entry.overlap,
                    "comment": entry.comment,
                })
            recordings_payload.append({
                "line_index": recording.line_index,
                "filename": recording.filename,
                "status": recording.status.value,
                "duration_ms": recording.duration_ms,
                "hash": recording.hash,
                "oto_entries": entries_payload,
            })

        payload = {
            "project_name": self.project_name,
            "bpm": self.bpm,
            "reclist_path": self.reclist_path,
            "output_directory": self.output_directory,
            "recordings": recordings_payload,
        }
        # Datetimes are stored as ISO-8601 text.
        payload["metadata"] = {
            "created_at": self.created_at.isoformat(),
            "last_modified": self.last_modified.isoformat(),
            "version": self.version,
        }
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> "ProjectData":
        """Build a ProjectData from a dictionary shaped like ``to_dict`` output.

        Missing "recordings", "oto_entries", "metadata", "duration_ms",
        "hash", "comment" and metadata fields fall back to defaults.

        Raises:
             ValueError: If required fields are missing.
        """
        missing = [
            key
            for key in ("project_name", "bpm", "reclist_path", "output_directory")
            if key not in data
        ]
        if missing:
            raise ValueError(f"Missing required fields in project data: {', '.join(missing)}")

        recordings = []
        for raw_recording in data.get("recordings", []):
            entries = []
            for raw_entry in raw_recording.get("oto_entries", []):
                entries.append(OtoEntry(
                    filename=raw_recording["filename"],
                    alias=raw_entry["alias"],
                    offset=raw_entry["offset"],
                    consonant=raw_entry["consonant"],
                    cutoff=raw_entry["cutoff"],
                    preutter=raw_entry["preutter"],
                    overlap=raw_entry["overlap"],
                    comment=raw_entry.get("comment", ""),
                ))
            recordings.append(Recording(
                line_index=raw_recording["line_index"],
                filename=raw_recording["filename"],
                status=RecordingStatus(raw_recording["status"]),
                duration_ms=raw_recording.get("duration_ms", 0.0),
                hash=raw_recording.get("hash"),
                oto_entries=entries,
            ))

        meta = data.get("metadata", {})
        # Absent timestamps default to "now".
        created_text = meta.get("created_at", datetime.now().isoformat())
        modified_text = meta.get("last_modified", datetime.now().isoformat())

        return cls(
            project_name=data["project_name"],
            bpm=data["bpm"],
            reclist_path=data["reclist_path"],
            output_directory=data["output_directory"],
            recordings=recordings,
            created_at=datetime.fromisoformat(created_text),
            last_modified=datetime.fromisoformat(modified_text),
            version=meta.get("version", "1.0.0"),
        )

    def validate(self) -> List[str]:
        """Check the project for inconsistent values.

        Returns:
            List of error messages, empty if valid.
        """
        problems = []

        if not self.project_name:
            problems.append("Project name cannot be empty")
        # Accepted BPM range is 40..300 inclusive.
        if self.bpm < 40 or self.bpm > 300:
            problems.append(f"Invalid BPM: {self.bpm}")

        # Per-recording integrity checks
        for position, recording in enumerate(self.recordings):
            if not recording.filename:
                problems.append(f"Recording at index {position} has no filename")
            if recording.duration_ms < 0:
                problems.append(f"Invalid duration for {recording.filename}")

        return problems

`from_dict(data)` `classmethod`

Create ProjectData from dictionary with validation.

Raises:

Type	Description
`ValueError`	If required fields are missing.

Source code in src/core/models.py

@classmethod
def from_dict(cls, data: dict) -> "ProjectData":
    """Build a ProjectData from a dictionary shaped like ``to_dict`` output.

    Missing "recordings", "oto_entries", "metadata", "duration_ms",
    "hash", "comment" and metadata fields fall back to defaults.

    Raises:
         ValueError: If required fields are missing.
    """
    missing = [
        key
        for key in ("project_name", "bpm", "reclist_path", "output_directory")
        if key not in data
    ]
    if missing:
        raise ValueError(f"Missing required fields in project data: {', '.join(missing)}")

    recordings = []
    for raw_recording in data.get("recordings", []):
        entries = []
        for raw_entry in raw_recording.get("oto_entries", []):
            # Entries carry no filename of their own; they inherit the
            # filename of the recording that owns them.
            entries.append(OtoEntry(
                filename=raw_recording["filename"],
                alias=raw_entry["alias"],
                offset=raw_entry["offset"],
                consonant=raw_entry["consonant"],
                cutoff=raw_entry["cutoff"],
                preutter=raw_entry["preutter"],
                overlap=raw_entry["overlap"],
                comment=raw_entry.get("comment", ""),
            ))
        recordings.append(Recording(
            line_index=raw_recording["line_index"],
            filename=raw_recording["filename"],
            status=RecordingStatus(raw_recording["status"]),
            duration_ms=raw_recording.get("duration_ms", 0.0),
            hash=raw_recording.get("hash"),
            oto_entries=entries,
        ))

    meta = data.get("metadata", {})
    # Absent timestamps default to "now".
    created_text = meta.get("created_at", datetime.now().isoformat())
    modified_text = meta.get("last_modified", datetime.now().isoformat())

    return cls(
        project_name=data["project_name"],
        bpm=data["bpm"],
        reclist_path=data["reclist_path"],
        output_directory=data["output_directory"],
        recordings=recordings,
        created_at=datetime.fromisoformat(created_text),
        last_modified=datetime.fromisoformat(modified_text),
        version=meta.get("version", "1.0.0"),
    )

`to_dict()`

Convert to dictionary for JSON serialization.

Source code in src/core/models.py

def to_dict(self) -> dict:
    """Return a JSON-serializable dictionary describing the project.

    Timestamps are written as ISO-8601 text under "metadata"; recording
    status is written as the enum's value.
    """

    def entry_payload(entry) -> dict:
        # The entry's filename is intentionally not stored; it is the
        # same as the owning recording's filename.
        return {
            "alias": entry.alias,
            "offset": entry.offset,
            "consonant": entry.consonant,
            "cutoff": entry.cutoff,
            "preutter": entry.preutter,
            "overlap": entry.overlap,
            "comment": entry.comment,
        }

    def recording_payload(recording) -> dict:
        return {
            "line_index": recording.line_index,
            "filename": recording.filename,
            "status": recording.status.value,
            "duration_ms": recording.duration_ms,
            "hash": recording.hash,
            "oto_entries": [entry_payload(item) for item in recording.oto_entries],
        }

    payload = {
        "project_name": self.project_name,
        "bpm": self.bpm,
        "reclist_path": self.reclist_path,
        "output_directory": self.output_directory,
        "recordings": [recording_payload(item) for item in self.recordings],
    }
    payload["metadata"] = {
        "created_at": self.created_at.isoformat(),
        "last_modified": self.last_modified.isoformat(),
        "version": self.version,
    }
    return payload

`validate()`

Validate project data consistency.

Returns:

Type	Description
`List[str]`	List of error messages, empty if valid.

Source code in src/core/models.py

def validate(self) -> List[str]:
    """Check the project for inconsistent values.

    Verifies that the project has a name, that the BPM lies within
    40..300 inclusive, and that every recording has a filename and a
    non-negative duration.

    Returns:
        List of error messages, empty if valid.
    """
    problems = []

    if not self.project_name:
        problems.append("Project name cannot be empty")
    if self.bpm < 40 or self.bpm > 300:
        problems.append(f"Invalid BPM: {self.bpm}")

    # Per-recording integrity checks
    for position, recording in enumerate(self.recordings):
        if not recording.filename:
            problems.append(f"Recording at index {position} has no filename")
        if recording.duration_ms < 0:
            problems.append(f"Invalid duration for {recording.filename}")

    return problems

`Recording` `dataclass`

Represents a single recording with its oto entries.

Source code in src/core/models.py

@dataclass
class Recording:
    """Represents a single recording with its oto entries.

    Attributes:
        line_index: Integer index linking the recording to a reclist line
            (presumably PhoneticLine.index — TODO confirm against callers).
        filename: File name of the recording; ProjectData.from_dict also
            uses it as the filename of every OtoEntry of this recording.
        status: Current RecordingStatus; new recordings start as PENDING.
        duration_ms: Duration in milliseconds, 0.0 until set;
            ProjectData.validate reports negative values as errors.
        hash: Optional string, None when absent. NOTE(review): presumably
            a content hash of the audio file — confirm where it is set.
        oto_entries: OtoEntry objects belonging to this recording.
    """
    line_index: int
    filename: str
    status: RecordingStatus = RecordingStatus.PENDING
    duration_ms: float = 0.0
    hash: Optional[str] = None
    # default_factory gives every instance its own list instead of a
    # shared mutable default.
    oto_entries: List[OtoEntry] = field(default_factory=list)

`RecordingStatus`

Bases: Enum

Status of a recording line.

Source code in src/core/models.py

class RecordingStatus(Enum):
    """Status of a recording line.

    The string values are what ProjectData.to_dict writes and what
    ProjectData.from_dict reads back, so changing them breaks previously
    saved project data.
    """
    PENDING = "pending"      # default status of a new Recording
    RECORDED = "recorded"    # presumably: audio has been captured — TODO confirm
    VALIDATED = "validated"  # presumably: recording has been reviewed/accepted — TODO confirm

Referencia Core

ReclistParseError

ReclistParser

__init__(bpm=DEFAULT_BPM)

detect_phoneme_type(segment)

get_line_summary(line)

parse_content(content)

parse_file(filepath)

validate_mora_count(line, expected=MORAS_PER_LINE)

OtoEntry dataclass

from_oto_line(line) classmethod

to_oto_line()

validate()

PhonemeType

PhoneticLine dataclass

mora_count property

ProjectData dataclass

from_dict(data) classmethod

to_dict()

validate()

Recording dataclass

RecordingStatus

`ReclistParseError`

`ReclistParser`

`__init__(bpm=DEFAULT_BPM)`

`detect_phoneme_type(segment)`

`get_line_summary(line)`

`parse_content(content)`

`parse_file(filepath)`

`validate_mora_count(line, expected=MORAS_PER_LINE)`

`OtoEntry` `dataclass`

`from_oto_line(line)` `classmethod`

`to_oto_line()`

`validate()`

`PhonemeType`

`PhoneticLine` `dataclass`

`mora_count` `property`

`ProjectData` `dataclass`

`from_dict(data)` `classmethod`

`to_dict()`

`validate()`

`Recording` `dataclass`

`RecordingStatus`