Remove build artifacts, dev tool configs, unused dependencies, and third-party source dumps. Add proper README, update .gitignore, clean up Makefile.
227 lines
6.6 KiB
Java
227 lines
6.6 KiB
Java
package com.tiedup.remake.util;
|
|
|
|
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
|
|
|
|
/**
 * Analyzes word structure to preserve rhythm in gagged speech.
 *
 * <p>All methods are static and purely heuristic: a "vowel" is any of the
 * letters {@code a e i o u y} (case-insensitive), and every other letter is
 * treated as a consonant. Case folding is locale-independent so results do
 * not change with the JVM's default locale.
 */
public class SyllableAnalyzer {

    /** Lower-case letters treated as vowels; note that 'y' is always a vowel here. */
    private static final Set<Character> VOWELS = Set.of(
        'a',
        'e',
        'i',
        'o',
        'u',
        'y'
    );

    /** Vowel pairs that {@link #isStressedSyllable} treats as a sign of stress. */
    private static final String[] LONG_VOWEL_PAIRS = {
        "aa",
        "ee",
        "oo",
        "ii",
        "uu",
        "ou",
        "ai",
        "ei",
    };

    /**
     * Count the number of syllables in a word.
     *
     * <p>The count is the number of vowel groups among the word's ASCII
     * letters, minus one for a trailing silent 'e' (an 'e' preceded by a
     * consonant, excluding words ending in "le").
     *
     * @param word The word to analyze
     * @return 0 if {@code word} is null or empty; otherwise the estimated
     *         number of syllables, never less than 1 (a word containing no
     *         ASCII letters yields 1)
     */
    public static int countSyllables(String word) {
        if (word == null || word.isEmpty()) {
            return 0;
        }

        String cleanWord = asciiLettersLowerCase(word);
        if (cleanWord.isEmpty()) {
            return 1;
        }

        int count = 0;
        boolean prevVowel = false;
        for (int i = 0; i < cleanWord.length(); i++) {
            boolean vowel = isVowel(cleanWord.charAt(i));
            // Only the first vowel of a run starts a new syllable.
            if (vowel && !prevVowel) {
                count++;
            }
            prevVowel = vowel;
        }

        // Handle silent 'e' at end (common in English/French)
        if (count > 1 && cleanWord.length() > 2 && cleanWord.endsWith("e")) {
            char beforeE = cleanWord.charAt(cleanWord.length() - 2);
            // Silent 'e' after consonant; "-le" endings keep their syllable.
            if (!isVowel(beforeE) && !cleanWord.endsWith("le")) {
                count--;
            }
        }

        return Math.max(1, count);
    }

    /**
     * Split a word into approximate syllables.
     * This is a simplified algorithm that works for most Western languages.
     *
     * <p>A break is considered whenever a new vowel group starts after at
     * least one earlier vowel group. With two or more consonants directly
     * before the vowel, the first consonant stays with the previous syllable
     * and the rest start the new one; with exactly one consonant, that
     * consonant starts the new syllable. Original case and non-letter
     * characters are preserved; non-letters stay attached to the syllable
     * being built when they are encountered.
     *
     * @param word The word to split
     * @return List of syllables whose concatenation equals {@code word};
     *         an empty list if {@code word} is null or empty
     */
    public static List<String> splitIntoSyllables(String word) {
        List<String> syllables = new ArrayList<>();

        if (word == null || word.isEmpty()) {
            return syllables;
        }

        StringBuilder current = new StringBuilder();
        boolean prevVowel = false;
        int vowelGroups = 0;

        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);

            if (!Character.isLetter(c)) {
                // Non-letter characters stick with current syllable
                current.append(c);
                continue;
            }

            boolean vowel = isVowel(c);
            boolean startsVowelGroup = vowel && !prevVowel;

            // A new vowel group after an earlier one = potential syllable break
            if (startsVowelGroup && vowelGroups > 0 && current.length() > 0) {
                int consonantCount = countTrailingConsonants(
                    current.toString()
                );
                if (consonantCount >= 1) {
                    // 2+ consonants: the first stays behind, the rest move on.
                    // 1 consonant: it moves to the new syllable.
                    int carried = Math.max(1, consonantCount - 1);
                    int splitAt = current.length() - carried;
                    // splitAt == 0 would leave an empty syllable behind; skip.
                    if (splitAt > 0) {
                        syllables.add(current.substring(0, splitAt));
                        current = new StringBuilder(
                            current.substring(splitAt)
                        );
                    }
                }
            }

            current.append(c);

            if (startsVowelGroup) {
                vowelGroups++;
            }
            prevVowel = vowel;
        }

        // Add remaining
        if (current.length() > 0) {
            syllables.add(current.toString());
        }

        // If we somehow got no syllables, return the whole word
        if (syllables.isEmpty()) {
            syllables.add(word);
        }

        return syllables;
    }

    /**
     * Count trailing consonants in a string.
     *
     * <p>Scans backwards from the end and stops at the first vowel or
     * non-letter character.
     */
    private static int countTrailingConsonants(String s) {
        int count = 0;
        for (int i = s.length() - 1; i >= 0; i--) {
            char c = s.charAt(i);
            if (!Character.isLetter(c) || isVowel(c)) {
                break;
            }
            count++;
        }
        return count;
    }

    /**
     * Check if a syllable position is typically stressed.
     * Simple heuristic: first syllable, last syllable of words with at most
     * two syllables, and syllables containing a double or long vowel pair.
     *
     * @param syllable The syllable content; may be null, in which case only
     *                 the positional rules apply
     * @param position Position in word (0-indexed)
     * @param total Total number of syllables
     * @return true if likely stressed
     */
    public static boolean isStressedSyllable(
        String syllable,
        int position,
        int total
    ) {
        // First syllable is often stressed in English/Germanic
        if (position == 0) {
            return true;
        }

        // Last syllable in short words
        if (total <= 2 && position == total - 1) {
            return true;
        }

        // Nothing to inspect: be consistent with the other null-tolerant methods.
        if (syllable == null) {
            return false;
        }

        // Locale.ROOT keeps 'I' -> 'i' even under Turkish/Azeri default locales.
        String lower = syllable.toLowerCase(Locale.ROOT);
        for (String pair : LONG_VOWEL_PAIRS) {
            if (lower.contains(pair)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Get the primary vowel of a syllable.
     *
     * @param syllable The syllable to analyze
     * @return The first vowel found, lower-cased, or 'a' as default when the
     *         syllable is null, empty, or contains no vowel
     */
    public static char getPrimaryVowel(String syllable) {
        if (syllable == null || syllable.isEmpty()) {
            return 'a';
        }

        for (int i = 0; i < syllable.length(); i++) {
            char lower = Character.toLowerCase(syllable.charAt(i));
            if (VOWELS.contains(lower)) {
                return lower;
            }
        }

        return 'a';
    }

    /** Returns true if {@code c}, ignoring case, is one of the vowel letters. */
    private static boolean isVowel(char c) {
        return VOWELS.contains(Character.toLowerCase(c));
    }

    /**
     * Returns the ASCII letters of {@code word}, lower-cased, with everything
     * else dropped. Uses per-character case folding, which is independent of
     * the default locale and avoids compiling a regex on every call.
     */
    private static String asciiLettersLowerCase(String word) {
        StringBuilder letters = new StringBuilder(word.length());
        for (int i = 0; i < word.length(); i++) {
            char lower = Character.toLowerCase(word.charAt(i));
            if (lower >= 'a' && lower <= 'z') {
                letters.append(lower);
            }
        }
        return letters.toString();
    }
}
|