~repos /only-bible-app

#kotlin#android#ios

GIT_CONFIG_PARAMETERS="'http.version=HTTP/1.1'" git clone https://git.pyrossh.dev/only-bible-app.git
Discussions: https://groups.google.com/g/rust-embed-devs

The only bible app you will ever need. No ads. No in-app purchases. No distractions.



scripts/addRedLettersNepali.js



import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";
// ES modules have no built-in __dirname, so derive it from this module's URL.
const scriptFile = fileURLToPath(import.meta.url);
const __dirname = path.dirname(scriptFile);
// Nepali verse file, located in the "files" folder next to this script.
const nePath = path.join(__dirname, "files", "ne.txt");
// The Hindi IRV USFM files are reused as the source of red letter verse IDs/structure.
const usfmDir = path.join(__dirname, "hin2017_usfm");
/**
 * Books that are scanned for red-letter (\wj) markup.
 *
 * `usfmFile` is the file name inside the Hindi IRV USFM directory and
 * `bookIdx` is the 0-based book index used in ne.txt. Within this file set the
 * numeric file prefix is always `bookIdx + 7` (46-MAT -> 39 ... 72-REV -> 65);
 * a file name that breaks that rule is not found on disk and the book is
 * skipped with only a log line, so keep the two columns in step.
 */
const BOOKS = [
  { usfmFile: "46-MAThin2017.usfm", bookIdx: 39 },
  { usfmFile: "47-MRKhin2017.usfm", bookIdx: 40 },
  { usfmFile: "48-LUKhin2017.usfm", bookIdx: 41 },
  { usfmFile: "49-JHNhin2017.usfm", bookIdx: 42 },
  { usfmFile: "50-ACThin2017.usfm", bookIdx: 43 },
  { usfmFile: "52-1COhin2017.usfm", bookIdx: 45 },
  // Was "77-2CO...", which does not follow the prefix scheme of the other files.
  { usfmFile: "53-2COhin2017.usfm", bookIdx: 46 },
  { usfmFile: "72-REVhin2017.usfm", bookIdx: 65 },
];
// ===== USFM Parsing (reused from addRedLettersHindi.js) =====
/**
 * Parse one USFM file and collect the red-letter (\wj) structure of its verses.
 *
 * Only verses that contain red text are returned. Lines that are neither a
 * chapter (\c) nor a verse (\v) line are treated as continuations of the
 * verse that was opened most recently; a continuation is only inspected when
 * it contains a \wj marker, and it is never carried across a chapter line.
 *
 * @param {string} filePath - Path of the USFM file to read (UTF-8).
 * @returns {Map<string, {segments: Array<{text: string, red: boolean}>, fullyRed: boolean}>}
 *   Map keyed by "chapter:verse" (numbers as written in the USFM file).
 *   `fullyRed` is true when every segment with visible text is red.
 */
function parseUSFM(filePath) {
  const raw = fs.readFileSync(filePath, "utf-8");
  const verseData = new Map();

  const hasRedText = (segments) => segments.some((s) => s.red && s.text.trim());
  const isFullyRed = (segments) => segments.every((s) => s.red || !s.text.trim());

  let currentChapter = 0;
  // Key of the verse opened by the most recent \v line (null before the first
  // verse of a chapter), so continuation lines attach to the right verse.
  let currentKey = null;
  // Segments of the current verse that were not stored because no red text
  // had been seen yet; they become the prefix if a continuation adds red text.
  let pendingSegments = [];

  for (const line of raw.split("\n")) {
    const chapMatch = line.match(/^\\c\s+(\d+)/);
    if (chapMatch) {
      currentChapter = Number.parseInt(chapMatch[1], 10);
      currentKey = null;
      pendingSegments = [];
      continue;
    }

    const verseMatch = line.match(/^\\v\s+(\d+)\s+(.*)/);
    if (!verseMatch) {
      // Continuation line: only relevant when it carries red-letter markup.
      if (currentKey === null || !line.includes("\\wj")) continue;
      const contSegments = extractRedSegments(line);
      const entry = verseData.get(currentKey);
      if (entry) {
        entry.segments.push(...contSegments);
        entry.fullyRed = isFullyRed(entry.segments);
      } else if (hasRedText(contSegments)) {
        // First red text of this verse appears on a continuation line.
        const segments = [...pendingSegments, ...contSegments];
        verseData.set(currentKey, { segments, fullyRed: isFullyRed(segments) });
        pendingSegments = [];
      }
      continue;
    }

    const verseNum = Number.parseInt(verseMatch[1], 10);
    const segments = extractRedSegments(verseMatch[2]);
    currentKey = `${currentChapter}:${verseNum}`;

    if (!hasRedText(segments)) {
      pendingSegments = segments;
      continue;
    }
    pendingSegments = [];

    const fullyRed = isFullyRed(segments);
    const existing = verseData.get(currentKey);
    if (existing) {
      // Same chapter:verse seen again - merge into the existing entry.
      existing.segments.push(...segments);
      existing.fullyRed = existing.fullyRed && fullyRed;
    } else {
      verseData.set(currentKey, { segments, fullyRed });
    }
  }
  return verseData;
}
function extractRedSegments(text) {
text = text.replace(/\\f\s+.*?\\f\*/g, "");
text = text.replace(/\\x\s+.*?\\x\*/g, "");
text = text.replace(/\\bdit\s+.*?\\bdit\*/g, "");
const segments = [];
let pos = 0;
let inRed = false;
while (pos < text.length) {
if (!inRed) {
const wjIdx = text.indexOf("\\wj", pos);
if (wjIdx === -1) {
const remaining = cleanUSFM(text.substring(pos));
if (remaining) segments.push({ text: remaining, red: false });
break;
}
const before = cleanUSFM(text.substring(pos, wjIdx));
if (before) segments.push({ text: before, red: false });
const afterMarker = text.indexOf(" ", wjIdx);
pos = afterMarker !== -1 && afterMarker - wjIdx < 6 ? afterMarker + 1 : wjIdx + 4;
inRed = true;
} else {
const endIdx = text.indexOf("\\wj*", pos);
const endIdx2 = text.indexOf("\\+wj*", pos);
let actualEnd;
if (endIdx === -1 && endIdx2 === -1) {
const remaining = cleanUSFM(text.substring(pos));
if (remaining) segments.push({ text: remaining, red: true });
break;
}
if (endIdx === -1) actualEnd = endIdx2;
else if (endIdx2 === -1) actualEnd = endIdx;
else actualEnd = Math.min(endIdx, endIdx2);
const redText = cleanUSFM(text.substring(pos, actualEnd));
if (redText) segments.push({ text: redText, red: true });
const isPlus = text[actualEnd + 1] === "+";
pos = actualEnd + (isPlus ? 5 : 4);
const nextWj = text.indexOf("\\wj", pos);
const nextText = text.substring(pos, nextWj !== -1 ? nextWj : text.length);
const cleanNext = cleanUSFM(nextText).trim();
if (nextWj !== -1 && !cleanNext) {
const afterMarker2 = text.indexOf(" ", nextWj);
pos = afterMarker2 !== -1 && afterMarker2 - nextWj < 6 ? afterMarker2 + 1 : nextWj + 4;
} else {
inRed = false;
}
}
}
return segments;
}
/**
 * Strip USFM markers from a text fragment and normalise its whitespace.
 *
 * Closing markers (`\wj*`, `\+nd*`) are removed first, then opening markers
 * (`\wj `, `\q1 `) including the nested form with a leading `+` (`\+nd `),
 * so that nested character markers leave no residue in the cleaned text.
 *
 * @param {string} text - USFM fragment.
 * @returns {string} Text with markers removed, whitespace runs collapsed to a
 *   single space, and both ends trimmed.
 */
function cleanUSFM(text) {
  return text
    .replace(/\\[a-z+]+\*/g, "")
    .replace(/\\\+?[a-z]+\d?\s?/g, "")
    .replace(/\s+/g, " ")
    .trim();
}
// ===== Nepali-specific split logic =====
/**
 * Nepali speech markers that indicate where Jesus starts/stops speaking,
 * ordered roughly by specificity/frequency.
 *
 * Each row is a verb phrase plus the endings it is matched with; the rows are
 * expanded, in order, into the flat list of marker strings.
 */
const SPEECH_MARKERS = [
  // "answered" patterns
  ["उत्तर दिनुभयो", [",", ";", ":"]],
  ["उत्तर दिए", [",", ";"]],
  ["जवाफ दिनुभयो", [",", ";"]],
  ["जवाफमा भन्नुभयो", [","]],
  // "said" patterns
  ["भन्नुभयो", [",", ";", ":"]],
  ["भनुभयो", [","]],
  ["भने", [",", ";", ":"]],
  ["भन्यो", [",", ";"]],
  ["भन्नुहुन्छ", [",", ";"]],
  // "called out/rebuked" etc.
  ["पुकारेर भन्नुभयो", [","]],
  ["गर्जेर भन्नुभयो", [","]],
  ["डाँटेर भन्नुभयो", [","]],
  ["सोध्नुभयो", [","]],
  ["सोधे", [","]],
  // Curly quote start after speech marker (fallback)
  ["भन्नुभयो", [", \u201C"]],
  ["भने", [", \u201C"]],
].flatMap(([phrase, endings]) => endings.map((ending) => `${phrase}${ending}`));
/**
 * Decide where the red / non-red boundaries fall in a Nepali verse.
 *
 * The USFM segments only supply the pattern (all red, narrative then red,
 * red then narrative, or narrative on both sides); the actual character
 * offsets in the Nepali text come from findSplitPoint.
 *
 * @param {string} neText - Nepali verse text.
 * @param {Array<{text: string, red: boolean}>} segments - Segments of the verse
 *   as parsed from USFM; the array and its items are not modified.
 * @returns {{type: "full"} | {type: "prefix", splitIdx: number} |
 *   {type: "suffix", splitIdx: number} |
 *   {type: "both", prefixIdx: number, suffixIdx: number} | null}
 *   null when the pattern is not supported or no usable split point was found.
 */
function findRedBoundaries(neText, segments) {
  // Merge neighbouring segments of the same colour into runs (on copies).
  const runs = segments.reduce((acc, seg) => {
    const tail = acc[acc.length - 1];
    if (tail !== undefined && tail.red === seg.red) {
      tail.text = `${tail.text} ${seg.text}`;
    } else {
      acc.push({ ...seg });
    }
    return acc;
  }, []);

  if (runs.every((run) => run.red)) {
    return { type: "full" };
  }

  // From here on `runs` is non-empty and holds at least one non-red run.
  const firstRun = runs[0];
  const lastRun = runs[runs.length - 1];
  const opensWithNarrative = !firstRun.red && Boolean(firstRun.text.trim());
  const closesWithNarrative = !lastRun.red && Boolean(lastRun.text.trim());

  if (opensWithNarrative && closesWithNarrative) {
    // Pattern: narrative + red + narrative
    const prefixIdx = findSplitPoint(neText, "prefix");
    const suffixIdx = findSplitPoint(neText, "suffix");
    const usable = prefixIdx !== -1 && suffixIdx !== -1 && prefixIdx < suffixIdx;
    return usable ? { type: "both", prefixIdx, suffixIdx } : null;
  }

  if (opensWithNarrative) {
    // Pattern: narrative + red (the speech runs to the end of the verse)
    const splitIdx = findSplitPoint(neText, "prefix");
    return splitIdx === -1 ? null : { type: "prefix", splitIdx };
  }

  if (closesWithNarrative) {
    // Pattern: red + narrative (the narrator continues after the speech)
    const splitIdx = findSplitPoint(neText, "suffix");
    return splitIdx === -1 ? null : { type: "suffix", splitIdx };
  }

  return null;
}
/**
 * Locate a speech boundary inside a Nepali verse.
 *
 * "prefix": offset where the speech starts. The last occurrence of every
 * speech marker is located, spaces and one opening quote (\u201C or ') after
 * it are skipped, and the furthest resulting offset wins. When no marker
 * occurs, the offset right after the last `[,;:]` + optional whitespace +
 * \u201C sequence is used.
 *
 * Any other position ("suffix"): offset right after the last closing curly
 * quote \u201D, or failing that the last ', provided at least two characters
 * follow the quote.
 *
 * @param {string} neText - Nepali verse text.
 * @param {string} position - "prefix" or "suffix".
 * @returns {number} Character offset of the boundary, or -1 when none is found.
 */
function findSplitPoint(neText, position) {
  if (position !== "prefix") {
    // Try the curly closing quote first, then the plain single quote.
    for (const closingQuote of ["\u201D", "'"]) {
      const quoteIdx = neText.lastIndexOf(closingQuote);
      // Text must remain after the quote - that is the narrator's part.
      if (quoteIdx !== -1 && quoteIdx < neText.length - 2) {
        return quoteIdx + 1;
      }
    }
    return -1;
  }

  // Offset just past the last occurrence of `marker` (plus following spaces
  // and one opening quote), or -1 when the marker does not occur.
  const offsetAfterLast = (marker) => {
    const hit = neText.lastIndexOf(marker);
    if (hit === -1) return -1;
    let end = hit + marker.length;
    while (end < neText.length && neText[end] === " ") end++;
    const next = neText[end];
    return next === "\u201C" || next === "'" ? end + 1 : end;
  };

  // Strategy 1: the furthest marker wins (in multi-speaker verses the
  // relevant speech is assumed to be the last one).
  const furthest = SPEECH_MARKERS.reduce(
    (best, marker) => Math.max(best, offsetAfterLast(marker)),
    -1,
  );
  if (furthest !== -1) return furthest;

  // Strategy 2: last opening curly quote that follows , ; or :
  const openers = [...neText.matchAll(/[,;:]\s*\u201C/g)];
  if (openers.length === 0) return -1;
  const lastOpener = openers[openers.length - 1];
  return lastOpener.index + lastOpener[0].length;
}
/**
 * Apply <red>...</red> tags to a single ne.txt line based on boundary information.
 *
 * A line is a "|"-separated record whose first six fields are kept verbatim;
 * everything from the seventh field on is the verse text. Leading "¶" and
 * whitespace of the verse text stay outside the red span for the "full" and
 * "suffix" types.
 *
 * @param {string} line - One ne.txt line.
 * @param {{type: string, splitIdx?: number, prefixIdx?: number, suffixIdx?: number}} boundaries
 *   Boundary description; offsets are relative to the verse text including
 *   its leading "¶"/whitespace.
 *   - "full":   the whole verse is red
 *   - "prefix": red from `splitIdx` to the end
 *   - "suffix": red from the start of the content up to `splitIdx`
 *   - "both":   red from `prefixIdx` up to `suffixIdx`
 * @returns {string} The tagged line, or `line` unchanged when it has fewer
 *   than seven fields, is already tagged, has no content, or the offsets do
 *   not describe a non-empty red span.
 */
function applyRedTags(line, boundaries) {
  const parts = line.split("|");
  if (parts.length < 7) return line;
  const prefix = parts.slice(0, 6).join("|");
  // The verse text itself may contain "|", so re-join the remainder.
  const text = parts.slice(6).join("|");
  // Don't double-tag
  if (text.includes("<red>")) return line;
  // Preserve leading ¶ and whitespace (both groups may be empty, so the
  // pattern always matches).
  const [, leading, content] = text.match(/^([¶\s]*)(.*)/);
  if (!content.trim()) return line;
  const fullText = leading + content;

  switch (boundaries.type) {
    case "full":
      return `${prefix}|${leading}<red>${content}</red>`;

    case "prefix": {
      const { splitIdx } = boundaries;
      if (splitIdx >= 0 && splitIdx < fullText.length) {
        const before = fullText.substring(0, splitIdx);
        const after = fullText.substring(splitIdx);
        if (after.trim()) {
          return `${prefix}|${before}<red>${after}</red>`;
        }
      }
      return line;
    }

    case "suffix": {
      const { splitIdx } = boundaries;
      // The split must fall after the leading run; otherwise the red span
      // would be empty and part of the leading run would be emitted twice.
      if (splitIdx > leading.length && splitIdx <= fullText.length) {
        const red = fullText.substring(leading.length, splitIdx);
        const after = fullText.substring(splitIdx);
        if (red.trim()) {
          return `${prefix}|${leading}<red>${red}</red>${after}`;
        }
      }
      return line;
    }

    case "both": {
      const { prefixIdx, suffixIdx } = boundaries;
      if (prefixIdx >= 0 && suffixIdx > prefixIdx && suffixIdx <= fullText.length) {
        const before = fullText.substring(0, prefixIdx);
        const red = fullText.substring(prefixIdx, suffixIdx);
        const after = fullText.substring(suffixIdx);
        if (red.trim()) {
          return `${prefix}|${before}<red>${red}</red>${after}`;
        }
      }
      return line;
    }

    default:
      return line;
  }
}
// ============= MAIN =============
// Step 1: read the red letter structure out of every Hindi IRV USFM book.
console.log("Parsing Hindi IRV USFM files for red letter verse IDs/structure...");
// "bookIdx:chapterIdx:verseIdx" -> { segments, fullyRed }
const allRedData = new Map();
for (const { usfmFile, bookIdx } of BOOKS) {
  const usfmPath = path.join(usfmDir, usfmFile);
  if (!fs.existsSync(usfmPath)) {
    console.log(` Skipping ${usfmFile} (not found)`);
    continue;
  }
  const tally = { full: 0, partial: 0 };
  for (const [chapVerse, data] of parseUSFM(usfmPath)) {
    const [chapter, verse] = chapVerse.split(":").map((num) => Number.parseInt(num, 10));
    // USFM numbers are 1-based, the ne.txt key fields are 0-based.
    allRedData.set(`${bookIdx}:${chapter - 1}:${verse - 1}`, data);
    tally[data.fullyRed ? "full" : "partial"] += 1;
  }
  console.log(` ${usfmFile}: ${tally.full} full, ${tally.partial} partial`);
}
console.log(`Total USFM red data: ${allRedData.size} verses`);

// Step 2: walk ne.txt line by line and tag every verse that has red data.
const neLines = fs.readFileSync(nePath, "utf-8").split("\n");
let taggedFull = 0;
let taggedPartial = 0;
let skippedAlready = 0;
let failedPartial = 0;
const newLines = neLines.map((line) => {
  if (!line) return line;
  const fields = line.split("|");
  if (fields.length < 7) return line;

  // Fields 1-3 hold book, chapter and verse index; the text starts at field 6.
  const [, bookIdx, chapIdx, verseIdx] = fields;
  const redData = allRedData.get(`${bookIdx}:${chapIdx}:${verseIdx}`);
  if (!redData) return line;

  const text = fields.slice(6).join("|");
  if (text.includes("<red>")) {
    skippedAlready++;
    return line;
  }

  if (redData.fullyRed) {
    // Tag the entire verse; a verse without content comes back unchanged
    // and is not counted.
    const tagged = applyRedTags(line, { type: "full" });
    if (tagged !== line) taggedFull++;
    return tagged;
  }

  // Partially red verse - try to find boundaries using Nepali speech markers.
  const boundaries = findRedBoundaries(text, redData.segments);
  if (boundaries) {
    const tagged = applyRedTags(line, boundaries);
    if (tagged !== line) {
      if (boundaries.type === "full") {
        taggedFull++;
      } else {
        taggedPartial++;
      }
      return tagged;
    }
  }
  failedPartial++;
  return line;
});
fs.writeFileSync(nePath, newLines.join("\n"), "utf-8");

console.log(`\nResults:`);
console.log(` Tagged full: ${taggedFull}`);
console.log(` Tagged partial: ${taggedPartial}`);
console.log(` Already tagged: ${skippedAlready}`);
console.log(` Failed partial: ${failedPartial}`);
console.log(` Total modified: ${taggedFull + taggedPartial}`);