only-bible-app

GIT_CONFIG_PARAMETERS="'http.version=HTTP/1.1'" git clone https://git.pyrossh.dev/only-bible-app.git
Discussions: https://groups.google.com/g/rust-embed-devs

The only bible app you will ever need. No ads. No in-app purchases. No distractions.

Readme
Commits
Files
scripts/addRedLettersNepali.js

Contents
History
Blame
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";

// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Nepali verse file; the script reads it and writes the tagged result back to the same path.
const nePath = path.resolve(__dirname, "files", "ne.txt");
// Reuse Hindi IRV USFM as the source of red letter verse IDs/structure
const usfmDir = path.resolve(__dirname, "hin2017_usfm");

// USFM file prefix -> ne.txt book index (0-based)
// Only the books listed here are scanned for red letter text.
// The numeric file prefix follows the same book order as bookIdx, so every
// prefix equals bookIdx + 7 (46 for Matthew ... 72 for Revelation).
const BOOKS = [
  { usfmFile: "46-MAThin2017.usfm", bookIdx: 39 },
  { usfmFile: "47-MRKhin2017.usfm", bookIdx: 40 },
  { usfmFile: "48-LUKhin2017.usfm", bookIdx: 41 },
  { usfmFile: "49-JHNhin2017.usfm", bookIdx: 42 },
  { usfmFile: "50-ACThin2017.usfm", bookIdx: 43 },
  { usfmFile: "52-1COhin2017.usfm", bookIdx: 45 },
  // Was "77-...", which lies past Revelation (72) and broke the sequence,
  // so the file was never found and 2 Corinthians was skipped.
  { usfmFile: "53-2COhin2017.usfm", bookIdx: 46 },
  { usfmFile: "72-REVhin2017.usfm", bookIdx: 65 },
];

// ===== USFM Parsing (reused from addRedLettersHindi.js) =====

/**
 * Parse one USFM book file and collect the verses that contain words of
 * Jesus (text inside \wj ... \wj* spans).
 *
 * @param {string} filePath - Path of the USFM file, read synchronously as UTF-8.
 * @returns {Map<string, {segments: Array<{text: string, red: boolean}>, fullyRed: boolean}>}
 *   Map keyed by "chapter:verse" using the numbers written in the file. Only
 *   verses with at least one non-empty red segment are present. `fullyRed`
 *   is true when every non-empty segment of the verse is red.
 * @throws {Error} Whatever fs.readFileSync throws when the file cannot be read.
 */
function parseUSFM(filePath) {
  const raw = fs.readFileSync(filePath, "utf-8");
  const verseData = new Map();

  const hasRedText = (segments) => segments.some((s) => s.red && s.text.trim());
  const isFullyRed = (segments) => segments.every((s) => s.red || !s.text.trim());

  // Append to the entry stored under `key`, creating it when missing, and
  // recompute fullyRed over everything collected for that verse so far.
  const store = (key, segments) => {
    const entry = verseData.get(key);
    if (entry) {
      entry.segments.push(...segments);
      entry.fullyRed = isFullyRed(entry.segments);
    } else {
      verseData.set(key, { segments, fullyRed: isFullyRed(segments) });
    }
  };

  let currentChapter = 0;
  // Key of the verse whose \v line was read most recently (null before the
  // first verse of a chapter). Continuation lines belong to this verse.
  let currentKey = null;
  // Segments of the current verse that are not stored yet because no red
  // text has been seen in it so far.
  let pending = [];

  for (const line of raw.split(/\r?\n/)) {
    const chapMatch = line.match(/^\\c\s+(\d+)/);
    if (chapMatch) {
      currentChapter = Number.parseInt(chapMatch[1], 10);
      currentKey = null;
      pending = [];
      continue;
    }

    // The lookahead keeps bridged verses such as "\v 12-13" unmatched (as
    // before) while also accepting a bare "\v 12" whose text follows on
    // later lines.
    const verseMatch = line.match(/^\\v\s+(\d+)(?!\S)\s*(.*)/);
    if (verseMatch) {
      currentKey = `${currentChapter}:${Number.parseInt(verseMatch[1], 10)}`;
      const segments = extractRedSegments(verseMatch[2]);
      if (hasRedText(segments)) {
        store(currentKey, segments);
        pending = [];
      } else {
        pending = segments;
      }
      continue;
    }

    // Continuation line (poetry line, new paragraph, ...). Only lines that
    // carry a \wj marker are considered, and they are attached to the verse
    // currently being read. Previously they were attached to the last verse
    // of the chapter that already had red text, which could be an earlier one.
    if (currentKey === null || !line.includes("\\wj")) continue;

    const contSegments = extractRedSegments(line);
    if (verseData.has(currentKey)) {
      store(currentKey, contSegments);
      continue;
    }

    pending.push(...contSegments);
    if (hasRedText(pending)) {
      store(currentKey, pending);
      pending = [];
    }
  }

  return verseData;
}

/**
 * Split one piece of USFM text into an ordered list of red (inside \wj)
 * and non-red segments.
 *
 * Footnotes (\f), cross references (\x) and \bdit spans are removed first.
 * Each segment's text is passed through cleanUSFM; segments that end up
 * empty are dropped. A red span that is never closed runs to the end of the
 * text, and two red spans separated only by markers/whitespace are scanned
 * without switching back to non-red in between.
 *
 * @param {string} text - Verse content or continuation line in USFM.
 * @returns {Array<{text: string, red: boolean}>} Segments in reading order.
 */
function extractRedSegments(text) {
  const noteSpans = [/\\f\s+.*?\\f\*/g, /\\x\s+.*?\\x\*/g, /\\bdit\s+.*?\\bdit\*/g];
  const body = noteSpans.reduce((acc, pattern) => acc.replace(pattern, ""), text);

  const result = [];

  // Clean a raw slice and record it unless nothing is left.
  const emit = (rawSlice, red) => {
    const cleaned = cleanUSFM(rawSlice);
    if (cleaned) result.push({ text: cleaned, red });
  };

  // Position just past the opening marker found at `at`: after the space
  // that ends the marker when one follows closely, otherwise a fixed 4 chars.
  const skipOpeningMarker = (at) => {
    const space = body.indexOf(" ", at);
    return space !== -1 && space - at < 6 ? space + 1 : at + 4;
  };

  let cursor = 0;
  let insideRed = false;

  while (cursor < body.length) {
    if (insideRed) {
      const closers = [body.indexOf("\\wj*", cursor), body.indexOf("\\+wj*", cursor)].filter(
        (idx) => idx !== -1,
      );
      if (closers.length === 0) {
        emit(body.substring(cursor), true);
        break;
      }

      const closeAt = Math.min(...closers);
      emit(body.substring(cursor, closeAt), true);
      // "\+wj*" is one character longer than "\wj*".
      cursor = closeAt + (body[closeAt + 1] === "+" ? 5 : 4);

      const nextOpen = body.indexOf("\\wj", cursor);
      const gap = body.substring(cursor, nextOpen !== -1 ? nextOpen : body.length);
      if (nextOpen !== -1 && !cleanUSFM(gap).trim()) {
        // Nothing but markers/whitespace before the next span: stay red.
        cursor = skipOpeningMarker(nextOpen);
      } else {
        insideRed = false;
      }
      continue;
    }

    const openAt = body.indexOf("\\wj", cursor);
    if (openAt === -1) {
      emit(body.substring(cursor), false);
      break;
    }
    emit(body.substring(cursor, openAt), false);
    cursor = skipOpeningMarker(openAt);
    insideRed = true;
  }

  return result;
}

/**
 * Strip USFM markers from a piece of text and normalise its whitespace.
 *
 * @param {string} text - Raw USFM fragment.
 * @returns {string} The fragment without closing markers ("\wj*", "\+nd*"),
 *   without opening markers ("\q1 ", "\wj ", "\+nd "), with runs of
 *   whitespace collapsed to one space and both ends trimmed.
 */
function cleanUSFM(text) {
  return text
    // Closing markers, including the nested "\+xx*" form.
    .replace(/\\[a-z+]+\*/g, "")
    // Opening markers with an optional level digit and one trailing space.
    // The optional "+" covers nested openers such as "\+nd ", which were
    // previously left in the output while their closers were removed.
    .replace(/\\\+?[a-z]+\d?\s?/g, "")
    .replace(/\s+/g, " ")
    .trim();
}

// ===== Nepali-specific split logic =====

/**
 * Nepali speech markers that indicate where Jesus starts/stops speaking.
 * Each entry is a verb phrase followed by the punctuation that introduces
 * the quoted speech.
 *
 * Ordered roughly by specificity/frequency. The order is for readability
 * only: findSplitPoint looks up every marker and keeps the split position
 * that lies furthest into the verse.
 */
const SPEECH_MARKERS = [
  // "answered" patterns
  "उत्तर दिनुभयो,",
  "उत्तर दिनुभयो;",
  "उत्तर दिनुभयो:",
  "उत्तर दिए,",
  "उत्तर दिए;",
  "जवाफ दिनुभयो,",
  "जवाफ दिनुभयो;",
  "जवाफमा भन्नुभयो,",
  // "said" patterns
  "भन्नुभयो,",
  "भन्नुभयो;",
  "भन्नुभयो:",
  "भनुभयो,",
  "भने,",
  "भने;",
  "भने:",
  "भन्यो,",
  "भन्यो;",
  "भन्नुहुन्छ,",
  "भन्नुहुन्छ;",
  // "called out/rebuked" etc.
  "पुकारेर भन्नुभयो,",
  "गर्जेर भन्नुभयो,",
  "डाँटेर भन्नुभयो,",
  "सोध्नुभयो,",
  "सोधे,",
  // Curly quote start after speech marker (fallback).
  // NOTE(review): these look redundant. Any text matching them also matches
  // the shorter "said" entries above, and findSplitPoint already skips spaces
  // and one opening quote after a marker. Confirm before removing.
  'भन्नुभयो, \u201C',
  'भने, \u201C',
];

/**
 * Decide where the red/non-red boundaries fall in a Nepali verse, using the
 * segment pattern of the corresponding USFM verse as a guide.
 *
 * Consecutive segments of the same kind are merged first (the input array
 * and its objects are not modified). The merged pattern then selects the
 * strategy:
 *   - all red                      -> { type: "full" }
 *   - narrative, then red to end   -> { type: "prefix", splitIdx }
 *   - red from start, then narrative -> { type: "suffix", splitIdx }
 *   - narrative on both sides      -> { type: "both", prefixIdx, suffixIdx }
 *
 * @param {string} neText - Nepali verse text to search for split points.
 * @param {Array<{text: string, red: boolean}>} segments - USFM segments of the verse.
 * @returns {{type: "full"} | {type: "prefix", splitIdx: number}
 *   | {type: "suffix", splitIdx: number}
 *   | {type: "both", prefixIdx: number, suffixIdx: number} | null}
 *   null when there are no segments, when no split point is found, or when
 *   the pattern is not one of the above (e.g. red on both sides of narrative).
 */
function findRedBoundaries(neText, segments) {
  // Combine consecutive same-type segments
  const combined = [];
  for (const seg of segments) {
    const previous = combined[combined.length - 1];
    if (previous && previous.red === seg.red) {
      previous.text = `${previous.text} ${seg.text}`;
    } else {
      combined.push({ ...seg });
    }
  }

  // Without any segment there is no evidence of red text. The old code fell
  // through to `every`, which is true for an empty list, and reported "full".
  if (combined.length === 0) return null;

  if (combined.every((s) => s.red)) {
    return { type: "full" };
  }

  const first = combined[0];
  const last = combined[combined.length - 1];
  const hasLeadingNonRed = !first.red && Boolean(first.text.trim());
  const hasTrailingNonRed = !last.red && Boolean(last.text.trim());

  if (hasLeadingNonRed && hasTrailingNonRed) {
    // Pattern: narrative + red + narrative
    const prefixIdx = findSplitPoint(neText, "prefix");
    const suffixIdx = findSplitPoint(neText, "suffix");
    if (prefixIdx !== -1 && suffixIdx !== -1 && prefixIdx < suffixIdx) {
      return { type: "both", prefixIdx, suffixIdx };
    }
    return null;
  }

  if (hasLeadingNonRed) {
    // Pattern: narrative + red (Jesus speaks to end of verse)
    const splitIdx = findSplitPoint(neText, "prefix");
    return splitIdx !== -1 ? { type: "prefix", splitIdx } : null;
  }

  if (hasTrailingNonRed) {
    // Pattern: red + narrative (Jesus finishes, narrator continues)
    const splitIdx = findSplitPoint(neText, "suffix");
    return splitIdx !== -1 ? { type: "suffix", splitIdx } : null;
  }

  return null;
}

/**
 * Locate a red/non-red boundary inside Nepali verse text.
 *
 * "prefix": index where the speech begins. The last occurrence of every
 * speech marker is considered; the position after the marker, following
 * spaces and one opening quote (\u201C or ') is computed, and the furthest
 * such position wins. If no marker occurs, the position right after the last
 * `, ; :` + optional whitespace + \u201C sequence is used.
 *
 * Any other value (callers pass "suffix"): index right after the last
 * closing curly quote \u201D, or failing that the last ', provided at least
 * two characters follow the quote.
 *
 * @param {string} neText - Nepali verse text.
 * @param {string} position - "prefix" or "suffix".
 * @returns {number} Split index into neText, or -1 when none is found.
 */
function findSplitPoint(neText, position) {
  if (position !== "prefix") {
    // suffix: the narrator's part is whatever follows the last closing quote.
    // The curly quote is tried before the plain single quote.
    for (const closingQuote of ["\u201D", "'"]) {
      const quoteAt = neText.lastIndexOf(closingQuote);
      if (quoteAt !== -1 && quoteAt < neText.length - 2) {
        return quoteAt + 1;
      }
    }
    return -1;
  }

  // Strategy 1: speech markers. Using the last occurrence handles
  // multi-speaker verses where Jesus speaks last.
  let furthest = -1;
  for (const marker of SPEECH_MARKERS) {
    const hit = neText.lastIndexOf(marker);
    if (hit === -1) continue;

    let cut = hit + marker.length;
    // Skip whitespace after marker
    while (cut < neText.length && neText[cut] === " ") cut++;
    // Skip opening quote mark if present
    if (cut < neText.length && (neText[cut] === "\u201C" || neText[cut] === "'")) {
      cut++;
    }
    if (furthest === -1 || cut > furthest) {
      furthest = cut;
    }
  }
  if (furthest !== -1) return furthest;

  // Strategy 2: last opening curly quote introduced by , ; or :
  const openers = [...neText.matchAll(/[,;:]\s*\u201C/g)];
  if (openers.length === 0) return -1;

  const lastOpener = openers[openers.length - 1];
  return lastOpener.index + lastOpener[0].length;
}

/**
 * Wrap the red part of one ne.txt line in <red>...</red>.
 *
 * A line is split on "|"; the first six fields are kept untouched and the
 * remaining fields (re-joined with "|") form the verse text. Leading "¶" and
 * whitespace stay outside the tag when the verse starts red.
 *
 * @param {string} line - One line of ne.txt.
 * @param {{type: string, splitIdx?: number, prefixIdx?: number, suffixIdx?: number}} boundaries
 *   Result of findRedBoundaries; indices are offsets into the verse text.
 * @returns {string} The tagged line, or `line` unchanged when it has fewer
 *   than seven fields, is already tagged, has no content, or the indices do
 *   not describe a usable, non-blank red span.
 */
function applyRedTags(line, boundaries) {
  const fields = line.split("|");
  if (fields.length < 7) return line;

  const head = fields.slice(0, 6).join("|");
  const verseText = fields.slice(6).join("|");

  // Don't double-tag
  if (verseText.includes("<red>")) return line;

  // Preserve leading ¶ and whitespace
  const [, leading = "", content = ""] = verseText.match(/^([¶\s]*)(.*)/);
  if (!content.trim()) return line;

  const wrap = (redPart) => `<red>${redPart}</red>`;
  const fullText = leading + content;

  switch (boundaries.type) {
    case "full":
      return `${head}|${leading}${wrap(content)}`;

    case "prefix": {
      const { splitIdx } = boundaries;
      if (splitIdx >= 0 && splitIdx < fullText.length) {
        const narrative = fullText.substring(0, splitIdx);
        const speech = fullText.substring(splitIdx);
        if (speech.trim()) {
          return `${head}|${narrative}${wrap(speech)}`;
        }
      }
      return line;
    }

    case "suffix": {
      const { splitIdx } = boundaries;
      if (splitIdx > 0 && splitIdx <= fullText.length) {
        const speech = fullText.substring(0, splitIdx);
        const narrative = fullText.substring(splitIdx);
        if (speech.trim()) {
          // The tag opens after the leading ¶/whitespace.
          return `${head}|${leading}${wrap(speech.substring(leading.length))}${narrative}`;
        }
      }
      return line;
    }

    case "both": {
      const { prefixIdx, suffixIdx } = boundaries;
      if (prefixIdx >= 0 && suffixIdx > prefixIdx && suffixIdx <= fullText.length) {
        const opening = fullText.substring(0, prefixIdx);
        const speech = fullText.substring(prefixIdx, suffixIdx);
        const closing = fullText.substring(suffixIdx);
        if (speech.trim()) {
          return `${head}|${opening}${wrap(speech)}${closing}`;
        }
      }
      return line;
    }

    default:
      return line;
  }
}

// ============= MAIN =============

// Step 1: Parse Hindi IRV USFM files to get red letter structure
console.log("Parsing Hindi IRV USFM files for red letter verse IDs/structure...");
const allRedData = new Map(); // "bookIdx:chapter:verse" -> {segments, fullyRed}

for (const { usfmFile, bookIdx } of BOOKS) {
  const usfmPath = path.join(usfmDir, usfmFile);
  if (!fs.existsSync(usfmPath)) {
    console.log(`  Skipping ${usfmFile} (not found)`);
    continue;
  }

  const tally = { full: 0, partial: 0 };
  for (const [chapVerse, data] of parseUSFM(usfmPath)) {
    const [chapter, verse] = chapVerse.split(":").map((n) => Number.parseInt(n, 10));
    // USFM chapters are 1-based, ne.txt uses 0-based
    allRedData.set(`${bookIdx}:${chapter - 1}:${verse - 1}`, data);
    tally[data.fullyRed ? "full" : "partial"] += 1;
  }

  console.log(`  ${usfmFile}: ${tally.full} full, ${tally.partial} partial`);
}

console.log(`Total USFM red data: ${allRedData.size} verses`);

// Step 2: Load ne.txt and apply red tags
const neLines = fs.readFileSync(nePath, "utf-8").split("\n");
let taggedFull = 0;
let taggedPartial = 0;
let skippedAlready = 0;
let failedPartial = 0;

const newLines = neLines.map((line) => {
  if (!line) return line;
  const parts = line.split("|");
  if (parts.length < 7) return line;

  // Fields 1-3 hold the 0-based book, chapter and verse indices.
  const [, bookIdx, chapIdx, verseIdx] = parts;
  const redData = allRedData.get(`${bookIdx}:${chapIdx}:${verseIdx}`);
  if (!redData) return line;

  const text = parts.slice(6).join("|");
  if (text.includes("<red>")) {
    skippedAlready++;
    return line;
  }

  if (redData.fullyRed) {
    // Tag entire verse; applyRedTags returns the line unchanged when the
    // verse has no content, in which case nothing is counted.
    const tagged = applyRedTags(line, { type: "full" });
    if (tagged !== line) taggedFull++;
    return tagged;
  }

  // Partially red verse - try to find boundaries using Nepali speech markers
  const boundaries = findRedBoundaries(text, redData.segments);
  if (boundaries) {
    const tagged = applyRedTags(line, boundaries);
    if (tagged !== line) {
      if (boundaries.type === "full") taggedFull++;
      else taggedPartial++;
      return tagged;
    }
  }

  failedPartial++;
  return line;
});

fs.writeFileSync(nePath, newLines.join("\n"), "utf-8");

console.log(`\nResults:`);
console.log(`  Tagged full:     ${taggedFull}`);
console.log(`  Tagged partial:  ${taggedPartial}`);
console.log(`  Already tagged:  ${skippedAlready}`);
console.log(`  Failed partial:  ${failedPartial}`);
console.log(`  Total modified:  ${taggedFull + taggedPartial}`);
~repos /only-bible-app

GIT_CONFIG_PARAMETERS="'http.version=HTTP/1.1'" git clone https://git.pyrossh.dev/only-bible-app.git
Discussions: https://groups.google.com/g/rust-embed-devs

The only bible app you will ever need. No ads. No in-app purchases. No distractions.

scripts/addRedLettersNepali.js

GIT_CONFIG_PARAMETERS="'http.version=HTTP/1.1'" git clone https://git.pyrossh.dev/only-bible-app.git
Discussions: https://groups.google.com/g/rust-embed-devs