import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";

import axios from "axios";
// ES modules have no built-in __dirname, so derive it from this module's URL.
const thisModulePath = fileURLToPath(import.meta.url);
const __dirname = path.dirname(thisModulePath);

// Location of the BSB text file that this script reads and rewrites.
const bsbPath = path.join(__dirname, "files", "en_bsb.txt");
// Books that contain red letters (Jesus's words), with their BibleHub URL slugs.
//   idx      - book index; matches the book field of en_bsb.txt
//   slug     - path segment used in the BibleHub chapter URL
//   chapters - number of chapters to scrape for the book
const BOOKS = [
  { idx: 39, slug: "matthew", chapters: 28 },
  { idx: 40, slug: "mark", chapters: 16 },
  { idx: 41, slug: "luke", chapters: 24 },
  { idx: 42, slug: "john", chapters: 21 },
  { idx: 43, slug: "acts", chapters: 28 },
  { idx: 45, slug: "1_corinthians", chapters: 16 },
  { idx: 46, slug: "2_corinthians", chapters: 13 },
  { idx: 65, slug: "revelation", chapters: 22 },
];
/**
 * Decode the HTML entities expected in scraped verse text.
 *
 * Handles the typographic named entities (curly quotes, dashes, ellipsis) and
 * numeric character references in decimal (`&#8220;`) or hex (`&#x201C;`) form.
 * Anything else is left untouched.
 *
 * @param {string} str - Text that may contain HTML entities.
 * @returns {string} The text with the supported entities replaced by their characters.
 */
function decodeHtmlEntities(str) {
  const namedEntities = {
    ldquo: "\u201C",
    rdquo: "\u201D",
    lsquo: "\u2018",
    rsquo: "\u2019",
    mdash: "\u2014",
    ndash: "\u2013",
    hellip: "\u2026",
  };

  // String.fromCodePoint (unlike fromCharCode) handles code points above 0xFFFF,
  // but throws on out-of-range values, so those references are left as written.
  const decodeCodePoint = (raw, codePoint) =>
    Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : raw;

  return str
    .replace(/&(ldquo|rdquo|lsquo|rsquo|mdash|ndash|hellip);/g, (_, name) => namedEntities[name])
    .replace(/&#(\d+);/g, (raw, dec) => decodeCodePoint(raw, Number.parseInt(dec, 10)))
    .replace(/&#[xX]([0-9a-fA-F]+);/g, (raw, hex) => decodeCodePoint(raw, Number.parseInt(hex, 16)));
}
/**
 * Remove every HTML tag (anything of the form `<...>`) from a string,
 * keeping the text between tags as-is.
 *
 * @param {string} str - Markup fragment.
 * @returns {string} The fragment with all tags deleted.
 */
function stripHtml(str) {
  return str.replace(/<[^>]+>/g, "");
}
/**
 * Collapse every run of whitespace into a single space and trim both ends.
 *
 * @param {string} str - Text with arbitrary spacing / line breaks.
 * @returns {string} The single-spaced, trimmed text.
 */
function normalizeText(str) {
  return str.replace(/\s+/g, " ").trim();
}
/**
 * Parse a BibleHub chapter page and extract the verses that contain red-letter text.
 *
 * @param {string} html - Full HTML of a chapter page.
 * @returns {Map<number, Array<{text: string, red: boolean}>>} Map keyed by the verse
 *   number shown on the page. Each value is the verse split, in reading order, into
 *   plain (`red: false`) and red-letter (`red: true`) parts. Verses without any red
 *   part are omitted; the map is empty when the chapter container is not found.
 */
function parseChapter(html) {
  const verses = new Map();

  // Extract just the chapter content div
  const chapMatch = html.match(/<div class="chap">([\s\S]*?)<\/div>\s*(?:<div|<A name="fn")/);
  if (!chapMatch) return verses;

  // Remove headings, cross-references, footnote markers and paragraph wrappers
  const content = chapMatch[1]
    .replace(/<p class="hdg">[\s\S]*?(?=<p class="reg">)/g, "")
    .replace(/<span class="cross">[\s\S]*?<\/span>/g, "")
    .replace(/<span class="fn">[\s\S]*?<\/span>/g, "")
    .replace(/<A name="\d+">/g, "")
    // An indented paragraph becomes a space so adjacent words do not fuse.
    .replace(/<p class="indent[^"]*">/g, " ")
    .replace(/<p class="reg">/g, "");

  // Split at verse reference markers to get per-verse segments
  // Pattern: <span class="reftext"><a href="/BOOK/CHAPTER-VERSE.htm"><b>VERSE_NUM</b></a></span>
  const versePattern = /<span class="reftext"><a href="[^"]+"><b>(\d+)<\/b><\/a><\/span>/g;
  const splits = [];
  let match;
  while ((match = versePattern.exec(content)) !== null) {
    splits.push({
      verseNum: Number.parseInt(match[1], 10),
      index: match.index,
      afterIndex: match.index + match[0].length,
    });
  }

  // Plain text of an HTML fragment: tags removed, entities decoded, whitespace collapsed.
  const toText = (fragment) => normalizeText(decodeHtmlEntities(stripHtml(fragment)));

  for (let i = 0; i < splits.length; i++) {
    const { verseNum, afterIndex: start } = splits[i];
    // A verse runs from just after its marker up to the next marker (or end of content).
    const end = i + 1 < splits.length ? splits[i + 1].index : content.length;
    const segment = content.substring(start, end);

    // Parse red spans within this verse segment
    const parts = [];
    const redPattern = /<span class="red">([\s\S]*?)<\/span>/g;
    let lastIdx = 0;
    let redMatch;
    while ((redMatch = redPattern.exec(segment)) !== null) {
      if (redMatch.index > lastIdx) {
        const before = toText(segment.substring(lastIdx, redMatch.index));
        if (before) parts.push({ text: before, red: false });
      }
      const redText = toText(redMatch[1]);
      if (redText) parts.push({ text: redText, red: true });
      lastIdx = redMatch.index + redMatch[0].length;
    }
    if (lastIdx < segment.length) {
      const after = toText(segment.substring(lastIdx));
      if (after) parts.push({ text: after, red: false });
    }

    // Only verses that actually contain red-letter text are reported.
    if (parts.some((p) => p.red)) {
      verses.set(verseNum, parts);
    }
  }

  return verses;
}
/**
 * Given verse text from the file and the parsed parts of that verse, return the
 * verse text with the red-letter portions wrapped in <red>…</red>.
 *
 * Red parts are located in order, each search starting after the previous match,
 * so a phrase that occurs twice is tagged at its own occurrence rather than twice
 * at the first one. A red part that cannot be located is skipped.
 *
 * @param {string} verseText - Verse text as stored in the file (may start with "¶").
 * @param {Array<{text: string, red: boolean}>} parts - Verse parts in reading order.
 * @returns {string} The tagged verse text; `verseText` unchanged if nothing matched
 *   or `parts` is empty.
 */
function applyRedTags(verseText, parts) {
  // No parts means no red-letter information at all: leave the verse alone.
  if (parts.length === 0) return verseText;

  // Check if entire verse is red
  if (parts.every((p) => p.red)) {
    // Preserve leading ¶ and whitespace outside the tag
    const [, leading = "", content = ""] = verseText.match(/^([¶\s]*)(.*)/s);
    return `${leading}<red>${content}</red>`;
  }

  const OPEN = "<red>";
  const CLOSE = "</red>";
  const quoteChar = /[\u201C\u201D\u2018\u2019"']/;

  let result = verseText;
  // Offset in `result` just past the most recently inserted closing tag.
  let cursor = 0;

  // Wrap result[start, end) in red tags and move the cursor past them.
  const wrap = (start, end) => {
    result = `${result.substring(0, start)}${OPEN}${result.substring(start, end)}${CLOSE}${result.substring(end)}`;
    cursor = end + OPEN.length + CLOSE.length;
  };

  // For partial red, try to locate each red segment in the verse text
  for (const part of parts) {
    if (!part.red) continue;
    const redNorm = part.text;
    if (!redNorm) continue;

    // Try exact substring match first
    const exactIdx = result.indexOf(redNorm, cursor);
    if (exactIdx !== -1) {
      wrap(exactIdx, exactIdx + redNorm.length);
      continue;
    }

    // Try matching with flexible whitespace
    const escaped = redNorm.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\s+/g, "\\s+");
    const flexPattern = new RegExp(escaped, "g");
    flexPattern.lastIndex = cursor;
    const flexMatch = flexPattern.exec(result);
    if (flexMatch) {
      wrap(flexMatch.index, flexMatch.index + flexMatch[0].length);
      continue;
    }

    // Try with quotes stripped from beginning/end for partial quote matching
    // Sometimes the red span includes opening quote but the text starts differently
    const stripped = redNorm.replace(/^[\u201C\u201D\u2018\u2019"']+|[\u201C\u201D\u2018\u2019"']+$/g, "");
    // Short fragments are too ambiguous to match without their quotes.
    if (stripped.length > 10) {
      const idx = result.indexOf(stripped, cursor);
      if (idx !== -1) {
        // Expand to include surrounding quotes if present
        let start = idx;
        let end = idx + stripped.length;
        while (start > cursor && quoteChar.test(result[start - 1])) start--;
        while (end < result.length && quoteChar.test(result[end])) end++;
        wrap(start, end);
      }
    }
    // Otherwise: skip, we can't find the text
  }

  // Merge adjacent red tags (</red><red> or </red> <red>), keeping the whitespace
  // between them so the verse text itself is never altered.
  return result.replace(/<\/red>(\s*)<red>/g, "$1");
}
/**
 * Download one BSB chapter page from BibleHub.
 *
 * Network and HTTP errors are logged and reported as `null`, so a single bad
 * chapter does not abort the whole run.
 *
 * @param {string} slug - BibleHub book slug, e.g. "matthew".
 * @param {number} chapter - 1-based chapter number.
 * @returns {Promise<string|null>} The response body, or `null` if the request failed.
 */
async function fetchChapter(slug, chapter) {
  const url = `https://biblehub.com/bsb/${slug}/${chapter}.htm`;
  try {
    const resp = await axios.get(url, {
      headers: { "User-Agent": "Mozilla/5.0" },
      // axios has no timeout by default; without one a stalled connection hangs the run.
      timeout: 30000,
    });
    return resp.data;
  } catch (e) {
    console.error(` Failed to fetch ${url}: ${e.message}`);
    return null;
  }
}
/**
 * Resolve after the given delay; used to pause between requests.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} A promise that resolves once the delay has elapsed.
 */
function sleep(ms) {
  return new Promise((r) => setTimeout(r, ms));
}
/**
 * Rebuild the <red> markup of en_bsb.txt from BibleHub:
 *   1. read the file and strip any existing <red> tags,
 *   2. scrape every chapter of every book in BOOKS and tag the matching verses,
 *   3. write the file back.
 *
 * The file is left untouched when not a single chapter could be fetched, because
 * writing at that point would only erase the existing tags.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Pause between requests, applied after every chapter whether or not it had red text.
  const REQUEST_DELAY_MS = 200;
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Step 1: Read BSB file and strip existing <red> tags
  console.log("Reading en_bsb.txt and stripping existing <red> tags...");
  const lines = fs
    .readFileSync(bsbPath, "utf-8")
    .split("\n")
    .map((line) => line.replace(/<\/?red>/g, ""));

  // Index lines by bookIdx:chapterIdx:verseIdx (pipe-separated fields 1, 2 and 3)
  const lineIndex = new Map();
  lines.forEach((line, i) => {
    const fields = line.split("|");
    if (fields.length < 7) return;
    lineIndex.set(`${fields[1]}:${fields[2]}:${fields[3]}`, i);
  });

  // Step 2: Scrape each book/chapter
  let totalTagged = 0;
  let totalFull = 0;
  let totalPartial = 0;
  let fetchedChapters = 0;
  const failedChapters = [];

  for (const book of BOOKS) {
    console.log(`\nProcessing ${book.slug} (${book.chapters} chapters)...`);
    let bookTagged = 0;

    for (let ch = 1; ch <= book.chapters; ch++) {
      // NOTE(review): assumes the file's chapter index is 0-based like its verse index — confirm.
      const chIdx = ch - 1;
      const html = await fetchChapter(book.slug, ch);

      if (typeof html === "string") {
        fetchedChapters++;
        const redVerses = parseChapter(html);

        for (const [verseNum, parts] of redVerses) {
          // BibleHub uses 1-based verses, our file uses 0-based
          const vIdx = verseNum - 1;
          const key = `${book.idx}:${chIdx}:${vIdx}`;
          const lineIdx = lineIndex.get(key);
          if (lineIdx === undefined) {
            console.warn(` Verse not found: ${book.slug} ${ch}:${verseNum} (key=${key})`);
            continue;
          }

          // Fields 0-5 are metadata; everything from field 6 on is verse text
          // (re-joined in case the text itself contains "|").
          const lineParts = lines[lineIdx].split("|");
          const prefix = lineParts.slice(0, 6).join("|");
          const verseText = lineParts.slice(6).join("|");
          const tagged = applyRedTags(verseText, parts);
          if (tagged !== verseText) {
            lines[lineIdx] = `${prefix}|${tagged}`;
            bookTagged++;
            if (parts.every((p) => p.red)) totalFull++;
            else totalPartial++;
          }
        }
      } else {
        failedChapters.push(`${book.slug} ${ch}`);
      }

      // Small delay to be polite
      await pause(REQUEST_DELAY_MS);
    }

    console.log(` ${book.slug}: tagged ${bookTagged} verses`);
    totalTagged += bookTagged;
  }

  if (fetchedChapters === 0) {
    throw new Error("No chapter could be fetched; en_bsb.txt was left unchanged");
  }
  if (failedChapters.length > 0) {
    console.warn(`\n${failedChapters.length} chapter(s) could not be fetched and were left untagged: ${failedChapters.join(", ")}`);
  }

  // Step 3: Write updated file
  fs.writeFileSync(bsbPath, lines.join("\n"), "utf-8");
  console.log(`\nDone! Tagged ${totalTagged} verses total (${totalFull} full, ${totalPartial} partial)`);
}

main().catch(console.error);