~repos /only-bible-app

#kotlin#android#ios

GIT_CONFIG_PARAMETERS="'http.version=HTTP/1.1'" git clone https://git.pyrossh.dev/only-bible-app.git
Discussions: https://groups.google.com/g/rust-embed-devs

The only bible app you will ever need. No ads. No in-app purchases. No distractions.



scripts/fetchAllBible.cjs



const https = require("https");
const fs = require("fs");
const path = require("path");
const cheerio = require("cheerio");
// Usage: node fetchAllBible.cjs <wpLangCode> <outputFile>
// e.g.: node fetchAllBible.cjs pa scripts/files/pa_new.txt
// Language code used in the wordproject.org URL path; falls back to "pa".
const wpLang = process.argv[2] || "pa";
// Destination file; falls back to files/<wpLang>_new.txt next to this script.
const outputFile = process.argv[3] || path.join(__dirname, "files", `${wpLang}_new.txt`);
// Read book names from existing file (if any) or use a reference
const existingFile = path.join(__dirname, "files", `${wpLang}.txt`);
// Maps numeric book id (second pipe-separated field) -> book name (first field).
const bookNames = {};
if (fs.existsSync(existingFile)) {
  for (const line of fs.readFileSync(existingFile, "utf-8").split("\n")) {
    if (!line.trim()) continue;
    const parts = line.split("|");
    if (parts.length < 2) continue;
    // Always parse as base 10, and ignore lines whose id field is not a number
    // so a malformed line cannot register a bogus "NaN" entry.
    const bookId = Number.parseInt(parts[1], 10);
    if (Number.isNaN(bookId)) continue;
    bookNames[bookId] = parts[0];
  }
}
// Standard Protestant canon: 66 books with chapter counts from KJV.
// Counts are listed in bookId order; each entry's position is its bookId.
const CHAPTERS_PER_BOOK = [
  // bookId 0-9
  50, 40, 27, 36, 34, 24, 21, 4, 31, 24,
  // bookId 10-19
  22, 25, 29, 36, 10, 13, 10, 42, 150, 31,
  // bookId 20-29
  12, 8, 66, 52, 5, 48, 12, 14, 3, 9,
  // bookId 30-39
  1, 4, 7, 3, 3, 3, 2, 14, 4, 28,
  // bookId 40-49
  16, 24, 21, 28, 16, 16, 13, 6, 6, 4,
  // bookId 50-59
  4, 5, 3, 6, 4, 3, 1, 13, 5, 5,
  // bookId 60-65
  3, 5, 1, 1, 1, 22,
];
// One record per book: zero-based bookId, one-based wpBook (used in the
// download URL), and the number of chapters to fetch.
const BOOKS = CHAPTERS_PER_BOOK.map((chapters, index) => ({
  bookId: index,
  wpBook: index + 1,
  chapters,
}));
/**
 * Download a URL over HTTPS and resolve with the body decoded as UTF-8.
 *
 * Redirects (3xx responses carrying a Location header) are followed, with the
 * Location resolved against the current URL so relative redirects work. Any
 * other non-2xx response rejects instead of handing an error page to the
 * caller as if it were chapter content.
 *
 * @param {string} url - Absolute https URL to fetch.
 * @param {number} [redirectsLeft=5] - Redirect hops still allowed; guards
 *   against redirect loops.
 * @returns {Promise<string>} The response body as a UTF-8 string.
 * @throws (rejects) on network errors, a 15 s timeout, too many redirects,
 *   or a non-2xx final status.
 */
function fetchPage(url, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    const req = https.get(url, (res) => {
      const { statusCode, headers } = res;
      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        // Drain the unused redirect body so the socket is released.
        res.resume();
        if (redirectsLeft <= 0) {
          reject(new Error(`too many redirects at ${url}`));
          return;
        }
        let nextUrl;
        try {
          // Location may be relative; resolve it against the current URL.
          nextUrl = new URL(headers.location, url).toString();
        } catch (err) {
          reject(err);
          return;
        }
        fetchPage(nextUrl, redirectsLeft - 1).then(resolve, reject);
        return;
      }
      if (statusCode < 200 || statusCode >= 300) {
        res.resume();
        reject(new Error(`HTTP ${statusCode} for ${url}`));
        return;
      }
      const chunks = [];
      res.on("data", (chunk) => chunks.push(chunk));
      res.on("end", () => resolve(Buffer.concat(chunks).toString("utf8")));
      res.on("error", reject);
    });
    req.on("error", reject);
    // Abort a stalled request; the explicit reject gives callers a clear message.
    req.setTimeout(15000, () => { req.destroy(); reject(new Error("timeout")); });
  });
}
/**
 * Extract the verse texts of one chapter page.
 *
 * Elements matching `p.ym-noprint` and `.dimver` are dropped, then the `<p>`
 * siblings following the first `<h3>` are taken as the chapter content. That
 * content is cut at every `<span class="verse" id="N">N</span>` marker; any
 * non-empty text before the first marker becomes the first entry, and the
 * text after each marker becomes one entry (even if empty). When no markers
 * are found, the whole content is returned as a single entry if non-empty.
 *
 * @param {string} html - Raw HTML of a chapter page.
 * @returns {string[]} Verse texts in page order, whitespace-collapsed.
 */
function parseVerses(html) {
  const $ = cheerio.load(html);
  $("p.ym-noprint").remove();
  $(".dimver").remove();

  const heading = $("h3").first();
  if (heading.length === 0) return [];

  const contentHtml = heading
    .nextAll("p")
    .toArray()
    .map((el) => $.html(el))
    .join("");
  if (contentHtml === "") return [];

  // Strip tags from an HTML fragment and collapse runs of whitespace.
  const toPlainText = (fragment) =>
    cheerio.load(fragment).text().replace(/\s+/g, " ").trim();

  const markers = Array.from(
    contentHtml.matchAll(/<span[^>]*class="verse"[^>]*id="(\d+)"[^>]*>\s*\d+\s*<\/span>/g)
  );

  if (markers.length === 0) {
    const wholeText = toPlainText(contentHtml);
    return wholeText ? [wholeText] : [];
  }

  const verses = [];
  // Text ahead of the first marker is kept as its own leading verse.
  const leadText = toPlainText(contentHtml.substring(0, markers[0].index));
  if (leadText) verses.push(leadText);

  markers.forEach((marker, pos) => {
    const from = marker.index + marker[0].length;
    const to = pos + 1 < markers.length ? markers[pos + 1].index : contentHtml.length;
    verses.push(toPlainText(contentHtml.substring(from, to)));
  });
  return verses;
}
/**
 * Build a lookup of heading and cross-reference text from a pipe-delimited file.
 *
 * Each usable line has at least seven `|`-separated fields; fields 1-3
 * (book id, chapter index, verse index) form the key `"book|chapter|verse"`
 * and fields 4 and 5 supply the heading and refs. Blank lines and lines with
 * fewer than seven fields are ignored.
 *
 * @param {string} enFile - Path of the reference file to read.
 * @returns {Object<string, {heading: string, refs: string}>} Entries keyed by
 *   `"bookId|chIdx|verseIdx"`.
 */
function loadHeadingsAndRefs(enFile) {
  const index = {};
  const rows = fs.readFileSync(enFile, "utf-8").split("\n");
  rows.forEach((row) => {
    if (row.trim() === "") return;
    const fields = row.split("|");
    if (fields.length >= 7) {
      const entryKey = `${fields[1]}|${fields[2]}|${fields[3]}`;
      index[entryKey] = {
        heading: fields[4] || "",
        refs: fields[5] || "",
      };
    }
  });
  return index;
}
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms - How long to wait before the promise settles.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function delay(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Scrape every chapter listed in BOOKS from wordproject.org for `wpLang`,
 * merge each verse with the heading/refs recorded for the same position in
 * files/en_kjv.txt, and write the pipe-delimited result to `outputFile`.
 *
 * Each chapter is attempted up to three times. Chapters that still fail, or
 * whose verse count differs from the reference file, are collected and
 * printed at the end. Progress and diagnostics go to stderr.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const enFile = path.join(__dirname, "files", "en_kjv.txt");
  const headingMap = loadHeadingsAndRefs(enFile);

  // Tally the expected verse count per "bookId|chIdx" once, instead of
  // rescanning every key of headingMap for each chapter.
  const expectedCounts = new Map();
  for (const key of Object.keys(headingMap)) {
    const chapterKey = key.slice(0, key.lastIndexOf("|"));
    expectedCounts.set(chapterKey, (expectedCounts.get(chapterKey) || 0) + 1);
  }

  // Create the output directory up front so a bad path fails immediately
  // rather than after the whole scrape has finished.
  fs.mkdirSync(path.dirname(outputFile), { recursive: true });

  const allLines = [];
  let totalVerses = 0;
  let totalChapters = 0;
  const issues = [];
  for (const book of BOOKS) {
    const name = bookNames[book.bookId] || `Book${book.bookId}`;
    console.error(`\n=== ${name} (${book.chapters} chapters) ===`);
    for (let ch = 1; ch <= book.chapters; ch++) {
      const chIdx = ch - 1;
      const wpBookStr = String(book.wpBook).padStart(2, "0");
      const url = `https://www.wordproject.org/bibles/${wpLang}/${wpBookStr}/${ch}.htm`;
      let retries = 3;
      let verses = [];
      while (retries > 0) {
        try {
          const html = await fetchPage(url);
          verses = parseVerses(html);
          break;
        } catch (err) {
          retries--;
          if (retries > 0) {
            console.error(` Retry ${name} ch${ch}: ${err.message}`);
            await delay(2000);
          } else {
            console.error(` FAILED ${name} ch${ch}: ${err.message}`);
            issues.push(`FAILED: ${name} ch${ch}`);
          }
        }
      }
      const expectedCount = expectedCounts.get(`${book.bookId}|${chIdx}`) || 0;
      if (verses.length !== expectedCount) {
        console.error(` ${name} ch${ch}: got ${verses.length} verses, expected ${expectedCount}`);
        issues.push(`MISMATCH: ${name} ch${ch}: got ${verses.length}, expected ${expectedCount}`);
      }
      for (let i = 0; i < verses.length; i++) {
        const key = `${book.bookId}|${chIdx}|${i}`;
        const meta = headingMap[key] || { heading: "", refs: "" };
        const line = `${name}|${book.bookId}|${chIdx}|${i}|${meta.heading}|${meta.refs}|${verses[i]}`;
        allLines.push(line);
      }
      totalVerses += verses.length;
      totalChapters++;
      // Brief pause every ten chapters to go easy on the remote server.
      if (totalChapters % 10 === 0) await delay(200);
    }
    console.error(` Done. Total so far: ${totalVerses} verses in ${totalChapters} chapters`);
  }
  fs.writeFileSync(outputFile, allLines.join("\n"));
  console.error(`\n=== COMPLETE ===`);
  console.error(`Total chapters: ${totalChapters}`);
  console.error(`Total verses/lines: ${totalVerses}`);
  console.error(`Output: ${outputFile}`);
  if (issues.length > 0) {
    console.error(`\n=== ISSUES (${issues.length}) ===`);
    for (const issue of issues) console.error(` ${issue}`);
  }
}
// Entry point: run the scrape; on any unhandled failure log it and exit non-zero.
main().catch((fatal) => {
  console.error("Fatal error:", fatal);
  process.exit(1);
});