Files
Myrient-Search-Engine/lib/dboptimize.js
Alexandra 3e6f6eeb36 * reimplement searchalikes
* separate kws based on categories
* auto include kws based on field options
* add roman numeral parse
* add number to name
* split out json search alikes to be per category
* add new kws columns to Files
* add search sample for determining if a game is in a series (maybe this could be useful somewhere else, too)
2025-05-18 07:11:37 -06:00

100 lines
3.1 KiB
JavaScript

import debugPrint from "./debugprint.js";
import { bulkIndexFiles } from "./services/elasticsearch.js";
import { File } from "./models/index.js";
import { readFileSync } from "fs";
import { fileURLToPath } from "url";
import { dirname, resolve } from "path";
import { Piscina, FixedQueue } from "piscina";
import { timer } from "./time.js";
// Number of File rows fetched and processed per page.
const BATCH_SIZE = 1000;
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Worker pool that runs the keyword jobs. The worker script is resolved
// relative to this module (like the JSON files below) instead of the process
// working directory, so the pool also starts when the app is launched from a
// directory other than the project root.
const piscina = new Piscina({
  filename: resolve(__dirname, "dbkwworker.js"),
  taskQueue: new FixedQueue(),
});

const relatedKwRoot = "../lib/json/relatedkeywords/";
const catKwPath = resolve(__dirname, relatedKwRoot + "categories.json");
const nameKwpath = resolve(__dirname, relatedKwRoot + "names.json");
const regionKwpath = resolve(__dirname, relatedKwRoot + "regions.json");

/**
 * Reads a UTF-8 JSON file synchronously and returns the parsed value.
 * @param {string} path - Absolute path of the JSON file.
 * @returns {*} The parsed JSON content.
 */
const loadKeywordFile = (path) => JSON.parse(readFileSync(path, "utf8"));

// Related-keyword tables, keyed by File column name. Each key must match a
// column of the File db model: the key selects the row value sent to the
// worker, and "<key>kws" is the column the result is written back to.
// `category` and `subcategories` are loaded from the same file but parsed
// separately, so each key gets its own independent object.
const keywords = {
  filename: loadKeywordFile(nameKwpath),
  category: loadKeywordFile(catKwPath),
  subcategories: loadKeywordFile(catKwPath),
  region: loadKeywordFile(regionKwpath),
};
/**
 * Recomputes the "<column>kws" keyword columns for every File row.
 *
 * Rows are read in pages of BATCH_SIZE. For each row, the values of the
 * columns named in `keywords` are sent to the "optimizeKws" worker task; any
 * "<column>kws" value in the worker result that differs from the row is
 * copied onto the row, changed rows are saved, and the whole page is then
 * passed to `bulkIndexFiles`.
 *
 * A worker task that fails is logged and its row is left unchanged; the
 * remaining rows of the page are still processed.
 *
 * @returns {Promise<void>} Resolves once every page has been processed.
 * @throws Rejects if counting, fetching, saving or indexing rows rejects.
 */
export async function optimizeDatabaseKws() {
  const proctime = new timer();
  const columns = Object.keys(keywords);
  let changes = 0;
  console.log("Optimizing DB Keywords...");
  const dbLength = await File.count();

  let offset = 0;
  while (offset < dbLength) {
    singleLineStatus(`Optimizing Keywords: ${offset} / ${dbLength}`);
    // NOTE(review): the query has no explicit ordering, so page contents are
    // whatever order the database returns; rows saved by an earlier page
    // could be returned again or skipped. Consider ordering by a stable key.
    const { rows } = await File.findAndCountAll({
      limit: BATCH_SIZE,
      offset,
    });

    // The table shrank after the initial count: stop instead of re-requesting
    // the same empty page forever.
    if (rows.length === 0) {
      break;
    }

    const tasks = rows.map((row) => {
      debugPrint(`Submitting job for: ${row["filename"]}`);
      // Plain object (not an array): the payload is keyed by column name.
      const data = {};
      for (const column of columns) {
        data[column] = row[column];
      }
      return piscina
        .run({ data, keywords }, { name: "optimizeKws" })
        .catch((err) => {
          console.error(err);
          // Explicit marker so the failed row is skipped below.
          return null;
        });
    });

    const results = await Promise.all(tasks);
    debugPrint(`Resolving ${results.length} optimization tasks.`);

    const pendingSaves = [];
    results.forEach((result, index) => {
      // Failed (or empty) worker result: keep the row as it is.
      if (result == null) {
        return;
      }
      const row = rows[index];
      let changed = false;
      for (const column of columns) {
        const kwColumn = `${column}kws`;
        // Loose equality is kept from the original comparison (null and
        // undefined count as equal). NOTE(review): if these columns hold
        // arrays or objects this compares by reference and always differs.
        if (row[kwColumn] == result[kwColumn]) {
          continue;
        }
        row[kwColumn] = result[kwColumn];
        changed = true;
      }
      if (changed) {
        pendingSaves.push(row.save());
        changes++;
      }
    });

    // Wait for the writes so failures surface here and indexing runs only
    // after the page has been saved.
    await Promise.all(pendingSaves);
    await bulkIndexFiles(rows);
    offset += rows.length;
  }

  console.log(
    `\nCompleted Keyword Optimization for ${changes} row${
      changes === 1 ? "" : "s"
    } in ${proctime.elapsed()}.`
  );
}
/**
 * Prints a progress message.
 *
 * When stdout is a TTY and the DEBUG environment variable is not "1", the
 * current terminal line is cleared and overwritten with the message (no
 * trailing newline). Otherwise the message is logged on its own line.
 *
 * @param {string} str - The status text to display.
 */
function singleLineStatus(str) {
  const { stdout } = process;
  const canRewriteLine = stdout.isTTY && process.env.DEBUG !== "1";

  if (!canRewriteLine) {
    console.log(str);
    return;
  }

  stdout.clearLine(0);
  stdout.cursorTo(0);
  stdout.write(str);
}