mirror of
https://github.com/alexankitty/Myrient-Search-Engine.git
synced 2026-01-15 16:33:15 -03:00
126 lines
3.7 KiB
JavaScript
126 lines
3.7 KiB
JavaScript
import debugPrint from "../utility/printutils.js";
|
|
import { bulkIndexFiles } from "../services/elasticsearch.js";
|
|
import { File } from "./models/index.js";
|
|
import { readFileSync } from "fs";
|
|
import { fileURLToPath } from "url";
|
|
import { dirname, resolve } from "path";
|
|
import { Piscina, FixedQueue } from "piscina";
|
|
import { Timer } from "../utility/time.js";
|
|
|
|
// Worker pool used to run the keyword-optimization jobs in worker threads.
// NOTE(review): this worker path is resolved against the process working
// directory, whereas the keyword JSON paths in this file are resolved against
// this module's directory — confirm the process is always started from the
// project root.
const piscina = new Piscina({
  filename: resolve("./lib/database/workers", "dbkwworker.js"),
  taskQueue: new FixedQueue(),
});
|
|
|
|
// Number of rows submitted to the worker pool per batch.
const BATCH_SIZE = 100;

// ES modules have no built-in __filename/__dirname; derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Location of the related-keyword JSON files, relative to this module.
const relatedKwRoot = "../json/relatedkeywords/";
const catKwPath = resolve(__dirname, relatedKwRoot, "categories.json");
const nameKwpath = resolve(__dirname, relatedKwRoot, "names.json");
const regionKwpath = resolve(__dirname, relatedKwRoot, "regions.json");

// Synchronously reads and parses one keyword JSON file.
const loadKeywordJson = (jsonPath) => JSON.parse(readFileSync(jsonPath, "utf8"));

// Each key must match a column name in the File db model: the optimizer reads
// `row[key]` and writes `row[key + "kws"]`.
// `category` and `subcategories` are both loaded from categories.json; the file
// is parsed once per entry so the two entries remain separate objects.
const keywords = {
  filename: loadKeywordJson(nameKwpath),
  category: loadKeywordJson(catKwPath),
  subcategories: loadKeywordJson(catKwPath),
  region: loadKeywordJson(regionKwpath),
};
|
|
|
|
/**
 * Recomputes the `<column>kws` keyword columns for every row returned by
 * `File.findAll` and hands each processed batch to `bulkIndexFiles`.
 *
 * Rows are handled in batches of BATCH_SIZE. For each row, the columns named
 * by the keys of `keywords` are sent to the "optimizeKws" worker task. When the
 * worker's result differs from the row's current `<column>kws` values, the row
 * is updated and saved. Progress is reported through `singleLineStatus`.
 *
 * A worker task that rejects is logged and its row is left unchanged; the
 * remaining rows are still processed.
 *
 * @returns {Promise<void>} Resolves once every batch has been processed.
 */
export async function optimizeDatabaseKws() {
  const proctime = new Timer();
  let changes = 0;
  console.log("Optimizing DB Keywords...");

  const rows = await File.findAll({
    order: ["id", "filename"],
    attributes: [
      "id",
      "filename",
      "filenamekws",
      "category",
      "categorykws",
      "subcategories",
      "subcategorieskws",
      "region",
      "regionkws",
      "type",
      "nongame",
    ],
  });
  // Use the number of rows actually fetched instead of a separate COUNT query,
  // so a row added or removed between the two queries cannot push the loop
  // past the end of `rows`.
  const dbLength = rows.length;
  const columns = Object.keys(keywords);

  for (let batchStart = 0; batchStart < dbLength; batchStart += BATCH_SIZE) {
    singleLineStatus(
      `Optimizing Keywords: ${batchStart} / ${dbLength} ${(
        (batchStart / dbLength) *
        100
      ).toFixed(2)}% (${proctime.elapsed()}) Optimized Rows: ${changes}`
    );

    const batch = rows.slice(batchStart, batchStart + BATCH_SIZE);

    // Queue one worker job per row; results come back in the same order.
    const tasks = batch.map((row) => {
      debugPrint(`Submitting job for: ${row.filename}`);
      // NOTE(review): the payload is an array carrying named properties, as in
      // the original code. The worker is not visible from here, so the shape
      // is left untouched — a plain object would be more idiomatic if the
      // worker does not depend on it being an array.
      const data = [];
      for (const column of columns) {
        data[column] = row[column];
      }
      return piscina
        .run({ data, keywords }, { name: "optimizeKws" })
        .catch((err) => {
          // Log and resolve to undefined so one bad row does not reject the
          // whole batch; the row is skipped below.
          console.error(err);
          return undefined;
        });
    });

    const results = await Promise.all(tasks);
    debugPrint(`Resolving ${results.length} optimization tasks.`);

    for (const [offset, optimized] of results.entries()) {
      // A failed task was already logged; keep the row's existing keywords
      // rather than dereferencing an undefined result.
      if (optimized == null) continue;

      const row = batch[offset];
      let changed = false;
      for (const column of columns) {
        const kwColumn = `${column}kws`;
        // Loose equality is kept on purpose: with `==`, a null column and an
        // undefined worker value count as "unchanged".
        // NOTE(review): confirm whether a strict comparison is wanted here.
        if (row[kwColumn] == optimized[kwColumn]) continue;
        row[kwColumn] = optimized[kwColumn];
        changed = true;
      }
      if (changed) {
        await row.save();
        changes++;
      }
    }

    await bulkIndexFiles(batch);
  }

  console.log(
    `\nCompleted Keyword Optimization for ${changes} row${
      changes === 1 ? "" : "s"
    } in ${proctime.elapsed()}.`
  );
}
|
|
|
|
/**
 * Prints a status message, rewriting the current terminal line when possible.
 *
 * When stdout is a TTY and the DEBUG environment variable is not "1", the
 * current line is cleared, the cursor is moved to column 0 and the message is
 * written without a trailing newline. Otherwise the message is printed with
 * console.log, one line per call.
 *
 * @param {string} str - The status text to display.
 * @returns {void}
 */
function singleLineStatus(str) {
  const canRewriteLine = process.stdout.isTTY && process.env.DEBUG !== "1";
  if (!canRewriteLine) {
    console.log(str);
    return;
  }
  process.stdout.clearLine(0);
  process.stdout.cursorTo(0);
  process.stdout.write(str);
}
|