Improvements to regional handling

Additional cleanup and miscellaneous changes.
I lost track of the individual changes a while ago.
This commit is contained in:
Alexandra
2025-05-29 16:08:01 -06:00
parent 2973211b7d
commit 869d9c72eb
10 changed files with 164 additions and 85 deletions

View File

@@ -1,42 +1,63 @@
import { Sequelize } from 'sequelize';
import 'dotenv/config';
import { Sequelize } from "sequelize";
import "dotenv/config";
// Import models
import defineFile from './models/file.js';
import defineQueryCount from './models/queryCount.js';
import defineMetadata from './models/metadata.js'
import defineFile from "./models/file.js";
import defineQueryCount from "./models/queryCount.js";
import defineMetadata from "./models/metadata.js";
const sequelize = new Sequelize(process.env.POSTGRES_DB, process.env.POSTGRES_USER, process.env.POSTGRES_PASSWORD, {
host: process.env.POSTGRES_HOST || 'localhost',
port: process.env.POSTGRES_PORT || 5432,
dialect: 'postgres',
logging: process.env.DEBUG === '1' ? console.log : false
});
const sequelize = new Sequelize(
process.env.POSTGRES_DB,
process.env.POSTGRES_USER,
process.env.POSTGRES_PASSWORD,
{
host: process.env.POSTGRES_HOST || "localhost",
port: process.env.POSTGRES_PORT || 5432,
dialect: "postgres",
logging: process.env.DEBUG === "1" ? console.log : false,
}
);
// Initialize models
export const File = defineFile(sequelize);
export const QueryCount = defineQueryCount(sequelize);
export const Metadata = defineMetadata(sequelize)
Metadata.hasMany(File)
File.belongsTo(Metadata, {as: "details"})
export const Metadata = defineMetadata(sequelize);
Metadata.hasMany(File);
File.belongsTo(Metadata, { as: "details" });
/**
 * Makes sure the PostgreSQL `pg_trgm` extension is installed in the database
 * that the module-level `sequelize` connection points at.
 *
 * A single idempotent statement is used instead of first looking the
 * extension up in `pg_extension`: `IF NOT EXISTS` turns the call into a no-op
 * when the extension is already present, which saves a round trip and removes
 * the gap between "check" and "create".
 *
 * @returns {Promise<void>} Resolves once the statement has been executed;
 *   rejects with whatever error `sequelize.query` raises.
 */
async function enableTrigram() {
  await sequelize.query("CREATE EXTENSION IF NOT EXISTS pg_trgm");
}
export async function initDB() {
try {
// First try to connect to postgres directly to create database if needed
const rootSequelize = new Sequelize('postgres', process.env.POSTGRES_USER, process.env.POSTGRES_PASSWORD, {
host: process.env.POSTGRES_HOST || 'localhost',
port: process.env.POSTGRES_PORT || 5432,
dialect: 'postgres',
logging: false
});
const rootSequelize = new Sequelize(
"postgres",
process.env.POSTGRES_USER,
process.env.POSTGRES_PASSWORD,
{
host: process.env.POSTGRES_HOST || "localhost",
port: process.env.POSTGRES_PORT || 5432,
dialect: "postgres",
logging: false,
}
);
try {
// Try to create database if it doesn't exist
await rootSequelize.query(`CREATE DATABASE ${process.env.POSTGRES_DB};`);
console.log('Database did not exist, created.');
console.log("Database did not exist, created.");
} catch (err) {
// Ignore error if database already exists
if (!err.message.includes('already exists')) {
if (!err.message.includes("already exists")) {
throw err;
}
} finally {
@@ -45,36 +66,39 @@ export async function initDB() {
// Now connect to the actual database
await sequelize.authenticate();
console.log('DB connected.');
console.log("DB connected.");
// Get current database schema
const queryInterface = sequelize.getQueryInterface();
const tables = await queryInterface.showAllTables();
if (!tables.includes('Files') || !tables.includes('QueryCounts')) {
if (!tables.includes("Files") || !tables.includes("QueryCounts")) {
// If tables don't exist, create them
console.log('DB doesn\'t exist, creating initial database schema...');
console.log("DB doesn't exist, creating initial database schema...");
await sequelize.sync();
console.log('Database schema created.');
await enableTrigram();
console.log("Database schema created.");
// Initialize QueryCount if it's a new installation
await QueryCount.create({ count: 0 });
} else {
// Auto-migrate existing schema
console.log('Checking for DB migrations...');
console.log("Checking for DB migrations...");
await sequelize.sync({ alter: true });
console.log('DB migrations completed.');
await enableTrigram();
console.log("DB migrations completed.");
}
// Only force sync if explicitly requested
if (process.env.FORCE_FILE_REBUILD === '1') {
if (process.env.FORCE_FILE_REBUILD === "1") {
await sequelize.sync({ force: true });
console.log('DB forcefully synchronized.');
await enableTrigram();
console.log("DB forcefully synchronized.");
}
} catch (error) {
console.error('Unable to connect to the DB:', error);
console.error("Unable to connect to the DB:", error);
process.exit(1);
}
}
export default sequelize;
export default sequelize;

View File

@@ -43,7 +43,7 @@ export async function optimizeDatabaseKws() {
let result = await File.findAndCountAll({
limit: BATCH_SIZE,
offset: i,
order: ["id"],
order: ["id", "filename"],
});
for (let x = 0; x < result.rows.length; x++) {
debugPrint(`Submitting job for: ${result.rows[x]["filename"]}`);

View File

@@ -4,7 +4,6 @@ import { resolve } from "path";
import debugPrint from "./debugprint.js";
import { File } from "./models/index.js";
import { bulkIndexFiles } from "./services/elasticsearch.js";
import { optimizeDatabaseKws } from "./dboptimize.js";
import { Timer } from "./time.js";
let piscina = new Piscina({
@@ -146,7 +145,6 @@ export default async function getAllFiles(catList) {
console.log(`\nFinished crawling Myrient in ${proctime.elapsed()}.`);
await piscina.close();
await optimizeDatabaseKws();
return fileCount;
}

View File

@@ -25,6 +25,7 @@
"Sega Game Gear": "Sega Game Gear",
"Nintendo 3DS": "Nintendo 3DS",
"PlayStation Portable": "Sony PlayStation Portable",
"Wii": "Nintendo Wii",
"Wii U": "Nintendo Wii U",
"N-Gage": "Nokia N-Gage",
"PlayStation Vita": "Sony PlayStation Vita",

View File

@@ -21,9 +21,6 @@ import TaskQueue from "./taskqueue.js";
import { singleLineStatus } from "./debugprint.js";
import { Timer } from "./time.js";
import { readFileSync } from "fs";
import { dirname, resolve } from "path";
import { Model } from "sequelize";
import { Console } from "console";
export default class MetadataSearch {
constructor() {
@@ -51,6 +48,7 @@ export default class MetadataSearch {
"game_localizations.name",
"game_localizations.region",
"game_localizations.region.name",
"game_localizations.cover.image_id",
"platforms.name",
"game_type.type",
"screenshots.image_id",
@@ -64,7 +62,7 @@ export default class MetadataSearch {
if (this.twitchSecrets.client_id && this.twitchSecrets.client_secret) {
this.accessToken = await twitchAccessToken(this.twitchSecrets);
this.client = igdb(this.twitchSecrets.client_id, this.accessToken);
const mapFilePath = "./lib/json/igdb_platform_map.json";
const mapFilePath = "./lib/json/maps/igdb_platform.json";
this.platformMap = JSON.parse(readFileSync(mapFilePath, "utf8"));
if (this.accessToken) {
this.authorized = true;
@@ -116,10 +114,10 @@ export default class MetadataSearch {
let games = await File.findAndCountAll({
where: {
nongame: false,
detailsId: null
//detailsId: null,
},
limit: 1000,
order: ["id"],
order: ["id", "filename"],
});
let count = games.count;
let pages = Math.ceil(games.count / 1000);
@@ -130,10 +128,11 @@ export default class MetadataSearch {
games = await File.findAndCountAll({
where: {
nongame: false,
detailsId: null
//detailsId: null,
},
limit: 1000,
offset: x * 1000,
order: ["id", "filename"],
include: { model: Metadata, as: "details" },
});
for (let y = 0; y < games.rows.length; y++) {
@@ -152,11 +151,27 @@ export default class MetadataSearch {
let md = await Metadata.findByPk(metadata[0].id);
await game.setDetails(md);
await md.addFile(game);
await game.save();
await md.save();
found++;
} else {
//this is much slower and should only be used if the faster full text search can't find it.
let metadata = Metadata.fuzzySearchByText(
this.normalizeName(game.filename),
0.6,
game.category
);
if (metadata) {
await game.setDetails(md);
await md.addFile(game);
await game.save();
await md.save();
found++;
}
}
}
}
console.log(`Completed matching metadata to files in ${timer.elapsed()}`)
console.log(`\nFinished matching metadata to files in ${timer.elapsed()}`);
}
async syncAllMetadata(retrying = false) {
@@ -187,7 +202,8 @@ export default class MetadataSearch {
.pipe(
limit(pageSize),
offset(x * pageSize),
fields(this.gameFields)
fields(this.gameFields),
sort("id")
)
.execute();
for (let y in data) {
@@ -209,8 +225,8 @@ export default class MetadataSearch {
}
retryCount = 0;
}
console.log(`Finished syncing metadata in ${timer.elapsed()}`);
this.matchAllMetadata()
console.log(`\nFinished syncing metadata in ${timer.elapsed()}`);
this.matchAllMetadata();
} catch (error) {
if (error.code === "ERR_BAD_REQUEST" && !retrying) {
this.setupClient();
@@ -220,7 +236,7 @@ export default class MetadataSearch {
}
}
async addMetadataToDb(metadata, game) {
async addMetadataToDb(metadata) {
try {
let md = await Metadata.findByPk(metadata.id);
if (!md) {
@@ -229,15 +245,25 @@ export default class MetadataSearch {
id: metadata.id,
},
{
include: File
include: File,
}
);
}
// I hate this
let coverArt = {
default: metadata.cover?.image_id,
};
for (let x in metadata.game_localizations) {
let gl = metadata.game_localizations[x];
if (gl.region && gl.cover) {
coverArt[gl.region.name] = gl.cover.image_id;
}
}
md.title = metadata.name;
md.description = metadata.summary;
md.rating = metadata.total_rating;
md.coverartid = metadata.cover?.image_id;
md.coverartid = JSON.stringify(coverArt);
md.releasedate = metadata.first_release_date
? new Date(metadata.first_release_date * 1000)
: null;
@@ -254,30 +280,29 @@ export default class MetadataSearch {
md.publishers = metadata.involved_companies
?.filter((ic) => ic.publisher)
?.map((ic) => ic.company.name);
let alternates = [];
//I hate this too
let alternates = new Object();
if (metadata.alternative_names) {
alternates.push(
metadata.alternative_names.map((an) => ({
type: an.comment,
name: an.name,
}))
);
for (let x in metadata.alternative_names) {
let an = metadata.alternative_names[x];
if (an.comment && an.name) {
alternates[an.comment] = an.name;
}
}
}
if (metadata.game_localizations) {
alternates.push(
metadata.game_localizations.map((gn) => ({
type: gn.region.name,
name: gn.name,
}))
);
for (let x in metadata.game_localizations) {
let gl = metadata.game_localizations[x];
if (gl.region.name && gl.name) {
alternates[gl.region.name] = gl.name;
}
}
}
//this needs to remain json as we want the keys to be retained
md.alternatetitles = alternates.length ? JSON.stringify(alternates) : null;
md.alternatetitles = alternates.length
? JSON.stringify(alternates)
: null;
await md.save();
if (game) {
await game.setDetails(md);
await md.addFile(game);
}
} catch (error) {
console.error("Error adding metadata:", error);
}

View File

@@ -23,7 +23,7 @@ export default function (sequelize) {
type: DataTypes.STRING,
},
coverartid: {
type: DataTypes.STRING,
type: DataTypes.STRING(2048),
},
releasedate: {
type: DataTypes.DATEONLY,
@@ -70,13 +70,12 @@ export default function (sequelize) {
const title = instance.title || "";
const alternateTitles =
JSON.parse(instance.alternatetitles || "[]")
.map((title) => title.name)
.join(", ") || "";
const titles = Object.values(alternateTitles).join(', ')
const query = `
SELECT to_tsvector('english', $1 || ', ' || $2)
`;
const [results] = await sequelize.query(query, {
bind: [title, alternateTitles],
bind: [title, titles],
raw: true,
});
instance.searchVector = results[0].to_tsvector;
@@ -92,7 +91,7 @@ export default function (sequelize) {
const query = `
SELECT id FROM "Metadata"
WHERE "searchVector" @@ plainto_tsquery('english', $1) ${platformClause}
ORDER BY ts_rank("searchVector", plainto_tsquery('english', $1 )) ${limitClause}
ORDER BY length(title) ${limitClause}
`;
return await sequelize.query(query, {
model: Metadata,
@@ -101,5 +100,31 @@ export default function (sequelize) {
});
};
/**
 * Looks up Metadata rows whose `title` or `alternatetitles` column is similar
 * to the given text, using the SQL `SIMILARITY()` function (provided by the
 * PostgreSQL `pg_trgm` extension, which must therefore be installed).
 *
 * Every caller-supplied value is passed as a bind parameter rather than being
 * interpolated into the SQL text.
 *
 * @param {string} searchQuery - Text compared against title / alternatetitles.
 * @param {number} [fuzziness] - Minimum similarity score a row must exceed.
 *   Any falsy value (including 0) falls back to 0.6.
 * @param {string} [platform] - When truthy, only rows whose `platforms` array
 *   contains this value are considered.
 * @param {number} [limit=1] - Maximum number of rows returned.
 * @returns {Promise<*>} The result of `sequelize.query` for this SELECT, mapped
 *   through the Metadata model (only `id` is selected). Rows are ordered by
 *   title length, shortest first.
 */
Metadata.fuzzySearchByText = async function (
  searchQuery,
  fuzziness,
  platform,
  limit = 1
) {
  const threshold = fuzziness || 0.6;
  const bind = [searchQuery, threshold];

  // Placeholders are numbered from the bind array so the optional platform
  // filter can be left out without leaving a gap in the $n sequence.
  let platformClause = "";
  if (platform) {
    bind.push(platform);
    platformClause = `AND $${bind.length} = ANY(platforms)`;
  }
  bind.push(limit);
  const limitClause = `LIMIT $${bind.length}`;

  // The two similarity checks are parenthesised so the platform filter
  // applies to both of them, not only to the alternatetitles comparison.
  const query = `
    SELECT id FROM "Metadata"
    WHERE (SIMILARITY(title, $1) > $2 OR SIMILARITY(alternatetitles, $1) > $2)
    ${platformClause}
    ORDER BY length(title) ${limitClause}
  `;
  return await sequelize.query(query, {
    model: Metadata,
    bind,
    type: sequelize.QueryTypes.SELECT,
  });
};
return Metadata;
}

View File

@@ -1,6 +1,6 @@
import { Client } from "@elastic/elasticsearch";
import debugPrint from "../debugprint.js";
import { File } from "../models/index.js";
import { File, Metadata } from "../models/index.js";
import { Timer } from "../time.js";
const client = new Client({
@@ -181,15 +181,15 @@ export async function search(query, options) {
// Build results with full PostgreSQL records
let results = response.hits.hits.map((hit) => ({
file: {
...recordMap[hit._id]?.dataValues,
...recordMap[hit._id]?.dataValues
},
metadata: {
...recordMap[hit._id]?.details?.dataValues
},
score: hit._score,
highlights: hit.highlight,
}));
//Filter out anything that couldn't be found in postgres
results = results.filter((result) => result.file.filename);
const elapsed = timer.elapsedSeconds();
return {
items: results,

View File

@@ -1,5 +1,5 @@
import getAllFiles from "./lib/dircrawl.js";
import { optimizeDatabaseKws } from "./dboptimize.js";
import { optimizeDatabaseKws } from "./lib/dboptimize.js";
import FileHandler from "./lib/filehandler.js";
import Searcher from "./lib/search.js";
import cron from "node-cron";

View File

@@ -1,7 +1,11 @@
<%
const metadata = romFile.metadata || new Object()
const file = romFile.file || new Object()
const coverUrl = metadata.coverartid ? `/proxy-image?url=https://images.igdb.com/igdb/image/upload/t_cover_big/${metadata.coverartid}.webp` : "/public/images/coverart/nocoverart.png"
const titles = JSON.parse(metadata.alternatetitles)
const coverarts = JSON.parse(metadata.coverartid)
const coverartId = coverarts[file.region] || coverarts.default
const coverUrl = coverartId ? `/proxy-image?url=https://images.igdb.com/igdb/image/upload/t_cover_big/${coverartId}.webp` : "/public/images/coverart/nocoverart.png"
let images = []
if(metadata.screenshots){
images = JSON.parse(metadata.screenshots).map((im) => `/proxy-image?url=https://images.igdb.com/igdb/image/upload/t_720p/${im}.webp`)
@@ -47,12 +51,12 @@
<% } %>
<% if(metadata.developers) {%>
<div>
<p><span class="info"><%= __('search.developed') %></span> <%= JSON.parse(metadata.developers).join(", ") %></p>
<p><span class="info"><%= __('search.developed') %></span> <%= metadata.developers.join(", ") %></p>
</div>
<% } %>
<% if(metadata.publishers) {%>
<div>
<p><span class="info"><%= __('search.published') %></span> <%= JSON.parse(metadata.publishers).join(", ") %></p>
<p><span class="info"><%= __('search.published') %></span> <%= metadata.publishers.join(", ") %></p>
</div>
<% } %>
<% if(metadata.releasedate) {%>
@@ -67,12 +71,12 @@
<% } %>
<% if(metadata.genre) {%>
<div>
<p><span class="info"><%= __('search.genre') %></span> <%= JSON.parse(metadata.genre).join(", ") %></p>
<p><span class="info"><%= __('search.genre') %></span> <%= metadata.genre.join(", ") %></p>
</div>
<% } %>
<% if(metadata.gamemodes) {%>
<div>
<p><span class="info"><%= __('search.modes') %></span> <%= JSON.parse(metadata.gamemodes).join(", ") %></p>
<p><span class="info"><%= __('search.modes') %></span> <%= metadata.gamemodes.join(", ") %></p>
</div>
<% } %>
<div>

View File

@@ -1,7 +1,9 @@
<%
const metadata = result.metadata || new Object()
const file = result.file || new Object()
const coverUrl = metadata.coverartid ? `/proxy-image?url=https://images.igdb.com/igdb/image/upload/t_cover_big/${metadata.coverartid}.webp` : "/public/images/coverart/nocoverart.png"
const coverarts = JSON.parse(metadata.coverartid)
const coverartId = coverarts[file.region] || coverarts.default
const coverUrl = coverartId ? `/proxy-image?url=https://images.igdb.com/igdb/image/upload/t_cover_big/${coverartId}.webp` : "/public/images/coverart/nocoverart.png"
%>
<div class="col-md-auto row align-items-start searchresult">
<div class="cover">
@@ -13,7 +15,7 @@
<span class="infoitem badge badge-secondary"><%= __('search.region') %> <%= file.region %> <%- flags.createFlag(file.region) %></span>
<span class="infoitem badge badge-secondary"><%= __('search.platform') %> <%= file.category %> <%- consoleIcons.createConsoleImage(file.category) %></span>
<% if(metadata.genre){ %>
<span class="infoitem badge badge-secondary"><%= __('search.genre') %> <%= JSON.parse(metadata.genre).join(' / ') %></span>
<span class="infoitem badge badge-secondary"><%= __('search.genre') %> <%= metadata.genre.join(' / ') %></span>
<% } %>
</p>
<% if(metadata.title) {%>