From c231f7ffc952fb0ee9fe8e060bd49d995df2c814 Mon Sep 17 00:00:00 2001 From: Alexandra Date: Thu, 29 May 2025 10:10:34 -0600 Subject: [PATCH] redo metadata, opting to pull everything in at once. --- lib/debugprint.js | 10 ++ lib/dircrawl.js | 30 ++-- lib/json/igdb_platform_map.json | 94 +++++++++++ lib/metadatasearch.js | 287 ++++++++++++++------------------ lib/models/metadata.js | 157 +++++++++++------ 5 files changed, 353 insertions(+), 225 deletions(-) create mode 100644 lib/json/igdb_platform_map.json diff --git a/lib/debugprint.js b/lib/debugprint.js index af920ba..a79c937 100644 --- a/lib/debugprint.js +++ b/lib/debugprint.js @@ -8,4 +8,14 @@ export function debugPrintDir(string){ if(process.env.DEBUG == "1"){ console.dir(string) } +} + +export function singleLineStatus(string){ + if(process.stdout.isTTY && process.env.DEBUG != "1"){ + process.stdout.clearLine(0); + process.stdout.cursorTo(0); + process.stdout.write(string); + } else { + console.log(string); + } } \ No newline at end of file diff --git a/lib/dircrawl.js b/lib/dircrawl.js index 4ac9809..caca3d6 100644 --- a/lib/dircrawl.js +++ b/lib/dircrawl.js @@ -2,8 +2,8 @@ import { getTableRows, parseOutFile } from "./fileworker.js"; import { Piscina, FixedQueue } from "piscina"; import { resolve } from "path"; import debugPrint from "./debugprint.js"; -import { File } from './models/index.js'; -import { bulkIndexFiles } from './services/elasticsearch.js'; +import { File } from "./models/index.js"; +import { bulkIndexFiles } from "./services/elasticsearch.js"; import { optimizeDatabaseKws } from "./dboptimize.js"; import { Timer } from "./time.js"; @@ -15,7 +15,7 @@ let piscina = new Piscina({ const BATCH_SIZE = 1000; // Process files in batches for better performance export default async function getAllFiles(catList) { - var proctime = new Timer() + var proctime = new Timer(); const url = "https://myrient.erista.me/files/"; let parentRows = await getTableRows({ url: url, base: "" }); let parents = []; @@ 
-94,7 +94,9 @@ export default async function getAllFiles(catList) { } fetchTasks = []; - dirStatus = `Directories Remaining: ${dirs.length}, Files Found: ${fileCount}`; + dirStatus = `Directories Remaining: ${ + dirs.length + }, Files Found: ${fileCount} (${proctime.elapsed()})`; } if (dirs.length == 0 && parseTasks.length > 0) { @@ -122,7 +124,9 @@ export default async function getAllFiles(catList) { } parseTasks = []; - dirStatus = `Directories Remaining: ${dirs.length}, Files Found: ${fileCount}`; + dirStatus = `Directories Remaining: ${ + dirs.length + }, Files Found: ${fileCount} (${proctime.elapsed()})`; } if (dirStatus) { @@ -153,7 +157,7 @@ async function processBatch(files) { for (let i = 0; i < files.length; i += chunkSize) { const chunk = files.slice(i, i + chunkSize); const dbFiles = await File.bulkCreate( - chunk.map(file => ({ + chunk.map((file) => ({ filename: file.filename, path: file.path, size: file.size, @@ -162,20 +166,24 @@ async function processBatch(files) { date: file.date, region: file.region, group: file.group, - nongame: file.nongame + nongame: file.nongame, })), { returning: true, - updateOnDuplicate: ['path'] + updateOnDuplicate: ["path"], } ); // Index chunk in Elasticsearch await bulkIndexFiles(dbFiles); - debugPrint(`Processed ${i + chunk.length} of ${files.length} files in current batch`); + debugPrint( + `Processed ${i + chunk.length} of ${ + files.length + } files in current batch` + ); } } catch (error) { - console.error('Error processing batch:', error); + console.error("Error processing batch:", error); } } @@ -205,4 +213,4 @@ function singleLineStatus(str) { } else { console.log(str); } -} \ No newline at end of file +} diff --git a/lib/json/igdb_platform_map.json b/lib/json/igdb_platform_map.json new file mode 100644 index 0000000..1df4aa7 --- /dev/null +++ b/lib/json/igdb_platform_map.json @@ -0,0 +1,94 @@ +{ + "Linux": "IBM PC Compatible", + "PC (Microsoft Windows)": "IBM PC Compatible", + "PlayStation": "Sony PlayStation 1",
+ "PlayStation 2": "Sony PlayStation 2", + "PlayStation 3": "Sony PlayStation 3", + "Xbox": "Microsoft Xbox", + "Xbox 360": "Microsoft Xbox 360", + "Commodore C64/128/MAX": "Commodore 64", + "Amiga": "Commodore Amiga", + "Nintendo Entertainment System": "Nintendo Entertainment System", + "Nintendo DS": "Nintendo DS", + "Nintendo GameCube": "Nintendo GameCube", + "Game Boy Color": "Nintendo Game Boy Color", + "Dreamcast": "Sega Dreamcast", + "Game Boy Advance": "Nintendo Game Boy Advance", + "Amstrad CPC": "Amstrad CPC", + "ZX Spectrum": "Sinclair ZX Spectrum", + "MSX": "Microsoft MSX", + "Sega Mega Drive/Genesis": "Sega Mega Drive", + "Sega 32X": "Sega 32X", + "Sega Saturn": "Sega Saturn", + "Game Boy": "Nintendo Game Boy", + "iOS": "Mobile", + "Sega Game Gear": "Sega Game Gear", + "Nintendo 3DS": "Nintendo 3DS", + "PlayStation Portable": "Sony PlayStation Portable", + "Wii U": "Nintendo Wii U", + "N-Gage": "Nokia N-Gage", + "PlayStation Vita": "Sony PlayStation Vita", + "3DO Interactive Multiplayer": "Panasonic 3DO", + "Family Computer Disk System": "Nintendo Family Computer Disk System", + "MSX2": "Microsoft MSX2", + "Atari 7800": "Atari 7800", + "Atari Lynx": "Atari Lynx", + "Atari Jaguar": "Atari Jaguar", + "Sega Master System/Mark III": "Sega Master System", + "Atari 8-bit": "Atari 8-bit", + "Atari 5200": "Atari 5200", + "Intellivision": "Mattel Intellivision", + "Vectrex": "GCE Vectrex", + "Commodore VIC-20": "Commodore VIC-20", + "Sharp X1": "Sharp X1", + "Sega CD": "Sega CD", + "Neo Geo MVS": "SNK NeoGeo Pocket", + "SG-1000": "Sega SG-1000", + "TurboGrafx-16/PC Engine": "NEC PC Engine", + "Virtual Boy": "Nintendo Virtual Boy", + "Microvision": "Microvision", + "Bally Astrocade": "Bally Astrocade", + "Commodore Plus/4": "Commodore Plus-4", + "Apple IIGS": "Apple IIGS", + "Philips CD-i": "Philips CD-i", + "Neo Geo Pocket": "SNK NeoGeo Pocket", + "Neo Geo Pocket Color": "SNK NeoGeo Pocket Color", + "Sharp X68000": "Sharp X68000", + "Fairchild Channel F": 
"Fairchild Channel F", + "PC Engine SuperGrafx": "NEC PC Engine", + "Texas Instruments TI-99": "Texas Instruments TI-99-4A", + "Odyssey 2 / Videopac G7000": "Magnavox Odyssey 2", + "Neo Geo CD": "SNK NeoGeo Pocket", + "New Nintendo 3DS": "New Nintendo 3DS", + "PC-9800 Series": "NEC PC-98", + "FM-7": "Fujitsu FM-7", + "Pokémon mini": "Nintendo Pokemon Mini", + "PlayStation 5": "Sony PlayStation 5", + "Xbox Series X|S": "Microsoft Xbox Series X|S", + "Google Stadia": "Google", + "DVD Player": "DVD-Video", + "Blu-ray Player": "BD-Video", + "Zeebo": "Zeebo", + "PC-FX": "NEC PC-FX", + "Game & Watch": "Nintendo Game & Watch", + "Sega Pico": "Sega PICO", + "Sinclair ZX81": "Sinclair ZX Spectrum", + "Sharp MZ-2200": "Sharp MZ-2200", + "Epoch Cassette Vision": "Epoch Game Pocket Computer", + "Epoch Super Cassette Vision": "Epoch Super Cassette Vision", + "Game.com": "Tiger Game.com", + "Casio Loopy": "Casio Loopy", + "Mega Duck/Cougar Boy": "Welback Mega Duck", + "Leapster": "LeapFrog Leapster", + "Leapster Explorer/LeadPad Explorer": "LeapFrog LeapPad", + "Watara/QuickShot Supervision": "Watara SuperVision", + "64DD": "Nintendo 64DD", + "Arduboy": "Arduboy Inc Arduboy", + "V.Smile": "VTech V.Smile", + "Arcadia 2001": "Emerson Arcadia 2001", + "Gizmondo": "Tiger Gizmondo", + "Apple Pippin": "Apple-Bandai Pippin", + "Panasonic M2": "Panasonic M2", + "Super A'Can": "Funtech Super Acan", + "Sega CD 32X": "Sega CD" +} \ No newline at end of file diff --git a/lib/metadatasearch.js b/lib/metadatasearch.js index 8f9b434..72a72ad 100644 --- a/lib/metadatasearch.js +++ b/lib/metadatasearch.js @@ -17,8 +17,11 @@ import { offset, } from "@phalcode/ts-igdb-client"; import { File, Metadata } from "./database.js"; -import debugPrint from "./debugprint.js"; import TaskQueue from "./taskqueue.js"; +import { singleLineStatus } from "./debugprint.js"; +import { Timer } from "./time.js"; +import { readFileSync } from "fs"; +import { dirname, resolve } from "path"; export default class 
MetadataSearch { constructor() { @@ -49,16 +52,23 @@ export default class MetadataSearch { "platforms.name", "game_type.type", "screenshots.image_id", - "videos.video_id" + "videos.video_id", ]; + getPlatformMapping() { + + } + async setupClient() { try { if (this.twitchSecrets.client_id && this.twitchSecrets.client_secret) { this.accessToken = await twitchAccessToken(this.twitchSecrets); this.client = igdb(this.twitchSecrets.client_id, this.accessToken); + const mapFilePath = "./lib/json/igdb_platform_map.json"; + this.platformMap = JSON.parse(readFileSync(mapFilePath, "utf8")); if (this.accessToken) { this.authorized = true; + this.syncAllMetadata(); return; } } @@ -68,88 +78,6 @@ export default class MetadataSearch { } } - async getMetadata(query, retrying = false) { - try { - if (!this.authorized) return; - const { data } = await this.client - .multi(...this.buildGameMultiQuery(query)) - .execute(); - return data; - } catch (error) { - if (error === "ERR_BAD_REQUEST" && !retrying) { - this.setupClient(); - return this.getMetadata(query, true); - } - console.error("Failed to retrieve metadata:", error); - } - } - - buildGameMultiQuery(query) { - let multiQuery = []; - for (let x in query) { - multiQuery.push( - request("games") - .alias(x) - .pipe( - fields(this.gameFields), - or( - and( - ...this.buildAndClauses("name", "~", query[x].name), - where("game_type.type", "!=", "Mod"), - where("game_type.type", "!=", "DLC"), - ...this.buildPlatformClause("~", query[x].platform) - ), - and( - ...this.buildAndClauses( - "alternative_names.name", - "~", - query[x].name - ), - where("game_type.type", "!=", "Mod"), - where("game_type.type", "!=", "DLC"), - ...this.buildPlatformClause("~", query[x].platform) - ), - and( - ...this.buildAndClauses( - "game_localizations.name", - "~", - query[x].name - ), - where("game_type.type", "!=", "Mod"), - where("game_type.type", "!=", "DLC"), - ...this.buildPlatformClause("~", query[x].platform) - ) - ), - sort("name", "asc"), - 
limit(1) - ) - ); - } - return multiQuery; - } - - buildAndClauses(field, op, string) { - let andClauses = []; - let name = [...new Set(string.split(" "))].filter((n) => n); //dedupe; - for (let x in name) { - andClauses.push(where(field, op, name[x], WhereFlags.CONTAINS)); - } - return andClauses; - } - - buildPlatformClause(op, string) { - if (string == "Others") return []; - //special garbage because SOMEONE doesn't value consistency - string = string.replace("Nintendo Wii", "Wii"); - string = string.replace("Nintendo Game Boy", "Game Boy"); - string = string.replace("Nintendo Satellaview", "Satellaview"); - string = string.replace("Sony PlayStation", "PlayStation"); - string = string.replace("Microsoft Xbox", "Xbox"); - string = string.replace("Commodore 64", "Commodore C64"); - string = string.replace("Commodore Amiga", "Amiga"); - return [where("platforms.name", op, string, WhereFlags.CONTAINS)]; - } - normalizeName(filename) { if (!filename) return; return filename @@ -161,58 +89,102 @@ export default class MetadataSearch { .trim(); } - async getGamesMetadata(games) { + async getIGDBGamesCount(retrying = false) { try { - if (!this.authorized || !games.length) return; - let gameQuery = []; - for (let x in games) { - if (!(await games[x].getDetails())) - if (!games[x].nongame) { - if (!games[x].blockmetadata) { - gameQuery.push({ - name: this.normalizeName(games[x].filename), - platform: games[x].category, - id: x, - }); - } - } - } - if (!gameQuery.length) return; - let gameMetas = await this.queue.enqueue(this.getMetadata, this, gameQuery) - debugPrint(JSON.stringify(gameMetas, null, 2)); - if (!gameMetas.length) return; - for (let x in gameMetas) { - if (gameMetas[x].result.length) { - await this.addMetadataToDb( - gameMetas[x].result[0], - games[gameQuery[x].id] - ); - } else { - games[x].blockmetadata = true; - games[x].save(); - } - } + if (!this.authorized) return 0; + const { data } = await this.client + .request("games/count") + .pipe( + and( + 
where("game_type.type", "!=", "Mod"), + where("game_type.type", "!=", "DLC") + ) + ) + .execute(); + return data.count; } catch (error) { - console.error("Error getting metadata:", error); + if (error.code === "ERR_BAD_REQUEST" && !retrying) { + await this.setupClient(); //wait for the refreshed client before retrying + return this.getIGDBGamesCount(true); + } + console.error("Error getting IGDB games count:", error); + return 0; } } - async queueGetGamesMetadata(games) { - try { - await this.getGamesMetadata(games); //we don't actually care as long as it finishes - let details = await Promise.all(games.map((game) => game.getDetails())); - let combined = []; - //make sure the metadata gets included with the gamedata - for (let x in games) { - combined.push({ - file: games[x].dataValues, - metadata: details[x]?.dataValues, - }); + async matchAllMetadata() { + const { rows: games } = await File.findAndCountAll({ + where: { + nongame: false, + }, + limit: 1000, + }); + for (const game of games) { + //findAndCountAll resolves to { count, rows }; searchByText resolves to an array of matches + const [metadata] = await Metadata.searchByText(game.filename, game.category); + if (metadata) { + await game.setDetails(metadata); + await metadata.addFile(game); + } + } + } + + async syncAllMetadata(retrying = false) { + try { + const timer = new Timer(); + if (!this.authorized) { + console.log( + "Twitch credentials are unavailable or invalid; metadata sync is unavailable."
+ ); + return; + } + console.log("Syncing all metadata..."); + let count = await this.getIGDBGamesCount(); + let pageSize = 500; + let pages = Math.ceil(count / pageSize); + let retryCount = 0; + for (let x = 0; x < pages; x++) { + if (retryCount == 5) { retryCount = 0; continue; } //give up on this page only, not on every page after it + singleLineStatus( + `Syncing metadata: ${x * pageSize} / ${count} ${( + ((x * pageSize) / count) * + 100 + ).toFixed(2)}% (${timer.elapsed()})` + ); + try { + let { data } = await this.client + .request("games") + .pipe( + limit(pageSize), + offset(x * pageSize), + fields(this.gameFields) + ) + .execute(); + for (let y in data) { + await this.addMetadataToDb(data[y]); + } + } catch (error) { + if (error.code === "ERR_BAD_RESPONSE") { + await this.sleep(1000); + retryCount++; + console.log( + `Retrieving metadata at offset ${ + x * pageSize + } failed. Retry count: ${retryCount}` + ); + x--; //step back after logging so the loop increment retries this same page + continue; + } + throw error; //hoist it up + } + retryCount = 0; + } + } catch (error) { + if (error.code === "ERR_BAD_REQUEST" && !retrying) { + await this.setupClient(); //wait for the refreshed client before retrying + return this.syncAllMetadata(true); + } + console.error("Error syncing all metadata:", error); + } } @@ -223,23 +195,6 @@ export default class MetadataSearch { md = await Metadata.build( { id: metadata.id, - title: metadata.name, - - description: metadata.summary, - rating: metadata.total_rating, - coverartid: metadata.cover?.image_id, - releasedate: metadata.first_release_date - ?
new Date(metadata.first_release_date * 1000) - : null, - genre: JSON.stringify(metadata.genres?.map((genre) => genre.name)), - gamemodes: JSON.stringify( - metadata.game_modes?.map((gm) => gm.name) - ), - platforms: JSON.stringify( - metadata.platforms?.map((platform) => platform.name) - ), - screenshots: JSON.stringify(metadata.screenshots?.map((ss) => ss.image_id)), - videos: JSON.stringify(metadata.videos?.map((v) => v.video_id)) }, { returning: true, @@ -248,17 +203,25 @@ export default class MetadataSearch { } ); } - //these don't work right unless I do them after the fact. - md.developers = JSON.stringify( - metadata.involved_companies - ?.filter((ic) => ic.developer) - ?.map((ic) => ic.company.name) - ); - md.publishers = JSON.stringify( - metadata.involved_companies - ?.filter((ic) => ic.publisher) - ?.map((ic) => ic.company.name) - ); + md.title = metadata.name; + + md.description = metadata.summary; + md.rating = metadata.total_rating; + md.coverartid = metadata.cover?.image_id; + md.releasedate = metadata.first_release_date + ? 
new Date(metadata.first_release_date * 1000) + : null; + md.genre = metadata.genres?.map((genre) => genre.name); + md.gamemodes = metadata.game_modes?.map((gm) => gm.name); + md.platforms = metadata.platforms?.map((platform) => this.platformMap[platform.name] || platform.name); + md.screenshots = metadata.screenshots?.map((ss) => ss.image_id); + md.videos = metadata.videos?.map((v) => v.video_id); + md.developers = metadata.involved_companies + ?.filter((ic) => ic.developer) + ?.map((ic) => ic.company.name); + md.publishers = metadata.involved_companies + ?.filter((ic) => ic.publisher) + ?.map((ic) => ic.company.name); let alternates = []; if (metadata.alternative_names) { alternates.push( @@ -276,12 +239,18 @@ export default class MetadataSearch { })) ); } - md.alternatetiles = JSON.stringify(alternates); + //this needs to remain json as we want the keys to be retained + md.alternatetitles = JSON.stringify(alternates); await md.save(); - await game.setDetails(md); - await md.addFile(game); + if (game) { + await game.setDetails(md); + await md.addFile(game); + } } catch (error) { console.error("Error adding metadata:", error); } } + async sleep(delay) { + return new Promise((resolve) => setTimeout(resolve, delay)); + } } diff --git a/lib/models/metadata.js b/lib/models/metadata.js index 5fb7d93..a4d5b97 100644 --- a/lib/models/metadata.js +++ b/lib/models/metadata.js @@ -1,58 +1,105 @@ -import { DataTypes } from "sequelize" +import { DataTypes, DATE } from "sequelize"; export default function (sequelize) { - const Metadata = sequelize.define('Metadata', { - id: {//these will match the igdbid to make things a little easier - type: DataTypes.INTEGER, - primaryKey: true, + const Metadata = sequelize.define( + "Metadata", + { + id: { + //these will match the igdbid to make things a little easier + type: DataTypes.INTEGER, + primaryKey: true, + }, + title: { + type: DataTypes.STRING, + allowNull: false, + }, + alternatetitles: { + type: DataTypes.TEXT, //serialized JSON of every alternate title, so the length is unbounded + }, + description: { + type: DataTypes.STRING(16384), + }, +
rating: { + type: DataTypes.STRING, + }, + coverartid: { + type: DataTypes.STRING, + }, + releasedate: { + type: DataTypes.DATEONLY, + }, + genre: { + type: DataTypes.ARRAY(DataTypes.STRING), + }, + developers: { + type: DataTypes.ARRAY(DataTypes.STRING), + }, + publishers: { + type: DataTypes.ARRAY(DataTypes.STRING), + }, + gamemodes: { + type: DataTypes.ARRAY(DataTypes.STRING), + }, + platforms: { + type: DataTypes.ARRAY(DataTypes.STRING), + }, + screenshots: { + type: DataTypes.ARRAY(DataTypes.STRING), + }, + videos: { + type: DataTypes.ARRAY(DataTypes.STRING), + }, + searchVector: { + type: DataTypes.TSVECTOR, + allowNull: true, + }, + }, + { + indexes: [ + { fields: ["title"] }, + { + name: "metadata_search_idx", + using: "gin", + fields: ["searchVector"], }, - title: { - type: DataTypes.STRING, - allowNull: false - }, - alternatetitles: { - type: DataTypes.STRING - }, - description: { - type: DataTypes.STRING(2048) - }, - rating: { - type: DataTypes.STRING - }, - coverartid: { - type: DataTypes.STRING - }, - releasedate: { - type: DataTypes.DATEONLY - }, - //anything that stores as json make the limit much higher - genre: { - type: DataTypes.STRING(2048) - }, - developers: { - type: DataTypes.STRING(2048) - }, - publishers: { - type: DataTypes.STRING(2048) - }, - gamemodes:{ - type: DataTypes.STRING(2048) - }, - platforms: { - type: DataTypes.STRING(2048) - }, - screenshots: { - type: DataTypes.STRING(2048) - }, - videos:{ - type: DataTypes.STRING(2048) - } - }, { - indexes: [ - { fields: ['title'] }, - { fields: ['description'] },//If this slows down the db may want to not index this. 
- ] - }) - - return Metadata -} \ No newline at end of file + ], + } + ); + + Metadata.beforeSave("addVector", async (instance) => { + const title = instance.title || ""; + const query = ` + SELECT to_tsvector('english', $1) + `; + const [results] = await sequelize.query(query, { + bind: [title], + raw: true, + }); + instance.searchVector = results[0].to_tsvector; + }); + + // Full-text search on searchVector; resolves to an array of at most `limit` Metadata rows, optionally filtered by platform + Metadata.searchByText = async function (searchQuery, platform, limit = 1) { + // Replacements are escaped as values, so SQL keywords must live in the query text itself. + const platformClause = platform ? "AND :platform = ANY(platforms)" : ""; + const maxRows = Number.parseInt(limit, 10); + const query = ` + SELECT * FROM "Metadata" + WHERE "searchVector" @@ plainto_tsquery('english', :search) ${platformClause} + ORDER BY ts_rank("searchVector", plainto_tsquery('english', :search)) DESC + LIMIT :limit + `; + return await sequelize.query(query, { + model: Metadata, + replacements: { + search: searchQuery, + // Bound only when the platform filter is present in the query text. + ...(platform ? { platform } : {}), + limit: Number.isInteger(maxRows) && maxRows > 0 ? maxRows : 1, + }, + type: sequelize.QueryTypes.SELECT, + }); + }; + + return Metadata; +}