]> git.r.bdr.sh - rbdr/prompt/blame - bin/wordnet_to_json.js
Version the corpus
[rbdr/prompt] / bin / wordnet_to_json.js
CommitLineData
38431710
RBR
1#!/usr/bin/env node
2
3import { basename, join } from 'path';
4import { mkdir, readFile, writeFile } from 'fs/promises';
5import { debuglog } from 'util';
6
/**
 * Internal namespace of the wordnet-to-JSON converter: the parsing constants,
 * a debug logger, and the individual steps that `run` chains together.
 */
const internals = {

  // Matches a lemma row of the form `<digits>-<type> lemma <term>`, where
  // <type> is one of a, n, r or v. Rows that do not match are ignored by parse.
  kParseRegex: /[0-9]+-(?<type>[anrv])\s+lemma\s+(?<term>.*)/,

  // Maps the one-letter type captured by kParseRegex to the name of the list
  // (and of the output file) the term belongs to.
  kTermMapping: {
    a: 'adjectives',
    n: 'nouns',
    v: 'verbs',
    r: 'adverbs'
  },

  // Debug logger; util.debuglog only prints when NODE_DEBUG enables this section.
  debug: debuglog('wordnet_to_json'),

  /**
   * Validates that both command line arguments are present.
   *
   * @throws {Error} after printing the usage when fewer than two arguments
   * follow the script name.
   */
  validateInputs() {

    if (process.argv.length < 4) {
      internals.printUsage();
      throw new Error(`Insufficient arguments, expected 2, found ${process.argv.length - 2}`);
    }
  },

  /**
   * Prints the usage of the program to stderr.
   */
  printUsage() {

    console.error('Usage:');
    console.error(`${basename(process.argv[1])} <path/to/wordnet_file.tab> <path/to/data/dir>`);
  },

  /**
   * Loads the contents of a wordnet tab file.
   *
   * @param {string} pathToTab - path of the tab file to read.
   * @returns {Promise<string>} the file contents decoded as UTF-8.
   * @throws {Error} when the file cannot be read; the underlying error is
   * available as `cause`.
   */
  async load(pathToTab) {

    try {
      return await readFile(pathToTab, { encoding: 'utf8' });
    }
    catch (error) {
      internals.debug(error.stack);
      // Keep the original failure attached so callers can tell a missing
      // file from a permissions problem without enabling debug output.
      throw new Error(`Could not read tab file at ${pathToTab}`, { cause: error });
    }
  },

  /**
   * Parses a wordnet tab file and turns it into a JSON structure.
   *
   * @param {string} wordnetTab - the full text of the tab file.
   * @returns {Promise<{adjectives: string[], nouns: string[], verbs: string[], adverbs: string[]}>}
   * the terms grouped by type, in the order they appear in the input.
   * @throws {Error} when the input cannot be processed; the underlying error
   * is available as `cause`.
   */
  async parse(wordnetTab) {

    try {
      const parsedTerms = {
        adjectives: [],
        nouns: [],
        verbs: [],
        adverbs: []
      };

      for (const line of wordnetTab.split('\n')) {
        const matches = line.match(internals.kParseRegex);
        if (matches) {
          const { type, term } = matches.groups;
          parsedTerms[internals.kTermMapping[type]].push(term);
        }
      }

      return parsedTerms;
    }
    catch (error) {
      internals.debug(error.stack);
      throw new Error('Could not parse wordnet data.', { cause: error });
    }
  },

  /**
   * Writes one `<type>.json` file per term list into the data directory,
   * creating the directory first if needed.
   *
   * @param {Object<string, string[]>} parsedTerms - term lists keyed by type.
   * @param {string} dataDirectory - directory that receives the JSON files.
   * @returns {Promise<void>}
   * @throws {Error} when the directory cannot be created or a file cannot be
   * written; the underlying error is available as `cause`.
   */
  async write(parsedTerms, dataDirectory) {

    await internals.createDataDirectory(dataDirectory);
    try {
      for (const [type, terms] of Object.entries(parsedTerms)) {
        const targetFile = join(dataDirectory, `${type}.json`);
        await writeFile(targetFile, JSON.stringify(terms, null, 2));
      }
    }
    catch (error) {
      internals.debug(error.stack);
      throw new Error(`Could not write wordnet data to ${dataDirectory}.`, { cause: error });
    }
  },

  /**
   * Creates the data directory, including any missing parent directories.
   *
   * @param {string} dataDirectory - directory to create.
   * @returns {Promise<void>}
   * @throws {Error} when the directory cannot be created; the underlying
   * error is available as `cause`.
   */
  async createDataDirectory(dataDirectory) {

    try {
      await mkdir(dataDirectory, { recursive: true });
    }
    catch (error) {
      internals.debug(error.stack);
      throw new Error(`Could not create data directory at ${dataDirectory}`, { cause: error });
    }
  },

  /**
   * Runs the whole conversion: validates the arguments, loads the tab file
   * given as first argument, parses it and writes the result into the
   * directory given as second argument.
   *
   * @returns {Promise<void>}
   */
  async run() {

    internals.validateInputs();
    const wordnetTab = await internals.load(process.argv[2]);
    const parsedTerms = await internals.parse(wordnetTab);
    await internals.write(parsedTerms, process.argv[3]);
  }
};
106
// Entry point: run the conversion and report failure through the exit code.
// process.exitCode is set instead of calling process.exit(), because
// process.exit() ends the process immediately and can cut off writes to
// stderr that are still pending. On success the process exits on its own
// with code 0 once nothing is left to do.
internals.run().catch((err) => {

  console.error(err.message || err);
  process.exitCode = 1;
});