]>
Commit | Line | Data |
---|---|---|
38431710 RBR |
1 | #!/usr/bin/env node |
2 | ||
3 | import { basename, join } from 'path'; | |
4 | import { mkdir, readFile, writeFile } from 'fs/promises'; | |
5 | import { debuglog } from 'util'; | |
6 | ||
const internals = {

    // Matches one lemma row of the tab file: "<digits>-<type> lemma <term>",
    // with whitespace between the columns. Only the types a, n, r and v are
    // accepted; any other row simply does not match.

    kParseRegex: /[0-9]+-(?<type>[anrv])\s+lemma\s+(?<term>.*)/,

    // Maps the single-letter type captured by kParseRegex to the key used in
    // the parsed structure, which is also the base name of the output file.

    kTermMapping: {
        a: 'adjectives',
        n: 'nouns',
        v: 'verbs',
        r: 'adverbs'
    },

    // Debug logger, enabled with NODE_DEBUG=wordnet_to_json

    debug: debuglog('wordnet_to_json'),

    // Validates that inputs are present. Prints the usage and throws when
    // fewer than two command line arguments were given.

    validateInputs() {

        if (process.argv.length < 4) {
            internals.printUsage();
            throw new Error(`Insufficient arguments, expected 2, found ${process.argv.length - 2}`);
        }
    },

    // Prints the usage of the program to stderr

    printUsage() {

        console.error('Usage:');
        console.error(`${basename(process.argv[1])} <path/to/wordnet_file.tab> <path/to/data/dir>`);
    },

    // Loads the contents of a wordnet tab file as a utf8 string.
    // Rejects with a descriptive error; the underlying failure is kept
    // on its `cause` property.

    async load(pathToTab) {

        try {
            return await readFile(pathToTab, { encoding: 'utf8' });
        }
        catch (error) {
            internals.debug(error.stack);
            throw new Error(`Could not read tab file at ${pathToTab}`, { cause: error });
        }
    },

    // Parses a wordnet tab file and turns it into a JSON structure of the
    // form { adjectives: [], nouns: [], verbs: [], adverbs: [] }. Lines
    // that do not match kParseRegex are ignored. Rejects with a descriptive
    // error (original failure on `cause`) when the input cannot be parsed.

    async parse(wordnetTab) {

        try {
            const parsedTerms = {
                adjectives: [],
                nouns: [],
                verbs: [],
                adverbs: []
            };

            for (const line of wordnetTab.split('\n')) {
                const matches = line.match(internals.kParseRegex);
                if (matches) {
                    const { type, term } = matches.groups;
                    parsedTerms[internals.kTermMapping[type]].push(term);
                }
            }

            return parsedTerms;
        }
        catch (error) {
            internals.debug(error.stack);
            throw new Error('Could not parse wordnet data.', { cause: error });
        }
    },

    // Writes every list of parsed terms to <dataDirectory>/<type>.json as
    // pretty printed JSON, creating the directory first if needed. Rejects
    // with a descriptive error (original failure on `cause`) when a file
    // cannot be written.

    async write(parsedTerms, dataDirectory) {

        await internals.createDataDirectory(dataDirectory);
        try {
            for (const [type, terms] of Object.entries(parsedTerms)) {
                const targetFile = join(dataDirectory, `${type}.json`);
                await writeFile(targetFile, JSON.stringify(terms, null, 2));
            }
        }
        catch (error) {
            internals.debug(error.stack);
            throw new Error(`Could not write wordnet data to ${dataDirectory}.`, { cause: error });
        }
    },

    // Creates the data directory, including missing parents. Rejects with
    // a descriptive error (original failure on `cause`) when that fails.

    async createDataDirectory(dataDirectory) {

        try {
            await mkdir(dataDirectory, { recursive: true });
        }
        catch (error) {
            internals.debug(error.stack);
            throw new Error(`Could not create data directory at ${dataDirectory}`, { cause: error });
        }
    },

    // Runs the whole conversion: validate the arguments, load the tab file
    // given as first argument, parse it and write the result to the
    // directory given as second argument.

    async run() {

        internals.validateInputs();
        const wordnetTab = await internals.load(process.argv[2]);
        const parsedTerms = await internals.parse(wordnetTab);
        await internals.write(parsedTerms, process.argv[3]);
    }
};
106 | ||
// Entry point. On success the process ends on its own with exit code 0 once
// all pending work has finished. On failure the message is printed and the
// exit code is set instead of calling process.exit(), so that stderr is
// flushed before the process terminates.

internals.run()
    .catch((err) => {

        console.error(err?.message || err);
        process.exitCode = 1;
    });