diccionario del 19 de marzo del 2026
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
node_modules
|
||||||
9
README.md
Normal file
9
README.md
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# Léxico Botánico de La Ventosa en formato JSON
|
||||||
|
|
||||||
|
Este es una compilación en formato JSON del léxico publicado en
|
||||||
|
```
|
||||||
|
Pérez Báez, Gabriela and Kaufman, Terrence. 2019. La Ventosa Diidxazá Lexico-Botanical Dictionary.
|
||||||
|
Dictionaria 9. 1-952. DOI: 10.5281/zenodo.14024635 (Available online at https://dictionaria.clld.org/contributions/diidxaza, Accessed on 2026-03-19.)
|
||||||
|
```
|
||||||
|
|
||||||
|
Se han extraído los campos: "headword", "Spanish" y "Diidxazá" para exportarlos a `diccionario.json` con los campos "diidxaza", "castellano" y "descripcion", respectivamente.
|
||||||
4762
diccionario.json
Normal file
4762
diccionario.json
Normal file
File diff suppressed because it is too large
Load Diff
2205
html/a.html
Normal file
2205
html/a.html
Normal file
File diff suppressed because it is too large
Load Diff
2380
html/b.html
Normal file
2380
html/b.html
Normal file
File diff suppressed because it is too large
Load Diff
1873
html/c.html
Normal file
1873
html/c.html
Normal file
File diff suppressed because it is too large
Load Diff
2056
html/d.html
Normal file
2056
html/d.html
Normal file
File diff suppressed because it is too large
Load Diff
2063
html/e.html
Normal file
2063
html/e.html
Normal file
File diff suppressed because it is too large
Load Diff
1937
html/f.html
Normal file
1937
html/f.html
Normal file
File diff suppressed because it is too large
Load Diff
1728
html/g.html
Normal file
1728
html/g.html
Normal file
File diff suppressed because it is too large
Load Diff
1982
html/h.html
Normal file
1982
html/h.html
Normal file
File diff suppressed because it is too large
Load Diff
2450
html/i.html
Normal file
2450
html/i.html
Normal file
File diff suppressed because it is too large
Load Diff
1137
html/j.html
Normal file
1137
html/j.html
Normal file
File diff suppressed because it is too large
Load Diff
10
package.json
Normal file
10
package.json
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"name": "lexico-botanico-laventosa",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"main": "index.js",
|
||||||
|
"author": "Raymundo Vásquez Ruiz <raymundo.vr@protonmail.com>",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"cheerio": "^1.2.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
33
src/index.js
Normal file
33
src/index.js
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
const cheerio = require("cheerio");
|
||||||
|
const { readdir, readFile, writeFile } = require("fs/promises");
|
||||||
|
const PATH = "./html";
|
||||||
|
(async () => {
|
||||||
|
const files = await readdir(PATH);
|
||||||
|
const diccionario = [];
|
||||||
|
for (const f of files) {
|
||||||
|
try {
|
||||||
|
const archDesc = await readFile(`${PATH}/${f}`, "utf-8");
|
||||||
|
const $ = cheerio.loadBuffer(archDesc);
|
||||||
|
const trows = $("table > tbody").children("tr");
|
||||||
|
for (let i = 0; i < trows.length; i++) {
|
||||||
|
const tds = $(trows[i]).find("td");
|
||||||
|
if (tds.length !== 6) {
|
||||||
|
console.warn("Esta fila no tiene 6 columnas:", i);
|
||||||
|
}
|
||||||
|
const [_b, d0, _s, _e, c, d1] = tds;
|
||||||
|
const diidxaza = $(d0).find("a").attr("title");
|
||||||
|
const castellano = $(c).find("span").text().replace(/\s\s+/g, ' ');
|
||||||
|
const descripcion = $(d1).find("span").text().replace(/\s\s+/g, ' ');
|
||||||
|
diccionario.push({
|
||||||
|
diidxaza,
|
||||||
|
castellano,
|
||||||
|
descripcion
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
console.error(`No se puede exportar ${f}:`, err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
await writeFile("./diccionario.json", JSON.stringify(diccionario));
|
||||||
|
console.log("Se escribieron", diccionario.length, "entradas en el diccionario.")
|
||||||
|
})();
|
||||||
174
yarn.lock
Normal file
174
yarn.lock
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
|
||||||
|
# yarn lockfile v1
|
||||||
|
|
||||||
|
|
||||||
|
boolbase@^1.0.0:
|
||||||
|
version "1.0.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e"
|
||||||
|
integrity sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==
|
||||||
|
|
||||||
|
cheerio-select@^2.1.0:
|
||||||
|
version "2.1.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/cheerio-select/-/cheerio-select-2.1.0.tgz#4d8673286b8126ca2a8e42740d5e3c4884ae21b4"
|
||||||
|
integrity sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==
|
||||||
|
dependencies:
|
||||||
|
boolbase "^1.0.0"
|
||||||
|
css-select "^5.1.0"
|
||||||
|
css-what "^6.1.0"
|
||||||
|
domelementtype "^2.3.0"
|
||||||
|
domhandler "^5.0.3"
|
||||||
|
domutils "^3.0.1"
|
||||||
|
|
||||||
|
cheerio@^1.2.0:
|
||||||
|
version "1.2.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.2.0.tgz#f23b777c49021ead7475dcf3390d3535a7f896d6"
|
||||||
|
integrity sha512-WDrybc/gKFpTYQutKIK6UvfcuxijIZfMfXaYm8NMsPQxSYvf+13fXUJ4rztGGbJcBQ/GF55gvrZ0Bc0bj/mqvg==
|
||||||
|
dependencies:
|
||||||
|
cheerio-select "^2.1.0"
|
||||||
|
dom-serializer "^2.0.0"
|
||||||
|
domhandler "^5.0.3"
|
||||||
|
domutils "^3.2.2"
|
||||||
|
encoding-sniffer "^0.2.1"
|
||||||
|
htmlparser2 "^10.1.0"
|
||||||
|
parse5 "^7.3.0"
|
||||||
|
parse5-htmlparser2-tree-adapter "^7.1.0"
|
||||||
|
parse5-parser-stream "^7.1.2"
|
||||||
|
undici "^7.19.0"
|
||||||
|
whatwg-mimetype "^4.0.0"
|
||||||
|
|
||||||
|
css-select@^5.1.0:
|
||||||
|
version "5.2.2"
|
||||||
|
resolved "https://registry.yarnpkg.com/css-select/-/css-select-5.2.2.tgz#01b6e8d163637bb2dd6c982ca4ed65863682786e"
|
||||||
|
integrity sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==
|
||||||
|
dependencies:
|
||||||
|
boolbase "^1.0.0"
|
||||||
|
css-what "^6.1.0"
|
||||||
|
domhandler "^5.0.2"
|
||||||
|
domutils "^3.0.1"
|
||||||
|
nth-check "^2.0.1"
|
||||||
|
|
||||||
|
css-what@^6.1.0:
|
||||||
|
version "6.2.2"
|
||||||
|
resolved "https://registry.yarnpkg.com/css-what/-/css-what-6.2.2.tgz#cdcc8f9b6977719fdfbd1de7aec24abf756b9dea"
|
||||||
|
integrity sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==
|
||||||
|
|
||||||
|
dom-serializer@^2.0.0:
|
||||||
|
version "2.0.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-2.0.0.tgz#e41b802e1eedf9f6cae183ce5e622d789d7d8e53"
|
||||||
|
integrity sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==
|
||||||
|
dependencies:
|
||||||
|
domelementtype "^2.3.0"
|
||||||
|
domhandler "^5.0.2"
|
||||||
|
entities "^4.2.0"
|
||||||
|
|
||||||
|
domelementtype@^2.3.0:
|
||||||
|
version "2.3.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.3.0.tgz#5c45e8e869952626331d7aab326d01daf65d589d"
|
||||||
|
integrity sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==
|
||||||
|
|
||||||
|
domhandler@^5.0.2, domhandler@^5.0.3:
|
||||||
|
version "5.0.3"
|
||||||
|
resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-5.0.3.tgz#cc385f7f751f1d1fc650c21374804254538c7d31"
|
||||||
|
integrity sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==
|
||||||
|
dependencies:
|
||||||
|
domelementtype "^2.3.0"
|
||||||
|
|
||||||
|
domutils@^3.0.1, domutils@^3.2.2:
|
||||||
|
version "3.2.2"
|
||||||
|
resolved "https://registry.yarnpkg.com/domutils/-/domutils-3.2.2.tgz#edbfe2b668b0c1d97c24baf0f1062b132221bc78"
|
||||||
|
integrity sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==
|
||||||
|
dependencies:
|
||||||
|
dom-serializer "^2.0.0"
|
||||||
|
domelementtype "^2.3.0"
|
||||||
|
domhandler "^5.0.3"
|
||||||
|
|
||||||
|
encoding-sniffer@^0.2.1:
|
||||||
|
version "0.2.1"
|
||||||
|
resolved "https://registry.yarnpkg.com/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz#396ec97ac22ce5a037ba44af1992ac9d46a7b819"
|
||||||
|
integrity sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw==
|
||||||
|
dependencies:
|
||||||
|
iconv-lite "^0.6.3"
|
||||||
|
whatwg-encoding "^3.1.1"
|
||||||
|
|
||||||
|
entities@^4.2.0:
|
||||||
|
version "4.5.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/entities/-/entities-4.5.0.tgz#5d268ea5e7113ec74c4d033b79ea5a35a488fb48"
|
||||||
|
integrity sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==
|
||||||
|
|
||||||
|
entities@^6.0.0:
|
||||||
|
version "6.0.1"
|
||||||
|
resolved "https://registry.yarnpkg.com/entities/-/entities-6.0.1.tgz#c28c34a43379ca7f61d074130b2f5f7020a30694"
|
||||||
|
integrity sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==
|
||||||
|
|
||||||
|
entities@^7.0.1:
|
||||||
|
version "7.0.1"
|
||||||
|
resolved "https://registry.yarnpkg.com/entities/-/entities-7.0.1.tgz#26e8a88889db63417dcb9a1e79a3f1bc92b5976b"
|
||||||
|
integrity sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==
|
||||||
|
|
||||||
|
htmlparser2@^10.1.0:
|
||||||
|
version "10.1.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-10.1.0.tgz#fe3f2e12c73b6e462d4e10395db9c1119e4d6ae4"
|
||||||
|
integrity sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==
|
||||||
|
dependencies:
|
||||||
|
domelementtype "^2.3.0"
|
||||||
|
domhandler "^5.0.3"
|
||||||
|
domutils "^3.2.2"
|
||||||
|
entities "^7.0.1"
|
||||||
|
|
||||||
|
iconv-lite@0.6.3, iconv-lite@^0.6.3:
|
||||||
|
version "0.6.3"
|
||||||
|
resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.6.3.tgz#a52f80bf38da1952eb5c681790719871a1a72501"
|
||||||
|
integrity sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==
|
||||||
|
dependencies:
|
||||||
|
safer-buffer ">= 2.1.2 < 3.0.0"
|
||||||
|
|
||||||
|
nth-check@^2.0.1:
|
||||||
|
version "2.1.1"
|
||||||
|
resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-2.1.1.tgz#c9eab428effce36cd6b92c924bdb000ef1f1ed1d"
|
||||||
|
integrity sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==
|
||||||
|
dependencies:
|
||||||
|
boolbase "^1.0.0"
|
||||||
|
|
||||||
|
parse5-htmlparser2-tree-adapter@^7.1.0:
|
||||||
|
version "7.1.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz#b5a806548ed893a43e24ccb42fbb78069311e81b"
|
||||||
|
integrity sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==
|
||||||
|
dependencies:
|
||||||
|
domhandler "^5.0.3"
|
||||||
|
parse5 "^7.0.0"
|
||||||
|
|
||||||
|
parse5-parser-stream@^7.1.2:
|
||||||
|
version "7.1.2"
|
||||||
|
resolved "https://registry.yarnpkg.com/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz#d7c20eadc37968d272e2c02660fff92dd27e60e1"
|
||||||
|
integrity sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==
|
||||||
|
dependencies:
|
||||||
|
parse5 "^7.0.0"
|
||||||
|
|
||||||
|
parse5@^7.0.0, parse5@^7.3.0:
|
||||||
|
version "7.3.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/parse5/-/parse5-7.3.0.tgz#d7e224fa72399c7a175099f45fc2ad024b05ec05"
|
||||||
|
integrity sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==
|
||||||
|
dependencies:
|
||||||
|
entities "^6.0.0"
|
||||||
|
|
||||||
|
"safer-buffer@>= 2.1.2 < 3.0.0":
|
||||||
|
version "2.1.2"
|
||||||
|
resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a"
|
||||||
|
integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==
|
||||||
|
|
||||||
|
undici@^7.19.0:
|
||||||
|
version "7.24.4"
|
||||||
|
resolved "https://registry.yarnpkg.com/undici/-/undici-7.24.4.tgz#873bce680d7c6354c941399fd4e8ea4563de4ea7"
|
||||||
|
integrity sha512-BM/JzwwaRXxrLdElV2Uo6cTLEjhSb3WXboncJamZ15NgUURmvlXvxa6xkwIOILIjPNo9i8ku136ZvWV0Uly8+w==
|
||||||
|
|
||||||
|
whatwg-encoding@^3.1.1:
|
||||||
|
version "3.1.1"
|
||||||
|
resolved "https://registry.yarnpkg.com/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz#d0f4ef769905d426e1688f3e34381a99b60b76e5"
|
||||||
|
integrity sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==
|
||||||
|
dependencies:
|
||||||
|
iconv-lite "0.6.3"
|
||||||
|
|
||||||
|
whatwg-mimetype@^4.0.0:
|
||||||
|
version "4.0.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz#bc1bf94a985dc50388d54a9258ac405c3ca2fc0a"
|
||||||
|
integrity sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==
|
||||||
Reference in New Issue
Block a user