-
Notifications
You must be signed in to change notification settings - Fork 297
/
scrapeHtml.js
93 lines (71 loc) · 2.03 KB
/
scrapeHtml.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"use strict";
var fs = require("fs");
var path = require("path");
// Sample markup, one snippet per tag/attribute combination.
// Key: short label naming the tag and attribute; value: the HTML snippet itself.
var htmls =
{
	'<a href>': '<a href="fake.html">link</a>',
	'<area href/>': '<area href="fake.html"/>',
	'<audio src>': '<audio src="fake.ogg"></audio>',
	'<blockquote cite>': '<blockquote cite="fake.html">quote</blockquote>',
	'<del cite>': '<del cite="fake.html">deleted</del>',
	'<embed src/>': '<embed src="fake.swf"/>',
	'<form action>': '<form action="fake.html">fields</form>',
	'<iframe longdesc>': '<iframe longdesc="fake.html"></iframe>',
	'<iframe src>': '<iframe src="fake.html"></iframe>',
	'<img longdesc/>': '<img longdesc="fake.html"/>',
	'<img src/>': '<img src="fake.png"/>',
	'<input src/>': '<input src="fake.png"/>',
	'<ins cite>': '<ins cite="fake.html">inserted</ins>',
	'<link href/>': '<link href="fake.css"/>',
	'<menuitem icon/>': '<menuitem icon="fake.png"/>',
	'<meta http-equiv="refresh" content/>': '<meta http-equiv="refresh" content="5; url=fake.html"/>',
	'<object data>': '<object data="fake.swf"></object>',
	'<q cite>': '<q cite="fake.html">quote</q>',
	'<script src>': '<script src="fake.js"></script>',
	'<source src/>': '<source src="fake.ogg"/>',
	'<track src/>': '<track src="fake.vtt"/>',
	'<video src>': '<video src="fake.ogg"></video>'
};
/**
 * Writes the generated data to `../json/scrapeHtml.json`, resolved
 * relative to the directory that contains this script.
 */
function generate()
{
	// path.join() normalizes its result, so the ".." segment is collapsed
	var destination = path.join(__dirname, "..", "json", "scrapeHtml.json");

	saveFile(destination);
}
/**
 * Builds one entry per snippet in `htmls`.
 *
 * Every own key of `htmls` becomes a key of the result. Its value is a
 * newly created object carrying the snippet under `html`, together with
 * `skipOrOnly: "skip"`, `length: 1` and a `link` object whose string
 * fields are all empty.
 *
 * @returns {Object} map from each `htmls` key to its entry object
 */
function generateData()
{
	var output = {};

	// Object.keys() lists own enumerable properties only, so nothing
	// inherited through the prototype chain can end up in the output.
	Object.keys(htmls).forEach( function(key)
	{
		// A new object graph is created per key; entries share no references.
		output[key] =
		{
			skipOrOnly: "skip",
			html: htmls[key],
			length: 1,
			link:
			{
				url: { original:"" },
				html:
				{
					selector: "",
					tagName: "",
					attrName: "",
					tag: "",
					text: ""
				}
			}
		};
	});

	return output;
}
/**
 * Serializes the result of generateData() as tab-indented JSON.
 *
 * @returns {string} the JSON text followed by a single line feed
 */
function generateString()
{
	var serialized = JSON.stringify(generateData(), null, "\t");

	// Terminate the text with a newline (unix/git convention)
	return serialized + "\n";
}
/**
 * Synchronously writes the string from generateString() to `location`,
 * then logs the destination to the console.
 *
 * @param {string} location - path of the file to write
 */
function saveFile(location)
{
	var contents = generateString();
	fs.writeFileSync(location, contents);

	var message = "Written to: "+ location;
	console.log(message);
}
// Entry point: run the generator as soon as this script is loaded.
generate();