This is a read-only snapshot of the ComputerCraft forums, taken in April 2020.
Left4Cake's profile picture

Html reader of sorts.

Started by Left4Cake, 18 February 2013 - 04:07 PM
Left4Cake #1
Posted 18 February 2013 - 05:07 PM
Did this just for the heck of it. It's not that great, but it works to an extent.
It opens the file named by tArgs[1] (the first argument) on the computer.

Scroll up and down with either the arrows or W and S.
Press Backspace, or End to Quit
Enter brings up a list of URLs, but Firebox (read below) or some other program is necessary to download the page.

You can combine this with Firebox to open web pages from the internet. Just change the edit variable in Firebox to this program, and then run "Firebox http://put.somesite.here"

pastebin get nm5sT4KD html

Spoiler


--[[
Initializing
]]--

-- Program arguments: tArgs[1] is the path of the HTML file to display.
local tArgs = { ... }

-- Current screen of the viewer loop at the bottom of the file ("page" first).
veiw = "page"
shell.run("clear")

-- Without a path there is nothing to open; stop with a hint instead of
-- failing later on a nil file handle.
if not tArgs[1] then
  print("Usage: html <file>")
  return
end

-- Handle of the HTML file; read and closed by the "Reading filedata" section.
file = io.open( tArgs[1] )
if not file then
  print("Could not open file: " .. tArgs[1])
  return
end
--- Split a string on a literal delimiter.
-- The delimiter is matched as plain text (no Lua pattern magic), so
-- delimiters such as "." or "?!@" are safe to use.
-- @param s         string to split
-- @param pattern   literal delimiter (default: a single space); an empty
--                  delimiter returns the whole string as the only piece
-- @param maxsplit  maximum number of splits to perform; negative or nil
--                  means unlimited. The unsplit remainder is always
--                  returned as the final piece.
-- @return          table (sequence) of the pieces, in order
function split(s, pattern, maxsplit)
  local delimiter = pattern or ' '
  local remaining_splits = maxsplit or -1

  -- An empty delimiter matches everywhere without consuming input, which
  -- would never terminate; treat it as "nothing to split on".
  if delimiter == '' then
    return { s }
  end

  local pieces = {}
  local delimiter_len = #delimiter
  local start = 1

  while remaining_splits ~= 0 do
    -- 4th argument `true` = plain find, so #delimiter is the match length.
    local found = string.find(s, delimiter, start, true)
    if not found then
      break
    end
    pieces[#pieces + 1] = string.sub(s, start, found - 1)
    start = found + delimiter_len
    remaining_splits = remaining_splits - 1
  end

  -- Whatever is left (no more delimiters, or split budget used up).
  pieces[#pieces + 1] = string.sub(s, start)
  return pieces
end


--[[
Reading Html
]]--

-- Where to find links: Lua patterns whose single capture is a link target.
-- The link-extraction loop tries them in order, so every quoted form comes
-- before the unquoted one; otherwise an unquoted pattern would capture the
-- quotes of a quoted attribute as part of the link. The unquoted form stops
-- at whitespace or '>' and leaves that terminator in the page so the tag
-- stays well formed for the tag-removal pass.
-- The misspelled "herf" attribute names are kept from the original list.
herf = {}
do
  local names = {
    "href", "Href", "HREF",
    "herf", "Herf", "HERF",
    "src", "Src", "SRC",
  }
  local forms = {
    "='(.-)'",          -- name='value'
    "=\"(.-)\"",        -- name="value"
    "=([^%s>'\"]+)",    -- name=value (unquoted)
  }
  for _, form in ipairs(forms) do
    for _, name in ipairs(names) do
      herf[#herf + 1] = name .. form
    end
  end
end

-- Tags to remove: Lua patterns deleted from the page text, applied in order.
-- Notes on the patterns:
--   * '-' is a pattern quantifier, so the dashes of "<!--" and "-->" are
--     escaped; unescaped, a comment containing '>' was only partly removed.
--   * The anchor patterns require the tag name to end ("<a>" or "<a" followed
--     by whitespace) so that they no longer swallow "<article ...>".
remove = {
  "<!DOCTYPE(.-)>", "<!doctype(.-)>",
  "<!%-%-(.-)%-%->",
  "<html(.-)>", "<HTML(.-)>", "</html>", "</HTML>",
  "<head>(.-)</head>", "<HEAD>(.-)</HEAD>",
  "<body>", "<body(.-)>", "</body>", "<BODY>", "<BODY(.-)>", "</BODY>",
  "<form(.-)>", "</form>", "<FORM(.-)>", "</FORM>",
  "<input(.-)>", "</input>", "<INPUT(.-)>", "</INPUT>",
  "<script>(.-)</script>", "<SCRIPT>(.-)</SCRIPT>",
  "<script(.-)>", "</script>", "<SCRIPT(.-)>", "</SCRIPT>",
  "<span(.-)>", "</span>", "<SPAN(.-)>", "</SPAN>",
  "<div(.-)</div>", "<div(.-)>", "</div>",
  "<Iframe(.-)</Iframe>", "<iframe(.-)</iframe>",
  "<a>", "<a%s(.-)>", "</a>", "<A>", "<A%s(.-)>", "</A>",
  "<TH>", "<TH(.-)>", "</TH>", "<th>", "<th(.-)>", "</th>",
  "<img(.-)>", "</img>", "<IMG(.-)>", "</IMG>",
  "<TD>", "<TD(.-)>", "</TD>", "<td>", "<td(.-)>", "</td>",
  "<ol>", "<ol(.-)>", "</ol>", "<OL>", "<OL(.-)>", "</OL>",
  "<sub>", "</sub>", "<SUB>", "</SUB>",
  "</ul>", "</UL>",
  "</NAV>", "</Nav>", "</nav>",
  "<FOOTER>", "<FOOTER(.-)>", "</FOOTER>",
  "<Footer>", "<Footer(.-)>", "</Footer>",
  "<footer>", "<footer(.-)>", "</footer>",
}
-- List-item opening tags; the bullet pass replaces each match with " * ".
Bullets = {
  "<li>",
  "<LI>",
  "<li(.-)>",
  "<LI(.-)>",
}

-- Tags that start or end a line: each match is later replaced by the
-- " ?!@ " marker on which the page text is split into display segments.
-- Headings cover h1 through h6.
newline = {
  "<br(.-)>", "<BR(.-)>", "<br />", "<BR />", "<br/>", "<BR/>", "</br>", "</BR>",
  "<h1>", "<h1(.-)>", "</h1>", "<H1>", "<H1(.-)>", "</H1>",
  "<h2>", "<h2(.-)>", "</h2>", "<H2>", "<H2(.-)>", "</H2>",
  "<h3>", "<h3(.-)>", "</h3>", "<H3>", "<H3(.-)>", "</H3>",
  "<h4>", "<h4(.-)>", "</h4>", "<H4>", "<H4(.-)>", "</H4>",
  "<h5>", "<h5(.-)>", "</h5>", "<H5>", "<H5(.-)>", "</H5>",
  "<h6>", "<h6(.-)>", "</h6>", "<H6>", "<H6(.-)>", "</H6>",
  "<HEADER>", "<HEADER(.-)>", "</HEADER>",
  "<Header>", "<Header(.-)>", "</Header>",
  "<header>", "<header(.-)>", "</header>",
  "<ARTICLE>", "<ARTICLE(.-)>", "</ARTICLE>",
  "<Article>", "<Article(.-)>", "</Article>",
  "<article>", "<article(.-)>", "</article>",
  "<p(.-)>", "</p>", "<P(.-)>", "</P>",
  "<TR(.-)>", "</TR>", "<tr(.-)>", "</tr>",
  "<TABLE(.-)>", "<Table(.-)>", "<table(.-)>", "</TABLE>", "</Table>", "</table>",
  "<ul>", "<UL>", "</li>", "</LI>"
}

-- Predefining variables (all global, shared by the sections below):
--   htmlpage       accumulated page text
--   links          extracted link targets; {"None"} until a link is found
--   viewpage       display segments, with per-segment center / hr flags
--   centerenabled  whether a <center> region is currently open
htmlpage, centerenabled = "", false
links = { "None" }
viewpage, center, hr = {}, {}, {}

-- Reading filedata: append the whole file to htmlpage, then close it.
-- file:read() returns one line at a time without its line break, so the
-- lines are joined with a single space; joining them directly would fuse
-- the last word of one line with the first word of the next.
do
  local lines = {}
  while true do
    local line = file:read()
    if not line then break end
    lines[#lines + 1] = line
  end
  file:close()
  htmlpage = htmlpage .. table.concat(lines, " ")
end

-- Get all outgoing links: for every pattern in `herf`, repeatedly take the
-- first match in the page, store its capture in `links` and cut the matched
-- text out of the page. The first stored link overwrites the "None"
-- placeholder. Links are therefore ordered by pattern, not by page position.
do
  local count = 0
  for _, pattern in ipairs(herf) do
    while true do
      -- One scan yields both the match bounds and the captured target.
      local first, last, target = string.find(htmlpage, pattern)
      if not first then break end
      count = count + 1
      links[count] = target
      htmlpage = string.sub(htmlpage, 1, first - 1) .. string.sub(htmlpage, last + 1)
    end
  end
end


-- Strip every pattern listed in `remove` from the page text. A pattern is
-- re-applied until a pass performs no substitution before moving on.
i = 1
for index, tag in ipairs(remove) do
  local hits
  repeat
    htmlpage, hits = string.gsub(htmlpage, tag, "")
  until hits == 0
  i = index + 1
end

-- HTML entities: replace the handful of entities the viewer knows with
-- plain text. Each entity is accepted both as written in a page ("&nbsp;")
-- and with the ampersand itself escaped ("&amp;nbsp;"), which is the only
-- spelling the previous patterns matched.
do
  local entities = {
    { "#160",  " "   }, { "nbsp",  " "   },   -- non-breaking space
    { "#161",  "¡"   }, { "iexcl", "¡"   },   -- inverted exclamation mark
    { "#187",  ">>"  }, { "raquo", ">>"  },   -- right pointing guillemet
    { "#169",  "(c)" }, { "copy",  "(c)" },   -- copyright sign
  }
  for _, entity in ipairs(entities) do
    local name, text = entity[1], entity[2]
    htmlpage = string.gsub(htmlpage, "&amp;" .. name .. ";", text)
    htmlpage = string.gsub(htmlpage, "&" .. name .. ";", text)
  end
end

-- Reduce spaces: collapse every run of whitespace (spaces, tabs, stray
-- carriage returns) into one space in a single pass.
-- NOTE(review): the previous second loop searched for and replaced what reads
-- as the same single space, which cannot terminate once the page contains a
-- space; presumably the searched character was a tab or a non-breaking space
-- lost in transcription. Both are covered here (the non-breaking space in its
-- UTF-8 encoding) — confirm against the original paste.
htmlpage = string.gsub(htmlpage, "\194\160", " ")
htmlpage = string.gsub(htmlpage, "%s+", " ")


-- Turn every list-item opening tag listed in `Bullets` into " * ".
-- A pattern is re-applied until a pass performs no substitution.
i = 1
for index, tag in ipairs(Bullets) do
  local hits
  repeat
    htmlpage, hits = string.gsub(htmlpage, tag, " * ")
  until hits == 0
  i = index + 1
end

-- Replace each line-breaking tag from `newline` with the " ?!@ " marker;
-- the page is split into segments on that marker afterwards.
i = 1
for index, tag in ipairs(newline) do
  htmlpage = (string.gsub(htmlpage, tag, " ?!@ "))
  i = index + 1
end

-- Split the page into display segments on the line-break marker, then scan
-- each segment for the two style tags the viewer understands:
--   <center>...</center>  -> center[n] is 1 for centred segments, else false
--   <hr>                  -> hr[n] is true when a rule precedes the segment
-- The tags themselves are removed from the segment text in any letter case.
viewpage = split(htmlpage, "?!@")
do
  -- Position of the last plain-text occurrence of `needle`, or nil.
  local function last_index(text, needle)
    local last, from = nil, 1
    while true do
      local at = string.find(text, needle, from, true)
      if not at then return last end
      last, from = at, at + 1
    end
  end

  for index = 1, #viewpage do
    local segment = viewpage[index]
    local lowered = string.lower(segment)

    -- center
    local open_at = last_index(lowered, "<center>")
    local close_at = last_index(lowered, "</center>")
    if open_at then
      centerenabled = true
    elseif close_at then
      centerenabled = false
    end
    center[index] = centerenabled and 1 or false
    -- A region opened and closed inside this one segment must not stay
    -- enabled for the segments that follow.
    if open_at and close_at and close_at > open_at then
      centerenabled = false
    end
    segment = string.gsub(segment, "<[cC][eE][nN][tT][eE][rR]>", "")
    segment = string.gsub(segment, "</[cC][eE][nN][tT][eE][rR]>", "")

    -- Horizontal line: accepts <hr>, <hr/> and <hr />.
    local rules
    segment, rules = string.gsub(segment, "<[hH][rR]%s*/?>", "")
    hr[index] = rules > 0

    viewpage[index] = segment
  end
end


--[[
Displaying Result
]]--

-- Interactive viewer: a small state machine driven by the global `veiw`.
--   "page"   draw the page and handle scrolling / quitting
--   "links"  list the extracted links and let the user pick one
--   other    hand the picked link to Firebox and exit
-- Key codes follow the usage notes of the post: 17/200 = W / Up,
-- 31/208 = S / Down, 14/207 = Backspace / End, 28 = Enter.
local top = 0        -- number of segments scrolled off the top of the screen
local pointer = 1    -- highlighted entry of the link list
local w, h = term.getSize()

while true do

  if veiw == "page" then
    -- Displaying Page
    shell.run("clear")
    w, h = term.getSize()
    local y = 1
    -- The segment index advances by one per segment, independent of how
    -- many screen rows a segment occupies, so wrapped lines skip nothing.
    local j = top + 1
    while viewpage[j] and y <= h do
      local text = viewpage[j]

      -- Start column: centred segments are shifted right, never left of 1.
      local x = 1
      if center[j] == 1 then
        x = math.ceil((w / 2) - (text:len() / 2))
        if x < 1 then
          x = 1
        end
      end
      term.setCursorPos(x, y)
      term.clearLine()

      -- Draw horizontal line on its own row, then move the text below it.
      if hr[j] then
        term.setCursorPos(1, y)
        term.write(string.rep("-", w))
        y = y + 1
        term.setCursorPos(x, y)
        term.clearLine()
      end

      -- Print text, then account for the rows it takes when it wraps.
      write(text)
      y = y + math.max(1, math.ceil(text:len() / w))
      j = j + 1
    end

    -- Only key presses are relevant; other events carry unrelated values.
    local _, key = os.pullEvent("key")
    if key == 17 or key == 200 then
      -- Advance only while a segment remains that did not fit on screen.
      if viewpage[j] then
        top = top + 1
      end
    elseif key == 31 or key == 208 then
      -- Go back until the very first segment is on screen again.
      if top > 0 then
        top = top - 1
      end
    elseif key == 14 or key == 207 then
      shell.run("clear")
      break
    elseif key == 28 and links[1] ~= "None" then
      veiw = "links"
    end

  elseif veiw == "links" then
    -- Displaying Links: the selection sits on row 10 with up to nine
    -- neighbours listed above and below it.
    shell.run("clear")
    w, h = term.getSize()
    print("Use the arrows and enter to chose.")
    local max = #links
    pointer = 1
    while true do
      for offset = 1, 9 do
        term.setCursorPos(1, 10 - offset)
        term.clearLine()
        local above = pointer - offset
        if links[above] then
          write(above .. ":" .. links[above])
        end
      end

      -- Keep the tail of an over-long entry so it fits the screen width.
      local selected = "->" .. pointer .. ":" .. links[pointer] .. "<-"
      if string.len(selected) > w then
        selected = string.sub(selected, string.len(selected) - w + 1)
      end
      term.setCursorPos(1, 10)
      term.clearLine()
      print(selected)

      for offset = 1, 9 do
        term.setCursorPos(1, 10 + offset)
        term.clearLine()
        local below = pointer + offset
        if links[below] then
          write(below .. ":" .. links[below])
        end
      end

      local _, key = os.pullEvent("key")
      if key == 14 or key == 207 then
        veiw = "page"
        break
      elseif key == 28 then
        veiw = "none"
        break
      elseif key == 200 and pointer > 1 then
        pointer = pointer - 1
      elseif key == 208 and pointer < max then
        pointer = pointer + 1
      end
    end

  else
    -- A link was chosen: let Firebox fetch it and leave the viewer.
    shell.run("clear")
    shell.run("Firebox", links[pointer])
    break
  end

end
tesla1889 #2
Posted 18 February 2013 - 05:57 PM
that's extremely useful. kinda similar to one of my projects, but props for using the actual HTML :)/>
Left4Cake #3
Posted 19 February 2013 - 04:38 AM
It still needs to be tweaked a bunch. My hope was that it could be used to at least read the text of most web sites, and I would love to somehow work out anchor handling, but don't get your hopes up.
Mailmanq! #4
Posted 19 February 2013 - 05:00 AM
Amazing! I wanted a way to read just the text from websites!
Left4Cake #5
Posted 12 April 2013 - 08:45 AM
Updated the code a bit.

More tag handling
A new form of tag handling that places bullet points
Started working on handling lines that are longer than the screen is wide.
Now you can exit the program.
FuuuAInfiniteLoop(F.A.I.L) #6
Posted 12 April 2013 - 01:27 PM
I made one also and a XML parser they are on my signature, you can improve your from mine, i dont mind
MudkipTheEpic #7
Posted 12 April 2013 - 01:54 PM
I thought you just redistributed yours from someone else's urielsalis. Or am I wrong?
Left4Cake #8
Posted 13 April 2013 - 06:10 AM
I made one also and a XML parser they are on my signature, you can improve your from mine, i dont mind

I will keep that in mind.

Also, I do get annoyed when people post about a link in their signature. So many Google searches have led me to people who posted that but have changed their signature since the post.

I thought you just redistributed yours from someone else's urielsalis. Or am I wrong?

Yeah, he did port it from somewhere else; he does give credit in his post.


[Original post here.]

Update: Tweaked the tags a bit, and started working on URL handling.
FuuuAInfiniteLoop(F.A.I.L) #9
Posted 13 April 2013 - 10:07 AM
I thought you just redistributed yours from someone else's urielsalis. Or am I wrong?
I rewrite something things from that, i believe that the tables and some functions are the only unchanged(or i added some things to the table?)