--
-- Created by: Takaaki Naganoya
-- Created on: 2019/09/22
-- Modified on: 2019/10/07
--
-- Copyright © 2019 Piyomaru Software, All Rights Reserved
--
-- Script-wide dependencies. HTMLReader is a third-party framework and
-- arrayLib is an external script library; both must be installed separately.
use AppleScript version "2.4"
use scripting additions
use framework "Foundation"
use framework "HTMLReader" -- https://github.com/nolanw/HTMLReader
use aLib : script "arrayLib"

-- Shortcuts to the Cocoa classes used by the handlers below.
property NSUUID : a reference to current application's NSUUID
property NSString : a reference to current application's NSString
property HTMLDocument : a reference to current application's HTMLDocument
property NSMutableArray : a reference to current application's NSMutableArray
property NSJSONSerialization : a reference to current application's NSJSONSerialization
-- Main script: find the tables reported as visible in Safari's front document,
-- convert each one to a CSV file on the Desktop, and open that file in Numbers.
set aTag to "table"

-- Zero-based indexes of the matching elements, or false when no document is open
set indRes to getVisibleElementIndexList(aTag) of me
if indRes = false or indRes = {} then
	display notification "No Visible Table in Web browser"
	return
end if

tell application "Safari"
	tell front document
		set aSource to source
	end tell
end tell

repeat with i in indRes
	set inList to filterATableAndPaseCells(aSource, i, aTag) of me
	
	-- Skip a table that could not be parsed instead of abandoning
	-- every remaining visible table.
	if inList is not false and inList is not {} then
		-- A UUID keeps each exported file name unique
		set aUUID to current application's NSUUID's UUID()'s UUIDString() as text
		set aNewFile to ((path to desktop) as string) & aUUID & ".csv"
		saveAsCSV(inList, aNewFile) of me
		
		tell application "Numbers"
			activate
			open (aNewFile as alias)
		end tell
	end if
end repeat
-- Extract one table from an HTML source string as a 2D list of cell texts.
--   aSource : HTML source text
--   targInd : zero-based index of the wanted element among those matching aTag
--   aTag    : selector used to find the candidate elements (e.g. "table")
-- Returns the 2D list built through arrayLib, or false when the element cannot
-- be found or has no rows/columns.
on filterATableAndPaseCells(aSource as string, targInd as integer, aTag as string)
	set aHTML to current application's HTMLDocument's documentWithString:(aSource as string)
	
	-- List up the table elements
	set eList to (aHTML's nodesMatchingSelector:aTag) as list
	
	-- The caller's index may come from a different view of the page than aSource,
	-- so it can point past the end of the list.
	if targInd < 0 or targInd >= (length of eList) then return false
	set aObj to contents of item (targInd + 1) of eList
	
	-- Count the columns from the first row of the table
	set aTableHeader to (aObj's nodesMatchingSelector:"tr")'s firstObject()
	if aTableHeader is missing value then return false
	set hList to (aTableHeader's nodesMatchingSelector:"th") as list
	-- A first row without header cells still defines the table width
	if hList = {} then set hList to (aTableHeader's nodesMatchingSelector:"td") as list
	
	set hLen to 0
	repeat with hCell in hList
		-- A header cell with colspan occupies that many columns
		set hSpan to (hCell's attributes()'s valueForKey:"colspan")
		if hSpan is missing value then
			set hLen to hLen + 1
		else
			set hLen to hLen + (hSpan as integer)
		end if
	end repeat
	if hLen = 0 then return false
	
	-- Acquire whole table body contents; fall back to the table itself when
	-- no tbody element is present
	set aTableBody to (aObj's nodesMatchingSelector:"tbody")'s firstObject()
	if aTableBody is missing value then set aTableBody to aObj
	set bList to (aTableBody's nodesMatchingSelector:"tr") as list
	set rCount to (length of bList) -- count rows
	if rCount = 0 then return false
	
	-- Row loop
	set yCount to 1
	-- NOTE(review): make2DBlankArray presumably returns rCount rows of hLen blank ("") cells; confirm against arrayLib
	set attrList to make2DBlankArray(hLen, rCount) of aLib
	
	repeat with i2 in bList
		-- Prefer header cells; use data cells when the row has none
		set i3 to (i2's nodesMatchingSelector:"th") as list
		if i3 = {} then
			set i3 to (i2's nodesMatchingSelector:"td") as list
		end if
		
		-- Column loop
		set xCount to 1
		repeat with i4 in i3
			set anAttr to i4's attributes()
			set colAtr to (anAttr's valueForKey:"colspan")
			set rowAttr to (anAttr's valueForKey:"rowspan")
			set cellStr to i4's textContent() as string
			
			if colAtr is not equal to missing value then
				-- colspan handling
				-- NOTE(review): xFill presumably repeats cellStr across colNum columns; confirm against arrayLib
				set colNum to colAtr as integer
				set attrList to xFill(xCount, yCount, attrList, cellStr, colNum) of aLib
			else if rowAttr is not equal to missing value then
				-- rowspan handling
				-- NOTE(review): yFill presumably repeats cellStr down rowNum rows; confirm against arrayLib
				set rowNum to rowAttr as integer
				set attrList to yFill(xCount, yCount, attrList, cellStr, rowNum) of aLib
			else if cellStr is not equal to "" then
				-- Normal handling: store the text in the first still-blank cell at or
				-- after xCount, stepping over cells already filled by a span
				repeat with ii from xCount to hLen
					set aRes to getItemByXY(ii, yCount, attrList, "") of aLib
					if aRes = "" then
						set attrList to setItemByXY(ii, yCount, attrList, cellStr) of aLib
						exit repeat
					else
						set xCount to xCount + 1
					end if
				end repeat
			end if
			
			set xCount to xCount + 1
		end repeat
		
		set yCount to yCount + 1
	end repeat
	
	return attrList
end filterATableAndPaseCells
-- Compute, from element coordinates, which aTag elements are currently shown in
-- the Safari window.
--   aTag : tag name handed to getElementsByTagName (e.g. "table")
-- Returns a list of zero-based element indexes whose top edge lies inside the
-- viewport (0 < top < window.innerHeight), or false when Safari has no open
-- document or the JavaScript result cannot be parsed.
on getVisibleElementIndexList(aTag as string)
	tell application "Safari"
		set dCount to count every document
		if dCount = 0 then return false
		
		-- The element collection is fetched once and indexed inside the loop
		set jRes to do JavaScript "var winHeight = window.innerHeight,
    elementsArray = document.body.getElementsByTagName('" & aTag & "'),
    elemLen = elementsArray.length,
    inView = [];
var step;
for (step = 0 ; step < elemLen ; step++) {
    var bVar = elementsArray[step].getBoundingClientRect();
    if (bVar.top > 0 && bVar.top < winHeight) {
        inView.push(step);
    }
}
JSON.stringify(inView);" in front document
	end tell
	
	try
		return parseJSONAsList(jRes) of me
	on error
		-- The JavaScript call produced nothing that can be read as JSON text
		return false
	end try
end getVisibleElementIndexList
-- Decode a JSON text into an AppleScript list.
--   jsRes : JSON text
-- Returns the decoded value coerced to a list, or {} when the text is not valid
-- JSON (NSJSONSerialization then yields missing value, which would otherwise be
-- coerced to {missing value}).
on parseJSONAsList(jsRes as string)
	set jsonString to current application's NSString's stringWithString:jsRes
	set jsonData to jsonString's dataUsingEncoding:(current application's NSUTF8StringEncoding)
	set aJsonDict to current application's NSJSONSerialization's JSONObjectWithData:jsonData options:0 |error|:(missing value)
	if aJsonDict is missing value then return {}
	return aJsonDict as list
end parseJSONAsList
-- Save a 2D list to a CSV file.
-- Every field is wrapped in double quotes, embedded double quotes are doubled,
-- CR and LF characters inside a row are removed, and rows end with CR+LF.
--   aList : list of rows, each row being a list of values coercible to text
--   aPath : destination path; ".csv" is appended when it is missing
-- Returns the result of writeToFileAsUTF8.
on saveAsCSV(aList as list, aPath)
	set crlfChar to (string id 13) & (string id 10)
	set LF to (string id 10)
	set wholeText to ""
	
	repeat with i in aList
		set newLine to {}
		
		-- Sanitize (escape double quotes by doubling them)
		repeat with ii in i
			set jj to ii as text
			set kk to repChar(jj, string id 34, (string id 34) & (string id 34)) of me
			set the end of newLine to kk
		end repeat
		
		-- Join the fields with "," so that, once the outer quotes are added,
		-- each field is individually quoted
		set curDelim to AppleScript's text item delimiters
		set AppleScript's text item delimiters to "\",\""
		set aLineList to newLine as text
		set AppleScript's text item delimiters to curDelim
		
		set aLineText to repChar(aLineList, return, "") of me -- delete return
		set aLineText to repChar(aLineText, LF, "") of me -- delete lf
		
		set wholeText to wholeText & "\"" & aLineText & "\"" & crlfChar -- line terminator: CR+LF
	end repeat
	
	-- Coerce before concatenating: "as" binds tighter than "&", so a non-text
	-- aPath would otherwise be concatenated into a list
	set bPath to aPath as text
	if bPath does not end with ".csv" then set bPath to bPath & ".csv"
	
	return writeToFileAsUTF8(wholeText, bPath, false) of me
end saveAsCSV
-- Write data to a file as UTF-8 text.
--   this_data   : the data to write
--   target_file : path of the file, coerced to text before use
--   append_data : when false the file is emptied first; writing always starts at eof
-- Returns true on success, otherwise the error message text.
on writeToFileAsUTF8(this_data, target_file, append_data)
	tell current application
		try
			set target_file to target_file as text
			set fileRef to open for access file target_file with write permission
			if append_data is false then
				-- Discard any existing contents
				set eof of fileRef to 0
			end if
			write this_data as «class utf8» to fileRef starting at eof
			close access fileRef
			return true
		on error errText
			-- Best-effort close so the file is not left open after a failure
			try
				close access file target_file
			end try
			return errText
		end try
	end tell
end writeToFileAsUTF8
-- Replace every occurrence of targChar in origText with repChar.
--   origText : text to process
--   targChar : text to search for
--   repChar  : replacement text
-- Returns the resulting text. AppleScript's text item delimiters are restored
-- to their previous value even when the replacement raises an error.
on repChar(origText as text, targChar as text, repChar as text)
	set curDelim to AppleScript's text item delimiters
	try
		-- Split on the target, then re-join with the replacement
		set AppleScript's text item delimiters to targChar
		set tmpList to text items of origText
		set AppleScript's text item delimiters to repChar
		set retText to tmpList as string
	on error errMsg number errNum
		set AppleScript's text item delimiters to curDelim
		error errMsg number errNum
	end try
	set AppleScript's text item delimiters to curDelim
	return retText
end repChar
配列に添字的なデータを指定してアクセス – AppleScriptの穴 says:
[…] 「Safariで現在見えている表を抽出してCSV書き出しv3」などの処理に使っています。多分、この処理は本ルーチンが存在していたからできたものです。 […]
2019年に書いた価値あるAppleScript – AppleScriptの穴 says:
[…] 2019年で一番価値の高いAppleScriptといえば、「Safariで現在見えている表を抽出してCSV書き出しv3」でしょう。Webブラウザ上に見えている範囲のオブジェクトだけを処理対象にする、という […]
bill says:
Crashes on any almanac.com table
Takaaki Naganoya says:
Is there any table there ?