aboutsummaryrefslogtreecommitdiff
path: root/vis.lua
diff options
context:
space:
mode:
author: Marc André Tanner <mat@brain-dump.org> 2016-12-03 17:01:25 +0100
committer: Marc André Tanner <mat@brain-dump.org> 2016-12-03 20:03:44 +0100
commit: 52cff9b8a81f641403df55cf5e081f6f1212d2fd (patch)
tree: 167bf61f4dae3da5261b33757ece8c70c0d7cf44 /vis.lua
parent: 8941f1138521317839e74dc55df275d584dc4350 (diff)
download: vis-52cff9b8a81f641403df55cf5e081f6f1212d2fd.tar.gz
          vis-52cff9b8a81f641403df55cf5e081f6f1212d2fd.tar.xz
vis: use file(1) for file type detection
File type detection works as follows:

 1) strip off suffixes to be ignored and test against a set of known file extensions
 2) run `file -bL --mime-type` and check against a set of known MIME types
 3) read out the first few bytes of the file and pass them to custom Lua file type detection functions

For now the configured file extensions are literal strings which are matched against the end of the file name. Maybe we should use Lua patterns instead.

We will need to add more MIME types to our mapping table. For now only the `bash` file type is associated with the text/x-shellscript MIME type.
Diffstat (limited to 'vis.lua')
-rw-r--r-- vis.lua 558
1 files changed, 384 insertions, 174 deletions
diff --git a/vis.lua b/vis.lua
index 4445ce8..bc5b618 100644
--- a/vis.lua
+++ b/vis.lua
@@ -71,144 +71,371 @@ end)
vis.ftdetect = {}
-vis.ftdetect.ignoresuffixes = "~|.orig|.bak|.old|.new|.dpkg-dist|.dpkg-old|.dpkg-new|.dpkg-bak|.pacsave|.pacnew"
-
-vis.ftdetect.filetypes = {
- actionscript = { ext = ".as|.asc" },
- ada = { ext = ".adb|.ads" },
- ansi_c = { ext = ".c|.C|.h" },
- antlr = { ext = ".g|.g4" },
- apdl = { ext = ".ans|.inp|.mac" },
- apl = { ext = ".apl" },
- applescript = { ext = ".applescript" },
- asm = { ext = ".asm|.ASM|.s|.S" },
- asp = { ext = ".asa|.asp|.hta" },
- autoit = { ext = ".au3|.a3x" },
- awk = { ext = ".awk", app = "awk|nawk|mawk|gawk" },
- bash = { ext = ".bash|.csh|.sh|.zsh", name = "%.bashrc|%.bash_profile|%.configure", app = "bash|csh|sh|zsh|ash|dash|tcsh" },
- batch = { ext = ".bat|.cmd" },
- bibtex = { ext = ".bib" },
- boo = { ext = ".boo" },
- caml = { ext = ".caml|.ml|.mli|.mll|.mly" },
- chuck = { ext = ".ck" },
- cmake = { ext = ".cmake|.cmake.in|.ctest|.ctest.in" },
- coffeescript = { ext = ".coffee", app = "coffee" },
- cpp = { ext = ".cpp|.cxx|.c++|.cc|.hh|.hpp|.hxx|.h++" },
- crystal = { ext = ".cr" },
- csharp = { ext = ".cs" },
- css = { ext = ".css" },
- cuda = { ext = ".cu|.cuh" },
- dart = { ext = ".dart", app = "dart" },
- desktop = { ext = ".desktop" },
- diff = { ext = ".diff|.patch" },
- dmd = { ext = ".d|.di", app = "rdmd" },
- dockerfile = { name = "Dockerfile" },
- dot = { ext = ".dot" },
- dsv = { name = "group|gshadow|passwd|shadow" },
- eiffel = { ext = ".e|.eif" },
- elixir = { ext = ".ex|.exs" },
- erlang = { ext = ".erl|.hrl", app = "escript" },
- faust = { ext = ".dsp" },
- fish = { ext = ".fish", app = "fish" },
- forth = { ext = ".forth|.frt|.fs", app = "gforth" },
- fortran = { ext = ".f|.for|.ftn|.fpp|.f77|.f90|.f95|.f03|.f08" },
- fsharp = { ext = ".fs" },
- fstab = { name = "fstab" },
- gap = { ext = ".g|.gd|.gi|.gap" },
- gettext = { ext = ".po|.pot" },
- gherkin = { ext = ".feature" },
- glsl = { ext = ".glslf|.glslv" },
- gnuplot = { ext = ".dem|.plt" },
- go = { ext = ".go" },
- groovy = { ext = ".groovy|.gvy", app = "groovy" },
- gtkrc = { name = "%.?gtkrc.*" },
- haskell = { ext = ".hs", app = "ghci" },
- html = { ext = ".htm|.html|.shtm|.shtml|.xhtml" },
- icon = { ext = ".icn" },
- idl = { ext = ".idl|.odl" },
- inform = { ext = ".inf|.ni" },
- ini = { ext = ".cfg|.cnf|.inf|.ini|.reg" },
- io_lang = { ext = ".io" },
- java = { ext = ".bsh|.java" },
- javascript = { ext = ".js|.jsfl", app = "jsc|node|rhino" },
- json = { ext = ".json" },
- jsp = { ext = ".jsp" },
- latex = { ext = ".bbl|.dtx|.ins|.ltx|.tex|.sty" },
- ledger = { ext = ".ledger|.journal" },
- less = { ext = ".less" },
- lilypond = { ext = ".lily|.ly" },
- lisp = { ext = ".cl|.el|.lisp|.lsp", app = "sbcl|clisp" },
- litcoffee = { ext = ".litcoffee" },
- lua = { ext = ".lua", app = "lua" },
- makefile = { ext = ".iface|.mak|.mk", name = "GNUmakefile|makefile|Makefile" },
- man = { ext = ".1|.2|.3|.4|.5|.6|.7|.8|.9|.1x|.2x|.3x|.4x|.5x|.6x|.7x|.8x|.9x" },
- markdown = { ext = ".md|.markdown" },
- moonscript = { ext = ".moon" },
- nemerle = { ext = ".n" },
- networkd = { ext = ".link|.network|.netdev" },
- nim = { ext = ".nim", app = "nim" },
- nsis = { ext = ".nsh|.nsi|.nsis" },
- objective_c = { ext = ".m|.mm|.objc" },
- pascal = { ext = ".dpk|.dpr|.p|.pas" },
- perl = { ext = ".al|.perl|.pl|.pm|.pod", app = "perl[%d.%a-]*|rakudo" },
- php = { ext = ".inc|.php|.php3|.php4|.phtml", app = "php[%d]*" },
- pico8 = { ext = ".p8" },
- pike = { ext = ".pike|.pmod" },
- pkgbuild = { name = "PKGBUILD" },
- powershell = { ext = ".ps1" },
- prolog = { ext = ".prolog" },
- props = { ext = ".props|.properties" },
- protobuf = { ext = ".proto" },
- ps = { ext = ".eps|.ps" },
- pure = { ext = ".pure" },
- python = { ext = ".sc|.py|.pyw", app = "python[%d.]*" },
- rebol = { ext = ".r|.reb" },
- rest = { ext = ".rst" },
- rexx = { ext = ".orx|.rex" },
- rhtml = { ext = ".erb|.rhtml" },
- rstats = { ext = ".R|.Rout|.Rhistory|.Rt|Rout.save|Rout.fail" },
- ruby = { ext = ".Rakefile|.rake|.rb|.rbw", app = "ruby" },
- rust = { ext = ".rs" },
- sass = { ext = ".sass|.scss" },
- scala = { ext = ".scala" },
- scheme = { ext = ".sch|.scm" },
- smalltalk = { ext = ".changes|.st|.sources" },
- sml = { ext = ".sml|.fun|.sig" },
- snobol4 = { ext = ".sno|.SNO" },
- sql = { ext = ".ddl|.sql" },
- systemd = { ext = ".automount|.device|.mount|.path|.scope|.service|.slice|.socket|.swap|.target|.timer" },
- taskpaper = { ext = ".taskpaper" },
- tcl = { ext = ".tcl|.tk" },
- texinfo = { ext = ".texi" },
- toml = { ext = ".toml" },
- vala = { ext = ".vala" },
- vb = { ext = ".asa|.bas|.cls|.ctl|.dob|.dsm|.dsr|.frm|.pag|.vb|.vba|.vbs" },
- vcard = { ext = ".vcf|.vcard" },
- verilog = { ext = ".v|.ver" },
- vhdl = { ext = ".vh|.vhd|.vhdl" },
- wsf = { ext = ".wsf" },
- xml = { ext = ".dtd|.svg|.xml|.xsd|.xsl|.xslt|.xul" },
- xtend = { ext = ".xtend" },
- yaml = { ext = ".yaml" },
+vis.ftdetect.ignoresuffixes = {
+ "~", ".orig", ".bak", ".old", ".new"
}
--- array of filetype detecting functions (win, filename, shebang, app) -> string
-vis.ftdetect.customdetectors = {
- function(file, data)
- if data:sub(1, 5) == '<?xml' then
- return 'xml'
- end
- end,
+vis.ftdetect.filetypes = {
+ actionscript = {
+ ext = { ".as", ".asc" },
+ },
+ ada = {
+ ext = { ".adb", ".ads" },
+ },
+ ansi_c = {
+ ext = { ".c", ".C", ".h" },
+ },
+ antlr = {
+ ext = { ".g", ".g4" },
+ },
+ apdl = {
+ ext = { ".ans", ".inp", ".mac" },
+ },
+ apl = {
+ ext = { ".apl" }
+ },
+ applescript = {
+ ext = { ".applescript" },
+ },
+ asm = {
+ ext = { ".asm", ".ASM", ".s", ".S" },
+ },
+ asp = {
+ ext = { ".asa", ".asp", ".hta" },
+ },
+ autoit = {
+ ext = { ".au3", ".a3x" },
+ },
+ awk = {
+ ext = { ".awk" },
+ },
+ bash = {
+ ext = { ".bash", ".csh", ".sh", ".zsh" },
+ mime = { "text/x-shellscript" },
+ },
+ batch = {
+ ext = { ".bat", ".cmd" },
+ },
+ bibtex = {
+ ext = { ".bib" },
+ },
+ boo = {
+ ext = { ".boo" },
+ },
+ caml = {
+ ext = { ".caml", ".ml", ".mli", ".mll", ".mly" },
+ },
+ chuck = {
+ ext = { ".ck" },
+ },
+ cmake = {
+ ext = { ".cmake", ".cmake.in", ".ctest", ".ctest.in" },
+ },
+ coffeescript = {
+ ext = { ".coffee" },
+ },
+ cpp = {
+ ext = { ".cpp", ".cxx", ".c++", ".cc", ".hh", ".hpp", ".hxx", ".h++" },
+ },
+ crystal = {
+ ext = { ".cr" },
+ },
+ csharp = {
+ ext = { ".cs" },
+ },
+ css = {
+ ext = { ".css" },
+ },
+ cuda = {
+ ext = { ".cu", ".cuh" },
+ },
+ dart = {
+ ext = { ".dart" },
+ },
+ desktop = {
+ ext = { ".desktop" },
+ },
+ diff = {
+ ext = { ".diff", ".patch" },
+ },
+ dmd = {
+ ext = { ".d", ".di" },
+ },
+ dockerfile = {
+ ext = { "Dockerfile" },
+ },
+ dot = {
+ ext = { ".dot" },
+ },
+ dsv = {
+ ext = { "group", "gshadow", "passwd", "shadow" },
+ },
+ eiffel = {
+ ext = { ".e", ".eif" },
+ },
+ elixir = {
+ ext = { ".ex", ".exs" },
+ },
+ erlang = {
+ ext = { ".erl", ".hrl" },
+ },
+ faust = {
+ ext = { ".dsp" },
+ },
+ fish = {
+ ext = { ".fish" },
+ },
+ forth = {
+ ext = { ".forth", ".frt", ".fs" },
+ },
+ fortran = {
+ ext = { ".f", ".for", ".ftn", ".fpp", ".f77", ".f90", ".f95", ".f03", ".f08" },
+ },
+ fsharp = {
+ ext = { ".fs" },
+ },
+ fstab = {
+ ext = { "fstab" },
+ },
+ gap = {
+ ext = { ".g", ".gd", ".gi", ".gap" },
+ },
+ gettext = {
+ ext = { ".po", ".pot" },
+ },
+ gherkin = {
+ ext = { ".feature" },
+ },
+ glsl = {
+ ext = { ".glslf", ".glslv" },
+ },
+ gnuplot = {
+ ext = { ".dem", ".plt" },
+ },
+ go = {
+ ext = { ".go" },
+ },
+ groovy = {
+ ext = { ".groovy", ".gvy" },
+ },
+ gtkrc = {
+ ext = { ".gtkrc" },
+ },
+ haskell = {
+ ext = { ".hs" },
+ },
+ html = {
+ ext = { ".htm", ".html", ".shtm", ".shtml", ".xhtml" },
+ },
+ icon = {
+ ext = { ".icn" },
+ },
+ idl = {
+ ext = { ".idl", ".odl" },
+ },
+ inform = {
+ ext = { ".inf", ".ni" },
+ },
+ ini = {
+ ext = { ".cfg", ".cnf", ".inf", ".ini", ".reg" },
+ },
+ io_lang = {
+ ext = { ".io" },
+ },
+ java = {
+ ext = { ".bsh", ".java" },
+ },
+ javascript = {
+ ext = { ".js", ".jsfl" },
+ },
+ json = {
+ ext = { ".json" },
+ },
+ jsp = {
+ ext = { ".jsp" },
+ },
+ latex = {
+ ext = { ".bbl", ".dtx", ".ins", ".ltx", ".tex", ".sty" },
+ },
+ ledger = {
+ ext = { ".ledger", ".journal" },
+ },
+ less = {
+ ext = { ".less" },
+ },
+ lilypond = {
+ ext = { ".lily", ".ly" },
+ },
+ lisp = {
+ ext = { ".cl", ".el", ".lisp", ".lsp" },
+ },
+ litcoffee = {
+ ext = { ".litcoffee" },
+ },
+ lua = {
+ ext = { ".lua" },
+ },
+ makefile = {
+ ext = { ".iface", ".mak", ".mk", "GNUmakefile", "makefile", "Makefile" },
+ },
+ man = {
+ ext = { ".1", ".2", ".3", ".4", ".5", ".6", ".7", ".8", ".9", ".1x", ".2x", ".3x", ".4x", ".5x", ".6x", ".7x", ".8x", ".9x" },
+ },
+ markdown = {
+ ext = { ".md", ".markdown" },
+ },
+ moonscript = {
+ ext = { ".moon" },
+ },
+ nemerle = {
+ ext = { ".n" },
+ },
+ networkd = {
+ ext = { ".link", ".network", ".netdev" },
+ },
+ nim = {
+ ext = { ".nim" },
+ },
+ nsis = {
+ ext = { ".nsh", ".nsi", ".nsis" },
+ },
+ objective_c = {
+ ext = { ".m", ".mm", ".objc" },
+ },
+ pascal = {
+ ext = { ".dpk", ".dpr", ".p", ".pas" },
+ },
+ perl = {
+ ext = { ".al", ".perl", ".pl", ".pm", ".pod" },
+ },
+ php = {
+ ext = { ".inc", ".php", ".php3", ".php4", ".phtml" },
+ },
+ pico8 = {
+ ext = { ".p8" },
+ },
+ pike = {
+ ext = { ".pike", ".pmod" },
+ },
+ pkgbuild = {
+ ext = { "PKGBUILD" },
+ },
+ powershell = {
+ ext = { ".ps1" },
+ },
+ prolog = {
+ ext = { ".prolog" },
+ },
+ props = {
+ ext = { ".props", ".properties" },
+ },
+ protobuf = {
+ ext = { ".proto" },
+ },
+ ps = {
+ ext = { ".eps", ".ps" },
+ },
+ pure = {
+ ext = { ".pure" },
+ },
+ python = {
+ ext = { ".sc", ".py", ".pyw" },
+ },
+ rebol = {
+ ext = { ".r", ".reb" },
+ },
+ rest = {
+ ext = { ".rst" },
+ },
+ rexx = {
+ ext = { ".orx", ".rex" },
+ },
+ rhtml = {
+ ext = { ".erb", ".rhtml" },
+ },
+ rstats = {
+ ext = { ".R", ".Rout", ".Rhistory", ".Rt", "Rout.save", "Rout.fail" },
+ },
+ ruby = {
+ ext = { ".Rakefile", ".rake", ".rb", ".rbw" },
+ },
+ rust = {
+ ext = { ".rs" },
+ },
+ sass = {
+ ext = { ".sass", ".scss" },
+ },
+ scala = {
+ ext = { ".scala" },
+ },
+ scheme = {
+ ext = { ".sch", ".scm" },
+ },
+ smalltalk = {
+ ext = { ".changes", ".st", ".sources" },
+ },
+ sml = {
+ ext = { ".sml", ".fun", ".sig" },
+ },
+ snobol4 = {
+ ext = { ".sno", ".SNO" },
+ },
+ sql= {
+ ext = { ".ddl", ".sql" },
+ },
+ systemd = {
+ ext = { ".automount", ".device", ".mount", ".path", ".scope", ".service", ".slice", ".socket", ".swap", ".target", ".timer" },
+ },
+ taskpaper = {
+ ext = { ".taskpaper" },
+ },
+ tcl = {
+ ext = { ".tcl", ".tk" },
+ },
+ texinfo = {
+ ext = { ".texi" },
+ },
+ toml = {
+ ext = { ".toml" },
+ },
+ vala = {
+ ext = { ".vala" }
+ },
+ vb = {
+ ext = { ".asa", ".bas", ".cls", ".ctl", ".dob", ".dsm", ".dsr", ".frm", ".pag", ".vb", ".vba", ".vbs" },
+ },
+ vcard = {
+ ext = { ".vcf", ".vcard" },
+ },
+ verilog = {
+ ext = { ".v", ".ver" },
+ },
+ vhdl = {
+ ext = { ".vh", ".vhd", ".vhdl" },
+ },
+ wsf = {
+ ext = { ".wsf" },
+ },
+ xml = {
+ ext = { ".dtd", ".svg", ".xml", ".xsd", ".xsl", ".xslt", ".xul" },
+ },
+ xtend = {
+ ext = {".xtend" },
+ },
+ yaml = {
+ ext = { ".yaml" },
+ },
}
vis.filetype_detect = function(win)
+ local name = win.file.name
-- remove ignored suffixes from filename
- local sanitizedfn = win.file.name
+ local sanitizedfn = name
if sanitizedfn ~= nil then
sanitizedfn = sanitizedfn:gsub('^.*/', '')
repeat
local changed = false
- for pattern in vis.ftdetect.ignoresuffixes:gmatch('[^|]+') do
+ for _, pattern in pairs(vis.ftdetect.ignoresuffixes) do
if #sanitizedfn >= #pattern then
local s, e = sanitizedfn:find(pattern, -#pattern, true)
if e == #sanitizedfn then
@@ -220,34 +447,13 @@ vis.filetype_detect = function(win)
until not changed
end
- local data = win.file:content(0, 256);
-
- -- find out via shebang which application would run our file
- local shebang
- local app
- if data:sub(1, 2) == '#!' then
- shebang = data:gsub('^#!%s*', ''):gsub('\n.*$', '')
- app = shebang:gsub('^/usr/bin/env%s*', ''):gsub('%s.*$', ''):gsub('^.*/', '')
- if #app == 0 then
- app = nil
- end
- end
-
- -- call custom detectors if any
- for _, func in pairs(vis.ftdetect.customdetectors) do
- local fres = func(win.file, data, sanitizedfn, shebang, app)
- if fres ~= nil then
- win.syntax = fres
- return
- end
- end
-
- -- detect filetype by application mentioned in the shebang
- if app ~= nil then
+ -- detect filetype by filename ending with a configured extension
+ if sanitizedfn ~= nil then
for lang, ft in pairs(vis.ftdetect.filetypes) do
- if ft.app ~= nil then
- for pattern in ft.app:gmatch('[^|]+') do
- if app:match('^'..pattern..'$') ~= nil then
+ for _, pattern in pairs(ft.ext or {}) do
+ if #sanitizedfn >= #pattern then
+ local s, e = sanitizedfn:find(pattern, -#pattern, true)
+ if e == #sanitizedfn then
win.syntax = lang
return
end
@@ -256,26 +462,18 @@ vis.filetype_detect = function(win)
end
end
- -- detect filetype by filename
- if sanitizedfn ~= nil then
- -- try filetypes recognizable by exact filename or filename pattern rather than extension
- for lang, ft in pairs(vis.ftdetect.filetypes) do
- if ft.name ~= nil then
- for pattern in ft.name:gmatch('[^|]+') do
- if sanitizedfn:match('^'..pattern..'$') ~= nil then
- win.syntax = lang
- return
- end
- end
+ -- run file(1) to determine mime type
+ if name ~= nil then
+ local file = io.popen(string.format("file -bL --mime-type -- '%s'", name:gsub("'", "'\\''")))
+ if file then
+ local mime = file:read('*all')
+ if mime then
+ mime = mime:gsub('%s*$', '')
end
- end
- -- then try to determine filetype by filename extention
- for lang, ft in pairs(vis.ftdetect.filetypes) do
- if ft.ext ~= nil then
- for pattern in ft.ext:gmatch('[^|]+') do
- if #sanitizedfn >= #pattern then
- local s, e = sanitizedfn:find(pattern, -#pattern, true)
- if e == #sanitizedfn then
+ if mime and #mime > 0 then
+ for lang, ft in pairs(vis.ftdetect.filetypes) do
+ for _, ft_mime in pairs(ft.mime or {}) do
+ if mime == ft_mime then
win.syntax = lang
return
end
@@ -285,6 +483,18 @@ vis.filetype_detect = function(win)
end
end
+ -- pass first few bytes of file to custom file type detector functions
+ local file = win.file
+ local data = file:content(0, 256)
+ if data and #data > 0 then
+ for lang, ft in pairs(vis.ftdetect.filetypes) do
+ if type(ft.detect) == 'function' and ft.detect(file, data) then
+ win.syntax = lang
+ return
+ end
+ end
+ end
+
win.syntax = nil
end