From 52cff9b8a81f641403df55cf5e081f6f1212d2fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Andr=C3=A9=20Tanner?= Date: Sat, 3 Dec 2016 17:01:25 +0100 Subject: vis: use file(1) for file type detection File type detection works as follows: 1) strip off suffixes to be ignored and test against a set of known file extensions 2) run `file -bL --mime-type` and check against a set of known mime types 3) read out the first few bytes of the file and pass them to custom Lua file type detection functions For now the configured file extensions are literal strings which are matched against the end of the file name. Maybe we should use Lua patterns instead. We will need to add more mime types to our mapping table. For now only the `bash` file type was associated with the text/x-shellscript mime type. --- vis.lua | 558 ++++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 384 insertions(+), 174 deletions(-) diff --git a/vis.lua b/vis.lua index 4445ce8..bc5b618 100644 --- a/vis.lua +++ b/vis.lua @@ -71,144 +71,371 @@ end) vis.ftdetect = {} -vis.ftdetect.ignoresuffixes = "~|.orig|.bak|.old|.new|.dpkg-dist|.dpkg-old|.dpkg-new|.dpkg-bak|.pacsave|.pacnew" - -vis.ftdetect.filetypes = { - actionscript = { ext = ".as|.asc" }, - ada = { ext = ".adb|.ads" }, - ansi_c = { ext = ".c|.C|.h" }, - antlr = { ext = ".g|.g4" }, - apdl = { ext = ".ans|.inp|.mac" }, - apl = { ext = ".apl" }, - applescript = { ext = ".applescript" }, - asm = { ext = ".asm|.ASM|.s|.S" }, - asp = { ext = ".asa|.asp|.hta" }, - autoit = { ext = ".au3|.a3x" }, - awk = { ext = ".awk", app = "awk|nawk|mawk|gawk" }, - bash = { ext = ".bash|.csh|.sh|.zsh", name = "%.bashrc|%.bash_profile|%.configure", app = "bash|csh|sh|zsh|ash|dash|tcsh" }, - batch = { ext = ".bat|.cmd" }, - bibtex = { ext = ".bib" }, - boo = { ext = ".boo" }, - caml = { ext = ".caml|.ml|.mli|.mll|.mly" }, - chuck = { ext = ".ck" }, - cmake = { ext = ".cmake|.cmake.in|.ctest|.ctest.in" }, - coffeescript = { ext = ".coffee", app = "coffee" }, - cpp = { ext = ".cpp|.cxx|.c++|.cc|.hh|.hpp|.hxx|.h++" }, - crystal = { ext = ".cr" }, - csharp = { ext = ".cs" }, - css = { ext = ".css" }, - cuda = { ext = ".cu|.cuh" }, - dart = { ext = ".dart", app = "dart" }, - desktop = { ext = ".desktop" }, - diff = { ext = ".diff|.patch" }, - dmd = { ext = ".d|.di", app = "rdmd" }, - dockerfile = { name = "Dockerfile" }, - dot = { ext = ".dot" }, - dsv = { name = "group|gshadow|passwd|shadow" }, - eiffel = { ext = ".e|.eif" }, - elixir = { ext = ".ex|.exs" }, - erlang = { ext = ".erl|.hrl", app = "escript" }, - faust = { ext = ".dsp" }, - fish = { ext = ".fish", app = "fish" }, - forth = { ext = ".forth|.frt|.fs", app = "gforth" }, - fortran = { ext = ".f|.for|.ftn|.fpp|.f77|.f90|.f95|.f03|.f08" }, - fsharp = { ext = ".fs" }, - fstab = { name = "fstab" }, - gap = { ext = ".g|.gd|.gi|.gap" }, - gettext = { ext = ".po|.pot" }, - gherkin = { ext = ".feature" }, - glsl = { ext = ".glslf|.glslv" }, - gnuplot = { ext = ".dem|.plt" }, - go = { ext = ".go" }, - groovy = { ext = ".groovy|.gvy", app = "groovy" }, - gtkrc = { name = "%.?gtkrc.*" }, - haskell = { ext = ".hs", app = "ghci" }, - html = { ext = ".htm|.html|.shtm|.shtml|.xhtml" }, - icon = { ext = ".icn" }, - idl = { ext = ".idl|.odl" }, - inform = { ext = ".inf|.ni" }, - ini = { ext = ".cfg|.cnf|.inf|.ini|.reg" }, - io_lang = { ext = ".io" }, - java = { ext = ".bsh|.java" }, - javascript = { ext = ".js|.jsfl", app = "jsc|node|rhino" }, - json = { ext = ".json" }, - jsp = { ext = ".jsp" }, - latex = { ext = ".bbl|.dtx|.ins|.ltx|.tex|.sty" }, - ledger = { ext = ".ledger|.journal" }, - less = { ext = ".less" }, - lilypond = { ext = ".lily|.ly" }, - lisp = { ext = ".cl|.el|.lisp|.lsp", app = "sbcl|clisp" }, - litcoffee = { ext = ".litcoffee" }, - lua = { ext = ".lua", app = "lua" }, - makefile = { ext = ".iface|.mak|.mk", name = "GNUmakefile|makefile|Makefile" }, - man = { ext = ".1|.2|.3|.4|.5|.6|.7|.8|.9|.1x|.2x|.3x|.4x|.5x|.6x|.7x|.8x|.9x" }, - markdown = { ext = ".md|.markdown" }, - moonscript = { ext = ".moon" }, - nemerle = { ext = ".n" }, - networkd = { ext = ".link|.network|.netdev" }, - nim = { ext = ".nim", app = "nim" }, - nsis = { ext = ".nsh|.nsi|.nsis" }, - objective_c = { ext = ".m|.mm|.objc" }, - pascal = { ext = ".dpk|.dpr|.p|.pas" }, - perl = { ext = ".al|.perl|.pl|.pm|.pod", app = "perl[%d.%a-]*|rakudo" }, - php = { ext = ".inc|.php|.php3|.php4|.phtml", app = "php[%d]*" }, - pico8 = { ext = ".p8" }, - pike = { ext = ".pike|.pmod" }, - pkgbuild = { name = "PKGBUILD" }, - powershell = { ext = ".ps1" }, - prolog = { ext = ".prolog" }, - props = { ext = ".props|.properties" }, - protobuf = { ext = ".proto" }, - ps = { ext = ".eps|.ps" }, - pure = { ext = ".pure" }, - python = { ext = ".sc|.py|.pyw", app = "python[%d.]*" }, - rebol = { ext = ".r|.reb" }, - rest = { ext = ".rst" }, - rexx = { ext = ".orx|.rex" }, - rhtml = { ext = ".erb|.rhtml" }, - rstats = { ext = ".R|.Rout|.Rhistory|.Rt|Rout.save|Rout.fail" }, - ruby = { ext = ".Rakefile|.rake|.rb|.rbw", app = "ruby" }, - rust = { ext = ".rs" }, - sass = { ext = ".sass|.scss" }, - scala = { ext = ".scala" }, - scheme = { ext = ".sch|.scm" }, - smalltalk = { ext = ".changes|.st|.sources" }, - sml = { ext = ".sml|.fun|.sig" }, - snobol4 = { ext = ".sno|.SNO" }, - sql = { ext = ".ddl|.sql" }, - systemd = { ext = ".automount|.device|.mount|.path|.scope|.service|.slice|.socket|.swap|.target|.timer" }, - taskpaper = { ext = ".taskpaper" }, - tcl = { ext = ".tcl|.tk" }, - texinfo = { ext = ".texi" }, - toml = { ext = ".toml" }, - vala = { ext = ".vala" }, - vb = { ext = ".asa|.bas|.cls|.ctl|.dob|.dsm|.dsr|.frm|.pag|.vb|.vba|.vbs" }, - vcard = { ext = ".vcf|.vcard" }, - verilog = { ext = ".v|.ver" }, - vhdl = { ext = ".vh|.vhd|.vhdl" }, - wsf = { ext = ".wsf" }, - xml = { ext = ".dtd|.svg|.xml|.xsd|.xsl|.xslt|.xul" }, - xtend = { ext = ".xtend" }, - yaml = { ext = ".yaml" }, +vis.ftdetect.ignoresuffixes = { + "~", ".orig", ".bak", ".old", ".new" } --- array of filetype detecting functions (win, filename, shebang, app) -> string -vis.ftdetect.customdetectors = { - function(file, data) - if data:sub(1, 5) == '= #pattern then local s, e = sanitizedfn:find(pattern, -#pattern, true) if e == #sanitizedfn then @@ -220,34 +447,13 @@ vis.filetype_detect = function(win) until not changed end - local data = win.file:content(0, 256); - - -- find out via shebang which application would run our file - local shebang - local app - if data:sub(1, 2) == '#!' then - shebang = data:gsub('^#!%s*', ''):gsub('\n.*$', '') - app = shebang:gsub('^/usr/bin/env%s*', ''):gsub('%s.*$', ''):gsub('^.*/', '') - if #app == 0 then - app = nil - end - end - - -- call custom detectors if any - for _, func in pairs(vis.ftdetect.customdetectors) do - local fres = func(win.file, data, sanitizedfn, shebang, app) - if fres ~= nil then - win.syntax = fres - return - end - end - - -- detect filetype by application mentioned in the shebang - if app ~= nil then + -- detect filetype by filename ending with a configured extension + if sanitizedfn ~= nil then for lang, ft in pairs(vis.ftdetect.filetypes) do - if ft.app ~= nil then - for pattern in ft.app:gmatch('[^|]+') do - if app:match('^'..pattern..'$') ~= nil then + for _, pattern in pairs(ft.ext or {}) do + if #sanitizedfn >= #pattern then + local s, e = sanitizedfn:find(pattern, -#pattern, true) + if e == #sanitizedfn then win.syntax = lang return end @@ -256,26 +462,18 @@ vis.filetype_detect = function(win) end end - -- detect filetype by filename - if sanitizedfn ~= nil then - -- try filetypes recognizable by exact filename or filename pattern rather than extension - for lang, ft in pairs(vis.ftdetect.filetypes) do - if ft.name ~= nil then - for pattern in ft.name:gmatch('[^|]+') do - if sanitizedfn:match('^'..pattern..'$') ~= nil then - win.syntax = lang - return - end - end + -- run file(1) to determine mime type + if name ~= nil then + local file = io.popen(string.format("file -bL --mime-type -- '%s'", name:gsub("'", "'\\''"))) + if file then + local mime = file:read('*all') + if mime then + mime = mime:gsub('%s*$', '') end - end - -- then try to determine filetype by filename extention - for lang, ft in pairs(vis.ftdetect.filetypes) do - if ft.ext ~= nil then - for pattern in ft.ext:gmatch('[^|]+') do - if #sanitizedfn >= #pattern then - local s, e = sanitizedfn:find(pattern, -#pattern, true) - if e == #sanitizedfn then + if mime and #mime > 0 then + for lang, ft in pairs(vis.ftdetect.filetypes) do + for _, ft_mime in pairs(ft.mime or {}) do + if mime == ft_mime then win.syntax = lang return end @@ -285,6 +483,18 @@ vis.filetype_detect = function(win) end end + -- pass first few bytes of file to custom file type detector functions + local file = win.file + local data = file:content(0, 256) + if data and #data > 0 then + for lang, ft in pairs(vis.ftdetect.filetypes) do + if type(ft.detect) == 'function' and ft.detect(file, data) then + win.syntax = lang + return + end + end + end + win.syntax = nil end -- cgit v1.2.3