#!/usr/bin/env runghc {-# LANGUAGE OverloadedStrings #-} {- Hakyll file for building Gwern.net Author: gwern Date: 2010-10-01 When: Time-stamp: "2024-10-18 20:50:45 gwern" License: CC-0 Debian dependencies: $ sudo apt-get install libghc-hakyll-dev libghc-pandoc-dev libghc-filestore-dev libghc-tagsoup-dev imagemagick rsync git libghc-aeson-dev libghc-missingh-dev libghc-digest-dev tidy gridsite-clients (GHC is needed for Haskell; Hakyll & Pandoc do the heavy lifting of compiling Markdown files to HTML; tag soup & ImageMagick are runtime dependencies used to help optimize images, and rsync for the server/git upload to hosting/Github respectively.) Demo command (for the full script, with all static checks & generation & optimizations, see `sync.sh`): -} import Control.Monad (when, unless, (<=<)) import Data.Char (toLower) import Data.List (intercalate, isInfixOf, isSuffixOf) import qualified Data.Map.Strict as M (lookup) import Data.Maybe (fromMaybe) import System.Environment (getArgs, withArgs, lookupEnv) import Hakyll (compile, composeRoutes, constField, fromGlob, symlinkFileCompiler, copyFileCompiler, dateField, defaultContext, defaultHakyllReaderOptions, field, getMetadata, getMetadataField, lookupString, defaultHakyllWriterOptions, getRoute, gsubRoute, hakyll, idRoute, itemIdentifier, loadAndApplyTemplate, match, modificationTimeField, mapContext, pandocCompilerWithTransformM, route, setExtension, pathField, preprocess, boolField, toFilePath, templateCompiler, version, Compiler, Context, Item, unsafeCompiler, noResult, getUnderlying, escapeHtml, (.&&.), complement) import Text.Pandoc (nullAttr, runPure, runWithDefaultPartials, compileTemplate, def, pandocExtensions, readerExtensions, readMarkdown, writeHtml5String, Block(..), HTMLMathMethod(MathJax), defaultMathJaxURL, Inline(..), ObfuscationMethod(NoObfuscation), Pandoc(..), WriterOptions(..), nullMeta) import Text.Pandoc.Walk (walk, walkM) import Network.HTTP (urlEncode) import System.IO.Unsafe 
(unsafePerformIO)
import qualified Data.Text as T (append, filter, isInfixOf, pack, unpack, length)

-- local custom modules:
import Image (invertImageInline, imageMagickDimensions, addImgDimensions, imageLinkHeightWidthSet)
import Inflation (nominalToRealInflationAdjuster)
import Interwiki (convertInterwikiLinks)
import LinkArchive (localizeLink, readArchiveMetadataAndCheck, ArchiveMetadata)
import LinkAuto (linkAuto)
import LinkBacklink (getBackLinkCheck, getLinkBibLinkCheck, getSimilarLinkCheck)
import LinkMetadata (addPageLinkWalk, readLinkMetadataSlow, writeAnnotationFragments, createAnnotations, hasAnnotation)
import LinkMetadataTypes (Metadata)
import Tags (tagsToLinksDiv)
import Typography (linebreakingTransform, typographyTransform, titlecaseInline, completionProgressHTML)
import Utils (printGreen, replace, deleteMany, replaceChecked, safeHtmlWriterOptions, simplifiedHTMLString, inlinesToText, flattenLinksInInlines, delete, toHTML)
import Test (testAll)
import Config.Misc (cd, currentYear)
import Metadata.Date (dateRangeDuration)

-- | Entry point: drive the Hakyll build of the site.
--
-- Inputs:
--
-- * Environment variable @SLOW@: when it is exactly @"true"@, 'testAll' is run before site compilation.
-- * CLI arguments: everything other than @"build"@ is interpreted by this script rather than by Hakyll:
--
--     * @--annotation-rebuild@: call 'writeAnnotationFragments' with @False@ ("rewriting all annotations") and do nothing else.
--     * @--annotation-missing-one-shot@: call 'writeAnnotationFragments' with @True@ ("writing missing annotations") and stop there.
--     * any other first extra argument: used as the glob of Markdown pages to compile; static-file rules are skipped in that case.
--
-- With no extra arguments, all @**.md@ pages outside @doc\/www\/@ are compiled and the static-file rules are registered as well.
--
-- Only the first CLI argument is forwarded to Hakyll.
main :: IO ()
main = do
  arg <- System.Environment.lookupEnv "SLOW" -- whether to do the more expensive stuff; Hakyll eats the CLI arguments, so we pass it in as an exported environment variable instead
  let slow = "true" == fromMaybe "" arg
  args <- getArgs
  let args' = filter (/="build") args
  let annotationBuildAllForce = filter (=="--annotation-rebuild") args'
  let annotationOneShot = filter (=="--annotation-missing-one-shot") args'
  -- NOTE: reset the `getArgs` to pass through just the first argument (ie. "build", converting it back to `hakyll build`),
  -- as `hakyll` internally calls `getArgs` and will fatally error out if we don't delete our own arguments.
  -- `take 1` (rather than `[head args]`) keeps this total: with no arguments at all, Hakyll receives an empty
  -- argument list and can report its own usage instead of us dying on `head []`.
  withArgs (take 1 args) $ hakyll $ do
    preprocess cd
    preprocess $ printGreen ("Local archives parsing…" :: String)
    am <- preprocess readArchiveMetadataAndCheck
    preprocess $ printGreen ("Popup annotations parsing…" :: String)
    meta <- preprocess readLinkMetadataSlow
    if not (null annotationBuildAllForce)
      then preprocess $ do
        printGreen ("Rewriting all annotations…" :: String)
        writeAnnotationFragments am meta False
      else do
        preprocess $ do
          printGreen ("Writing missing annotations…" :: String)
          writeAnnotationFragments am meta True
        if not (null annotationOneShot)
          then preprocess $ printGreen "Finished writing missing annotations, and one-shot mode specified, so exiting now."
          else do
            when slow $ preprocess testAll
            preprocess $ printGreen ("Begin site compilation…" :: String)
            let targets = case args' of
                  -- exclude any temporary Markdown files in /doc/www/misc/ or mirrored somehow, but compile ones anywhere else
                  []         -> fromGlob "**.md" .&&. complement "doc/www/**.md"
                  -- an explicit target was given: compile only the pages matching it
                  (target:_) -> fromGlob target
            unless (null args') $ preprocess (printGreen "Essay targets specified, so compiling just: " >> print targets)
            match targets $ do
              -- strip extension since users shouldn't care if HTML3-5/XHTML/etc (cool URLs); delete apostrophes/commas & replace spaces with hyphens
              -- as people keep screwing them up endlessly: (and in nginx, we auto-replace all EN DASH & EM DASH in URLs with hyphens)
              route $ gsubRoute "," (const "") `composeRoutes`
                      gsubRoute "'" (const "") `composeRoutes`
                      gsubRoute " " (const "-") `composeRoutes`
                      setExtension ""
              -- Markdown reader options: Hakyll's defaults
              let readerOptions = defaultHakyllReaderOptions
              compile $ do
                ident <- getUnderlying
                -- the page's `index` metadata field, or "" when it is absent
                indexpM <- getMetadataField ident "index"
                let indexp = fromMaybe "" indexpM
                pandocCompilerWithTransformM readerOptions woptions (unsafeCompiler . pandocTransform meta am indexp)
                  >>= loadAndApplyTemplate "static/template/default.html" (postCtx meta)
                  >>= imgUrls

            let static = route idRoute >> compile copyFileCompiler
            when (null args') $ version "static" $ mapM_ (`match` static) ["metadata/**"] -- we want to overwrite annotations in-place with various post-processing things

            -- handle the simple static non-.md files; we define this after the pages because the pages' compilation has side-effects which may create new static files (archives & downsized images)
            let staticSymlink = route idRoute >> compile symlinkFileCompiler -- WARNING: custom optimization requiring forked Hakyll installation; see https://github.com/jaspervdj/hakyll/issues/786
            when (null args') $ version "static" $ mapM_ (`match` staticSymlink) [
                                    "doc/**",
                                    "**.hs",
                                    "**.sh",
                                    "**.txt",
                                    "**.html",
                                    "**.md",
                                    "**.css",
                                    "**.R",
                                    "**.conf",
                                    "**.php",
                                    "**.svg",
                                    "**.png",
                                    "**.jpg",
                                    -- skip "static/build/**" because of the temporary files
                                    "static/css/**",
                                    "static/font/**",
                                    "static/img/**",
                                    "static/include/**",
                                    "static/nginx/**",
                                    "static/redirect/**",
                                    "static/template/**",
                                    "static/**.conf",
                                    "static/**.css",
                                    "static/**.gif",
                                    "static/**.git",
                                    "static/**.gitignore",
                                    "static/**.hs",
                                    "static/**.html",
                                    "static/**.ico",
                                    "static/**.js",
                                    "static/**.net",
                                    "static/**.png",
                                    "static/**.R",
                                    "static/**.sh",
                                    "static/**.svg",
                                    "static/**.ttf",
                                    "static/**.otf",
                                    "static/**.php",
                                    "static/**.py",
                                    "static/**.wasm",
                                    "static/**.el",
                                    "static/LICENSE",
                                    "static/build/.htaccess",
                                    "static/build/upload",
                                    "static/build/newsletter-lint",
                                    "static/build/gwa",
                                    "static/build/crossref",
                                    "static/build/compressPdf",
                                    "static/build/compressJPG2",
                                    "test-include",
                                    "atom.xml"] -- copy stub of deprecated RSS feed

            match "static/template/*.html" $ compile templateCompiler

woptions :: Text.Pandoc.WriterOptions
woptions = defaultHakyllWriterOptions{ writerSectionDivs = True,
                                       writerTableOfContents = True,
                                       writerColumns = 130,
                                       writerTemplate = Just tocTemplate,
                                       writerTOCDepth = 4,
                                       -- we use MathJax directly to
bypass Texmath; this enables features like colored equations: -- https://docs.mathjax.org/en/latest/input/tex/extensions/color.html http://mirrors.ctan.org/macros/latex/required/graphics/color.pdf#page=4 eg. "Roses are $\color{red}{\text{beautiful red}}$, violets are $\color{blue}{\text{lovely blue}}$" or "${\color{red} x} + {\color{blue} y}$" writerHTMLMathMethod = MathJax defaultMathJaxURL, writerEmailObfuscation = NoObfuscation } where -- below copied from https://github.com/jaspervdj/hakyll/blob/e8ed369edaae1808dffcc22d1c8fb1df7880e065/web/site.hs#L73 because god knows I don't know what this type bullshit is either: -- "When did it get so hard to compile a string to a Pandoc template?" tocTemplate = either error id $ either (error . show) id $ runPure $ runWithDefaultPartials $ compileTemplate "" $ T.pack $ "
$toc$
" ++ noScriptTemplate ++ "$body$" -- we do the main $body$ substitution inside default.html so we can inject stuff inside the #markdownBody wrapper; the div is closed there -- NOTE: we need to do the site-wide `