]> git.r.bdr.sh - rbdr/r.bdr.sh/blob - _plugins/generate_sitemap.rb
Changes to fix old experiments
[rbdr/r.bdr.sh] / _plugins / generate_sitemap.rb
1 # Jekyll sitemap page generator.
2 # http://recursive-design.com/projects/jekyll-plugins/
3 #
4 # Version: 0.1.8 (201108151628)
5 #
6 # Copyright (c) 2010 Dave Perrett, http://recursive-design.com/
7 # Licensed under the MIT license (http://www.opensource.org/licenses/mit-license.php)
8 #
9 # A generator that creates a sitemap.xml page for jekyll sites, suitable for submission to
10 # google etc.
11 #
12 # To use it, simply drop this script into the _plugins directory of your Jekyll site.
13 #
14 # When you compile your jekyll site, this plugin will loop through the list of pages in your
15 # site, and generate an entry in sitemap.xml for each one.
16
17 require 'pathname'
18
19 module Jekyll
20
21
# Re-open Page to expose the folder that contains it; the sitemap
# generator reads this value when it builds each page's path.
class Page
  # Returns the page's @dir instance variable unchanged (nil when it has
  # never been assigned).
  define_method(:subfolder) { @dir }
end
29
30
# Sub-class of Jekyll::StaticFile that tolerates an unimportant ArgumentError
# when the sitemap file is written.
class StaticSitemapFile < StaticFile
  # Delegates to StaticFile#write and always reports success.
  #
  # +dest+ is passed straight through to the parent implementation.
  #
  # Only ArgumentError is swallowed. The previous inline form
  # (`super(dest) rescue ArgumentError`) was a rescue *modifier*, which
  # rescues every StandardError and merely evaluates to the ArgumentError
  # constant, so unrelated failures were silently hidden.
  # NOTE(review): the ArgumentError is presumably the "same file" error raised
  # when source and destination coincide - confirm against StaticFile#write.
  def write(dest)
    super(dest)
    true
  rescue ArgumentError
    true
  end
end
39
40
41 # Generates a sitemap.xml file containing URLs of all pages and posts.
42 class SitemapGenerator < Generator
43 safe true
44 priority :low
45
# Generates the sitemap.xml file inside the site's destination folder and
# registers it with the site as a static file.
#
# +site+ is the global Site object.
def generate(site)
  # Create the destination folder if necessary. mkpath also creates any
  # missing parent folders, which a plain mkdir would fail on.
  site_folder = site.config['destination']
  Pathname.new(site_folder).mkpath unless File.directory?(site_folder)

  # Write the contents of sitemap.xml. The block form of File.open closes
  # the file on exit, so no explicit close is needed.
  File.open(File.join(site_folder, 'sitemap.xml'), 'w') do |f|
    f.write(generate_header)
    f.write(generate_content(site))
    f.write(generate_footer)
  end

  # Add a static file entry for sitemap.xml, otherwise Site::cleanup will remove it.
  site.static_files << Jekyll::StaticSitemapFile.new(site, site.dest, '/', 'sitemap.xml')
end
68
69 private
70
# Builds the opening of the document: the XML declaration on its own line,
# followed by the opening <urlset> tag carrying the sitemap 0.9 namespace.
def generate_header
  declaration = '<?xml version="1.0" encoding="UTF-8"?>'
  urlset_open = '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
  [declaration, urlset_open].join("\n")
end
75
# Returns a string containing the XML entries for every stand-alone page
# followed by every post.
#
# +site+ is the global Site object.
#
# Pages are skipped when their source file does not exist under site.source
# (e.g. paginator pages), when they resolve to a SASS/SCSS/CSS file, or when
# their path contains "error". A page's changefreq defaults to "" (omitted
# from the output) and a post's to "never"; both can be overridden through
# the 'changefreq' key of the item's data.
def generate_content(site)
  entries = []

  # First, the stand-alone pages.
  site.pages.each do |page|
    relative_path = "#{page.subfolder}/#{page.name}"
    source_file = File.join(site.source, relative_path)

    # Skip files that don't exist yet (e.g. paginator pages). The check must
    # look at the same file whose mtime is read below, not at the bare
    # relative path.
    next unless File.exist?(source_file)

    # Use the user-specified permalink if one is given; otherwise be smart
    # about the output filename.
    url = page.permalink || relative_path.sub(/\.md\z/, '.html')

    # Ignore SASS, SCSS, and CSS files (the dot is escaped so that a
    # permalink such as "/docs/css" is not dropped by accident).
    next if url =~ /\.(sass|scss|css)\z/

    # Remove the trailing 'index.html' if there is one, and just output the folder name.
    url = url.sub(%r{/index\.html\z}, '/')

    # NOTE(review): presumably meant to keep error pages (404 etc.) out of
    # the sitemap; any path containing "error" is excluded.
    next if url =~ /error/

    # to_s guards against a 'changefreq' key whose value is nil or not a String.
    changefreq = page.data.fetch('changefreq', '').to_s
    entries << entry(url, File.mtime(source_file), changefreq, site)
  end

  # Next, all the posts.
  site.site_payload['site']['posts'].each do |post|
    changefreq = post.data.fetch('changefreq', 'never').to_s
    url = post.url.sub(%r{/index\.html\z}, '/')
    entries << entry(url, post.date, changefreq, site)
  end

  entries.join
end
137
# Builds the end of the document: a newline followed by the closing
# </urlset> tag.
def generate_footer
  closing_tag = '</urlset>'
  "\n" + closing_tag
end
142
# Creates an XML <url> entry from the given path and date.
#
# +path+ is the URL path to the page.
# +date+ is the date the file was modified (in the case of regular pages), or published (for blog posts);
#   it must respond to strftime.
# +changefreq+ is the frequency with which the page is expected to change (this information is used by
#   e.g. the Googlebot). This may be specified in the page's YAML front matter. If it is nil or empty,
#   nothing is output for this property.
# +site+ is the global Site object; its 'baseurl' config value is prepended to +path+.
#
# Returns the entry as a String that starts with a newline.
def entry(path, date, changefreq, site)
  # The sitemap protocol requires these characters to be entity-escaped
  # inside <loc>; an unescaped "&" would make the whole document malformed.
  xml_entities = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;', "'" => '&apos;' }

  # Remove the trailing slash from the baseurl if it is present, for consistency.
  # to_s lets a site without a configured baseurl fall back to "".
  baseurl = site.config['baseurl'].to_s.chomp('/')
  loc = "#{baseurl}#{path}".gsub(/[&<>"']/, xml_entities)
  lastmod = date.strftime('%Y-%m-%d')

  # to_s tolerates a nil or non-String changefreq coming from front matter.
  frequency = changefreq.to_s
  changefreq_tag = frequency.empty? ? '' : "\n <changefreq>#{frequency}</changefreq>"

  "\n<url>\n<loc>#{loc}</loc>\n<lastmod>#{lastmod}</lastmod>#{changefreq_tag}\n</url>"
end
162
163 end
164
165 end