]>
Commit | Line | Data |
---|---|---|
1 | # Jekyll sitemap page generator. | |
2 | # http://recursive-design.com/projects/jekyll-plugins/ | |
3 | # | |
4 | # Version: 0.1.8 (201108151628) | |
5 | # | |
6 | # Copyright (c) 2010 Dave Perrett, http://recursive-design.com/ | |
7 | # Licensed under the MIT license (http://www.opensource.org/licenses/mit-license.php) | |
8 | # | |
9 | # A generator that creates a sitemap.xml page for jekyll sites, suitable for submission to | |
10 | # google etc. | |
11 | # | |
12 | # To use it, simply drop this script into the _plugins directory of your Jekyll site. | |
13 | # | |
14 | # When you compile your jekyll site, this plugin will loop through the list of pages in your | |
15 | # site, and generate an entry in sitemap.xml for each one. | |
16 | ||
17 | require 'pathname' | |
18 | ||
19 | module Jekyll | |
20 | ||
21 | ||
22 | # Monkey-patch an accessor for a page's containing folder, since | |
23 | # we need it to generate the sitemap. | |
class Page
  # Exposes the folder that contains this page, i.e. the value held in the
  # +@dir+ instance variable. Returns nil when +@dir+ has not been set.
  def subfolder
    instance_variable_get(:@dir)
  end
end
29 | ||
30 | ||
31 | # Sub-class Jekyll::StaticFile to allow recovery from unimportant exception | |
32 | # when writing the sitemap file. | |
class StaticSitemapFile < StaticFile
  # Writes the static file to +dest+ via StaticFile#write, tolerating an
  # ArgumentError from the underlying write.
  #
  # The sitemap is generated directly inside the destination folder and is
  # registered with that folder as its base, so the inherited write is
  # presumably asked to copy the file onto itself, which is reported as an
  # ArgumentError (TODO confirm against the Jekyll version in use).
  #
  # Only ArgumentError is ignored. The previous `super(dest) rescue
  # ArgumentError` form was a rescue modifier, which swallowed every
  # StandardError (permission errors, missing files, ...) instead of just
  # the one named.
  #
  # +dest+ is the destination root passed through to StaticFile#write.
  #
  # Always returns true.
  def write(dest)
    begin
      super(dest)
    rescue ArgumentError
      # Unimportant for the sitemap: the file is already in place.
    end
    true
  end
end
39 | ||
40 | ||
41 | # Generates a sitemap.xml file containing URLs of all pages and posts. | |
class SitemapGenerator < Generator
  safe true
  priority :low

  # Generates the sitemap.xml file in the site's destination folder and
  # registers it as a static file of the site.
  #
  # +site+ is the global Site object.
  def generate(site)
    # Create the destination folder (including any missing parents) if necessary.
    site_folder = site.config['destination']
    Pathname.new(site_folder).mkpath unless File.directory?(site_folder)

    # Write the contents of sitemap.xml. The block form of File.open closes
    # the handle on exit, so no explicit close is needed.
    File.open(File.join(site_folder, 'sitemap.xml'), 'w') do |f|
      f.write(generate_header)
      f.write(generate_content(site))
      f.write(generate_footer)
    end

    # Add a static file entry for the sitemap, otherwise Site::cleanup will remove it.
    site.static_files << Jekyll::StaticSitemapFile.new(site, site.dest, '/', 'sitemap.xml')
  end

  private

  # Returns the XML header.
  def generate_header
    "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"
  end

  # Returns a string containing the XML entries for all pages and posts.
  #
  # +site+ is the global Site object.
  def generate_content(site)
    result = ''

    # First, the stand-alone pages.
    site.pages.each do |page|
      path = page.subfolder + '/' + page.name
      source_file = File.join(site.source, path)

      # Skip pages that have no source file on disk (e.g. paginator pages).
      # The check must look at the same file whose mtime is read below,
      # i.e. the path relative to the site source, not the bare URL path.
      next unless File.exist?(source_file)

      mod_date = File.mtime(source_file)

      # Use the user-specified permalink if one is given; otherwise map the
      # Markdown source name to the rendered .html name.
      path = page.permalink || path.sub(/\.md\z/, '.html')

      # Ignore SASS, SCSS, and CSS files.
      next if path =~ /\.(sass|scss|css)\z/

      # Leave out anything whose path mentions "error" (e.g. error pages).
      next if path =~ /error/

      # A missing changefreq becomes '', which suppresses the element.
      changefreq = page.data['changefreq'].to_s

      result << entry(strip_index(path), mod_date, changefreq, site)
    end

    # Next, all the posts. Posts default to a changefreq of "never".
    site.site_payload['site']['posts'].each do |post|
      changefreq = (post.data['changefreq'] || 'never').to_s
      result << entry(strip_index(post.url), post.date, changefreq, site)
    end

    result
  end

  # Returns the XML footer.
  def generate_footer
    "\n</urlset>"
  end

  # Removes a trailing 'index.html' from +url+, keeping the folder's
  # trailing slash, so that only the folder name is output.
  # URLs that do not end in '/index.html' are returned unchanged.
  def strip_index(url)
    url.sub(%r{/index\.html\z}, '/')
  end

  # Escapes the XML special characters &, < and > in +text+ so that values
  # such as URLs with query strings do not produce malformed XML.
  def xml_escape(text)
    text.to_s.encode(:xml => :text)
  end

  # Creates an XML entry from the given path and date.
  #
  # +path+ is the URL path to the page.
  # +date+ is the date the file was modified (in the case of regular pages), or published (for blog posts);
  # it must respond to strftime.
  # +changefreq+ is the frequency with which the page is expected to change (this information is used by
  # e.g. the Googlebot). This may be specified in the page's YAML front matter. If it is empty, nothing
  # is output for this property.
  # +site+ is the global Site object; its 'baseurl' config value is prepended to +path+.
  #
  # Returns the <url> element as a string with a leading newline. The
  # whitespace inside the element is only for readability.
  def entry(path, date, changefreq, site)
    # Remove the trailing slash from the baseurl if it is present, for consistency.
    # to_s covers a site that does not configure a baseurl at all.
    baseurl = site.config['baseurl'].to_s.chomp('/')
    freq = changefreq.to_s

    xml = "\n  <url>"
    xml << "\n    <loc>#{xml_escape("#{baseurl}#{path}")}</loc>"
    xml << "\n    <lastmod>#{date.strftime('%Y-%m-%d')}</lastmod>"
    xml << "\n    <changefreq>#{xml_escape(freq)}</changefreq>" unless freq.empty?
    xml << "\n  </url>"
  end

end
164 | ||
165 | end |