A dynamic standard sitemap.xml with Google image extension implemented as a Java Servlet

Thursday March 07, 2013 ()

A sitemap.xml generated on demand is very helpful for websites that have lots of pages and whose pages are added frequently. According to Google, sitemaps help their crawlers find pages on a website. This is especially true for new websites and for pages that have few links pointing to them.

The servlet described here is based on the sitemaps.org protocol with the Google image extension. We used the JDOM2 library to create the XML sitemap structure. Below is what a standard sitemap looks like with the Google image extension. Please note that the image extension is supported only by Google; Bing/Yahoo ignore it.

<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" 
     xmlns:image="http://www.google.com/schemas/sitemap-image/1.1">
<url>
    <loc>
        page url
    </loc>
    <!-- the following are all optional -->
    <lastmod>2013-01-01</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.8</priority>   
</url>
<url>
    <loc>
        page url
    </loc>
    <!-- the following are all optional -->
    <lastmod>2013-01-01</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.8</priority>   

    <!-- Google image extension -->
    <image:image>
        <image:loc>
            image url
        </image:loc>
        <!-- the following are all optional -->
        <image:caption>
            Caption of the image
        </image:caption>
        <image:geo_location />
        <image:title />
        <image:license />
    </image:image>
</url>
<url>
    <loc>
        page url
    </loc>    
    <!-- the following are all optional -->
    <lastmod>2013-01-01</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.5</priority>   
</url>
</urlset> 

Please see the sitemap protocol website for an explanation of these optional elements. The image extension options are explained at the Google Webmaster support site.

Here is the servlet we use to generate our sitemap. It generates only the required sitemap XML elements. Pages are generated from a database.

package packagename;

import java.io.IOException;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.HashMap;
import java.util.Map;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.Namespace;
import org.jdom2.output.Format;
import org.jdom2.output.XMLOutputter;


@WebServlet(name = "sitemap", urlPatterns = {"/sitemap.xml"})
public class sitemap extends HttpServlet {

    protected void processRequest(HttpServletRequest request,
            HttpServletResponse response)
            throws ServletException, IOException {

        PrintWriter out = response.getWriter();
        response.setContentType("application/xml;charset=UTF-8");

        try {

            final String XMLNS = "http://www.sitemaps.org/schemas/sitemap/0.9";
            final String IMAGENS = "http://www.google.com/schemas/sitemap-image/1.1";

            Namespace xmlns = Namespace.getNamespace(XMLNS);
            Element urlset = new Element("urlset", xmlns);
            Namespace imagens = Namespace.getNamespace("image", IMAGENS);
            urlset.addNamespaceDeclaration(imagens);

            // add the actual sitemap contents
            addUrlItems(urlset, xmlns, imagens);

            Document sitemap = new Document();
            sitemap.setRootElement(urlset);

            XMLOutputter xmlOutput = new XMLOutputter();
            xmlOutput.setFormat(Format.getPrettyFormat().setEncoding("UTF-8"));

            xmlOutput.output(sitemap, out);

        } finally {
            out.flush();
            out.close();
        }
    }

    // Add sitemap items

    void addUrlItems(Element urlset, Namespace xmlns, Namespace imagens) {
        
        StringBuilder q = new StringBuilder();

        q.append("select ").
                append("url, images,   ...").
                append("...");

        try (
                // Change the way you connect to your database.
                Connection conn = DatasourceConnection.getConnection();
                Statement stmt = conn.createStatement();
                ResultSet result = stmt.executeQuery(q.toString())) {

            while (result.next()) {

                // This is for demonstration purposes only.  Our
                // page url is generated based on some parameters.
                String url = result.getString(1)

                Element item = new Element("url", xmlns);
                item.addContent(new Element("loc", xmlns).addContent(url);

                // In our implementation, this is a database text field that 
                // contains all the images in this record which is a page.
                // Each image entry is separated by a semi-colon.

                String media = result.getString(2);

                if (media != null && media.trim().length() > 0 ) {
                    String [] pageImages = media.split (";");
                    for (String m : pageImages) {
                        Element image = new Element("image", imagens);
                        image.addContent(new Element("loc", imagens).addContent(m));
                        item.addContent(image);
                    }
                }

                urlset.addContent(item);
            }

        } catch (Exception e) {
            // log
        }
    }


    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        processRequest(request, response);
    }


    @Override
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        processRequest(request, response);
    }


    @Override
    public String getServletInfo() {
        return "Short description";
    }
}

As shown in the @WebServlet annotation on the servlet class above, the urlPatterns value we used is "/sitemap.xml". You submit the sitemap to Google or Bing/Yahoo as http://yourdomain.com/sitemap.xml.

A sitemap may contain no more than 50,000 URLs and must be no larger than 10MB (10,485,760 bytes). Each URL can have up to 1000 images. If you exceed these limits, you need to split your sitemap into separate files and submit them to Google or Bing/Yahoo individually. If preferred, you may also use a sitemap index file. A sitemap index has the following format:

<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
   <sitemap>
      <loc>https://kahimyang.com/sitemap1.xml</loc>
      <lastmod>2004-10-01T18:23:17+00:00</lastmod>
   </sitemap>

   <sitemap>
      <loc>https://kahimyang.com/sitemap2.xml.gz</loc>
      <lastmod>2013-01-01</lastmod>
   </sitemap>
</sitemapindex>

Note that you can also submit a gzip-compressed sitemap, as shown in line 9 above.

For more information please visit the sitemap protocol here http://www.sitemaps.org/protocol.html. Google Image sitemap protocol is available here at the Google Webmaster support.

That's it. Good luck!


3,253

Comments (A dynamic standard sitemap.xml with Google image extension implemented as a Java Servlet)