This is a script I wrote to import my blog posts from a custom CMS into WordPress. Some things have been hard coded because I needed to do this quickly but you can easily change the script to adapt to your needs. A couple of things worth noting:
- You need to be using Windows and have IIS server
- I was importing from a MySql database and I have the MySql .NET connector installed on my machine
- This is a quick and dirty script and it grabs only the posts and comments (no images etc)
Here’s the wordpress importer script (it’s an asp.net http handler):
<%@ WebHandler Language="C#" %> using System; using System.Web; using System.IO; using System.Xml; using System.Collections.Generic; using MySql.Data.MySqlClient; public class GisToWP : IHttpHandler { public void ProcessRequest (HttpContext context) { string website = "http://www.example.co.uk/"; string username = "MrCoder"; string title = "Blog Title"; string desc = "Another wordpress blog"; // dataset System.Data.DataSet db = new System.Data.DataSet(); string conString = "Database=database;Data Source=example.com;User Id=username;Password=password;"; // get the articles/blogposts using (MySqlConnection con = new MySqlConnection(conString)) { con.Open(); MySqlCommand cmd = new MySqlCommand(); cmd.CommandType = System.Data.CommandType.Text; cmd.CommandText = "select * from articles where article_status = 1"; // change this cmd.Connection = con; MySqlDataAdapter dr = new MySqlDataAdapter(); dr.SelectCommand = cmd; dr.Fill(db, "articles"); con.Close(); } // get the comments using (MySqlConnection con = new MySqlConnection(conString)) { con.Open(); MySqlCommand cmd = new MySqlCommand(); cmd.CommandType = System.Data.CommandType.Text; cmd.CommandText = "select * from tblComments where StatusID = 1"; // change this cmd.Connection = con; MySqlDataAdapter dr = new MySqlDataAdapter(); dr.SelectCommand = cmd; dr.Fill(db, "comments"); con.Close(); } // get the users using (MySqlConnection con = new MySqlConnection(conString)) { con.Open(); MySqlCommand cmd = new MySqlCommand(); cmd.CommandType = System.Data.CommandType.Text; cmd.CommandText = "select * from tblUsers"; // change this cmd.Connection = con; MySqlDataAdapter dr = new MySqlDataAdapter(); dr.SelectCommand = cmd; dr.Fill(db, "users"); con.Close(); } // get the categories using (MySqlConnection con = new MySqlConnection(conString)) { con.Open(); MySqlCommand cmd = new MySqlCommand(); cmd.CommandType = System.Data.CommandType.Text; cmd.CommandText = "select * from article_categories where status = 1"; // change this cmd.Connection = con; MySqlDataAdapter dr = new MySqlDataAdapter(); dr.SelectCommand = cmd; dr.Fill(db, "categories"); con.Close(); } // link articles to comments db.Relations.Add("article_comment", db.Tables["articles"].Columns["article_id"], db.Tables["comments"].Columns["ArticleID"], false); // link articles to categories db.Relations.Add("article_category", db.Tables["articles"].Columns["cat_id"], db.Tables["categories"].Columns["cat_id"], false); // link comments to users db.Relations.Add("comment_user", db.Tables["comments"].Columns["UserID"], db.Tables["users"].Columns["UserID"], false); // set primary keys db.Tables["articles"].PrimaryKey = new System.Data.DataColumn[1] { db.Tables["articles"].Columns["article_id"] }; db.Tables["comments"].PrimaryKey = new System.Data.DataColumn[1] { db.Tables["comments"].Columns["CommentID"] }; db.Tables["users"].PrimaryKey = new System.Data.DataColumn[1] { db.Tables["users"].Columns["UserID"] }; db.Tables["categories"].PrimaryKey = new System.Data.DataColumn[1] { db.Tables["categories"].Columns["cat_id"] }; // write the xml file using (MemoryStream ms = new MemoryStream()) // use memory stream { using (TextWriter textWriter = new StreamWriter(ms, System.Text.Encoding.UTF8)) { XmlTextWriter writer = new XmlTextWriter(textWriter); writer.Formatting = Formatting.Indented; writer.WriteStartDocument(); writer.WriteStartElement("rss"); writer.WriteAttributeString("version", "2.0"); writer.WriteAttributeString("xmlns:excerpt", "http://wordpress.org/export/1.0/excerpt/"); writer.WriteAttributeString("xmlns:content", "http://purl.org/rss/1.0/modules/content/"); writer.WriteAttributeString("xmlns:wfw", "http://wellformedweb.org/CommentAPI/"); writer.WriteAttributeString("xmlns:dc", "http://purl.org/dc/elements/1.1/"); writer.WriteAttributeString("xmlns:wp", "http://wordpress.org/export/1.0/"); writer.WriteStartElement("channel"); writer.WriteElementString("title", title); writer.WriteElementString("link", website); writer.WriteElementString("description", desc); writer.WriteElementString("pubDate", DateTime.Now.ToString("ddd, dd MMM yyyy hh:mm:ss +0000")); writer.WriteElementString("generator", "http://wordpress.org/?v=2.9.2"); writer.WriteElementString("language", "en"); writer.WriteElementString("wp:wxr_version", "1.0"); writer.WriteElementString("wp:base_site_url", website); writer.WriteElementString("wp:base_blog_url", website); // categories foreach (System.Data.DataRow cat in db.Tables["categories"].Rows) { writer.WriteStartElement("wp:category"); writer.WriteElementString("wp:category_nicename", cat["cat_name"].ToString()); writer.WriteElementString("wp:category_parent", String.Empty); writer.WriteStartElement("wp:cat_name"); writer.WriteCData(cat["cat_real_name"].ToString()); writer.WriteEndElement(); // wp:cat_name writer.WriteEndElement(); // wp:category } writer.WriteStartElement("xhtml:meta"); writer.WriteAttributeString("xmlns:xhtml", "http://www.w3.org/1999/xhtml"); writer.WriteAttributeString("name", "robots"); writer.WriteAttributeString("content", "index"); writer.WriteEndElement(); // loop through all articles foreach (System.Data.DataRow article in db.Tables["articles"].Rows) { writer.WriteStartElement("item"); // category name string categorySlug = db.Tables["categories"].Rows.Find(article["cat_id"])["cat_name"].ToString(); string categoryName = db.Tables["categories"].Rows.Find(article["cat_id"])["cat_real_name"].ToString(); writer.WriteElementString("title", article["article_title"].ToString()); writer.WriteElementString("link", String.Format("{0}{1}/{2}/", website, categorySlug, article["article_permalink"].ToString())); writer.WriteElementString("pubDate", ((DateTime)article["post_datetime"]).ToString("ddd, dd MMM yyyy hh:mm:ss +0000")); writer.WriteStartElement("dc:creator"); writer.WriteCData(username); writer.WriteEndElement(); writer.WriteStartElement("category"); writer.WriteCData(categorySlug); writer.WriteEndElement(); writer.WriteStartElement("category"); writer.WriteAttributeString("domain", "category"); writer.WriteAttributeString("nicename", categoryName); writer.WriteCData(categoryName); writer.WriteEndElement(); // category writer.WriteStartElement("guid"); writer.WriteAttributeString("isPermalink", "false"); writer.WriteString(String.Format("{0}?p={1}", website, article["article_id"].ToString())); writer.WriteEndElement(); // guid writer.WriteElementString("description", String.Empty); writer.WriteStartElement("content:encoded"); writer.WriteCData(FormatContent(article["article_content"].ToString())); writer.WriteEndElement(); writer.WriteStartElement("excerpt:encoded"); writer.WriteCData(String.Empty); writer.WriteEndElement(); writer.WriteElementString("wp:post_id", article["article_id"].ToString()); writer.WriteElementString("wp:post_date", ((DateTime)article["article_datetime"]).ToString("yyyy-MM-dd hh:mm:ss")); writer.WriteElementString("wp:post_date_gmt", ((DateTime)article["article_datetime"]).AddHours(6).ToString("yyyy-MM-dd hh:mm:ss")); writer.WriteElementString("wp:comment_status", "closed"); writer.WriteElementString("wp:ping_status", "closed"); writer.WriteElementString("wp:post_name", article["article_permalink"].ToString()); writer.WriteElementString("wp:status", "publish"); writer.WriteElementString("wp:post_parent", "0"); writer.WriteElementString("wp:menu_order", "0"); writer.WriteElementString("wp:post_type", "post"); writer.WriteElementString("wp:post_password", String.Empty); writer.WriteElementString("wp:is_sticky", "0"); // meta tags writer.WriteStartElement("wp:postmeta"); writer.WriteElementString("wp:meta_key", "title"); writer.WriteElementString("wp:meta_value", article["article_page_title"].ToString()); writer.WriteEndElement(); writer.WriteStartElement("wp:postmeta"); writer.WriteElementString("wp:meta_key", "description"); writer.WriteElementString("wp:meta_value", article["article_meta_desc"].ToString()); writer.WriteEndElement(); // comments for this article System.Data.DataRow[] theComments = article.GetChildRows("article_comment"); foreach (System.Data.DataRow comment in theComments) { writer.WriteStartElement("wp:comment"); string commenterName = db.Tables["users"].Rows.Find(comment["UserID"])["Fullname"].ToString(); string commenterEmail = db.Tables["users"].Rows.Find(comment["UserID"])["EmailAddress"].ToString(); string commenterIP = db.Tables["users"].Rows.Find(comment["UserID"])["IPAddress"].ToString(); writer.WriteElementString("wp:comment_id", comment["CommentID"].ToString()); writer.WriteElementString("wp:comment_author", commenterName); writer.WriteElementString("wp:comment_author_email", commenterEmail); writer.WriteElementString("wp:comment_author_url", String.Empty); writer.WriteElementString("wp:comment_author_IP", commenterIP); writer.WriteElementString("wp:comment_date", ((DateTime)comment["CreateDate"]).ToString("yyyy-MM-dd hh:mm:ss")); writer.WriteElementString("wp:comment_date_gmt", ((DateTime)comment["CreateDate"]).AddHours(6).ToString("yyyy-MM-dd hh:mm:ss")); writer.WriteStartElement("wp:comment_content"); writer.WriteCData(FormatContent(comment["Content"].ToString())); writer.WriteEndElement(); writer.WriteElementString("wp:comment_approved", "1"); writer.WriteElementString("wp:comment_type", String.Empty); writer.WriteElementString("wp:comment_parent", "0"); writer.WriteElementString("wp:comment_user_id", "0"); writer.WriteEndElement(); // wp:comment } writer.WriteEndElement(); // item } writer.WriteEndElement(); // channel writer.WriteEndElement(); // rss // To be safe, flush the document to the memory stream. writer.Flush(); // Convert the memory stream to an array of bytes. byte[] byteArray = ms.ToArray(); // Send the XML file to the web browser for download. context.Response.Clear(); context.Response.AppendHeader("Content-Disposition", "filename=wordpress.xml"); context.Response.AppendHeader("Content-Length", byteArray.Length.ToString()); context.Response.ContentType = "text/xml"; context.Response.BinaryWrite(byteArray); } } } public string FormatContent(string text) { return text.Replace("<p>", "").Replace("</p>", ""); } public bool IsReusable { get { return false; } } }
How to use the wordpress importer script?
You will need to change the sql statements where I’ve put the comments (change this) to select your posts, comments, users and categories from your database. When you run the script, it will prompt you to download an xml file (or show you an output in your browser which you can “View Source” and save to your PC. This will be your wordpress WRX xml file which you will use to import your blog into wordpress.
For wordpress 3.2, you will have to go to Tools -> Import and then select WordPress from the list. It will then ask you to install the WordPress Importer plugin and activate it. Once this is done, you can then upload this WRX file and your blogposts will be imported in your new blog.
WordPress blog posts being truncated during import
If you happen to see truncated data in your posts, it might be caused by character encoding problems. To fix mine, I opened the WRX file in Notepad++ and went to Encoding -> Encode in UTF-8 and it showed me invalid characters. Instead of replacing the bad characters, I ran the script again, copy the data from view source, pasted in Notepad++, selected the encoding to be UTF-8 before saving the WRX file. When I imported the xml file this time, no blog posts were truncated.