Script to transfer custom CMS into WordPress

This is a script I wrote to import my blog posts from a custom CMS into WordPress. Some things have been hard coded because I needed to do this quickly but you can easily change the script to adapt to your needs. A couple of things worth noting:

  1. You need to be using Windows and have IIS server
  2. I was importing from a MySql database and I have the MySql .NET connector installed on my machine
  3. This is a quick and dirty script and it grabs only the posts and comments (no images etc)

Here’s the wordpress importer script (it’s an asp.net http handler):

<%@ WebHandler Language="C#" %>
using System;
using System.Web;
using System.IO;
using System.Xml;
using System.Collections.Generic;
using MySql.Data.MySqlClient;
public class GisToWP : IHttpHandler {
public void ProcessRequest (HttpContext context)
{
string website = "http://www.example.co.uk/";
string username = "MrCoder";
string title = "Blog Title";
string desc = "Another wordpress blog";
// dataset
System.Data.DataSet db = new System.Data.DataSet();
string conString = "Database=database;Data Source=example.com;User Id=username;Password=password;";
// get the articles/blogposts
using (MySqlConnection con = new MySqlConnection(conString))
{
con.Open();
MySqlCommand cmd = new MySqlCommand();
cmd.CommandType = System.Data.CommandType.Text;
cmd.CommandText = "select * from articles where article_status = 1"; // change this
cmd.Connection = con;
MySqlDataAdapter dr = new MySqlDataAdapter();
dr.SelectCommand = cmd;
dr.Fill(db, "articles");
con.Close();
}
// get the comments
using (MySqlConnection con = new MySqlConnection(conString))
{
con.Open();
MySqlCommand cmd = new MySqlCommand();
cmd.CommandType = System.Data.CommandType.Text;
cmd.CommandText = "select * from tblComments where StatusID = 1"; // change this
cmd.Connection = con;
MySqlDataAdapter dr = new MySqlDataAdapter();
dr.SelectCommand = cmd;
dr.Fill(db, "comments");
con.Close();
}
// get the users
using (MySqlConnection con = new MySqlConnection(conString))
{
con.Open();
MySqlCommand cmd = new MySqlCommand();
cmd.CommandType = System.Data.CommandType.Text;
cmd.CommandText = "select * from tblUsers"; // change this
cmd.Connection = con;
MySqlDataAdapter dr = new MySqlDataAdapter();
dr.SelectCommand = cmd;
dr.Fill(db, "users");
con.Close();
}
// get the categories
using (MySqlConnection con = new MySqlConnection(conString))
{
con.Open();
MySqlCommand cmd = new MySqlCommand();
cmd.CommandType = System.Data.CommandType.Text;
cmd.CommandText = "select * from article_categories where status = 1"; // change this
cmd.Connection = con;
MySqlDataAdapter dr = new MySqlDataAdapter();
dr.SelectCommand = cmd;
dr.Fill(db, "categories");
con.Close();
}
// link articles to comments
db.Relations.Add("article_comment", db.Tables["articles"].Columns["article_id"], db.Tables["comments"].Columns["ArticleID"], false);
// link articles to categories
db.Relations.Add("article_category", db.Tables["articles"].Columns["cat_id"], db.Tables["categories"].Columns["cat_id"], false);
// link comments to users
db.Relations.Add("comment_user", db.Tables["comments"].Columns["UserID"], db.Tables["users"].Columns["UserID"], false);
// set primary keys
db.Tables["articles"].PrimaryKey = new System.Data.DataColumn[1] { db.Tables["articles"].Columns["article_id"] };
db.Tables["comments"].PrimaryKey = new System.Data.DataColumn[1] { db.Tables["comments"].Columns["CommentID"] };
db.Tables["users"].PrimaryKey = new System.Data.DataColumn[1] { db.Tables["users"].Columns["UserID"] };
db.Tables["categories"].PrimaryKey = new System.Data.DataColumn[1] { db.Tables["categories"].Columns["cat_id"] };
// write the xml file
using (MemoryStream ms = new MemoryStream()) // use memory stream
{
using (TextWriter textWriter = new StreamWriter(ms, System.Text.Encoding.UTF8))
{
XmlTextWriter writer = new XmlTextWriter(textWriter);
writer.Formatting = Formatting.Indented;
writer.WriteStartDocument();
writer.WriteStartElement("rss");
writer.WriteAttributeString("version", "2.0");
writer.WriteAttributeString("xmlns:excerpt", "http://wordpress.org/export/1.0/excerpt/");
writer.WriteAttributeString("xmlns:content", "http://purl.org/rss/1.0/modules/content/");
writer.WriteAttributeString("xmlns:wfw", "http://wellformedweb.org/CommentAPI/");
writer.WriteAttributeString("xmlns:dc", "http://purl.org/dc/elements/1.1/");
writer.WriteAttributeString("xmlns:wp", "http://wordpress.org/export/1.0/");
writer.WriteStartElement("channel");
writer.WriteElementString("title", title);
writer.WriteElementString("link", website);
writer.WriteElementString("description", desc);
writer.WriteElementString("pubDate", DateTime.Now.ToString("ddd, dd MMM yyyy hh:mm:ss +0000"));
writer.WriteElementString("generator", "http://wordpress.org/?v=2.9.2");
writer.WriteElementString("language", "en");
writer.WriteElementString("wp:wxr_version", "1.0");
writer.WriteElementString("wp:base_site_url", website);
writer.WriteElementString("wp:base_blog_url", website);
// categories
foreach (System.Data.DataRow cat in db.Tables["categories"].Rows)
{
writer.WriteStartElement("wp:category");
writer.WriteElementString("wp:category_nicename", cat["cat_name"].ToString());
writer.WriteElementString("wp:category_parent", String.Empty);
writer.WriteStartElement("wp:cat_name");
writer.WriteCData(cat["cat_real_name"].ToString());
writer.WriteEndElement(); // wp:cat_name
writer.WriteEndElement(); // wp:category
}
writer.WriteStartElement("xhtml:meta");
writer.WriteAttributeString("xmlns:xhtml", "http://www.w3.org/1999/xhtml");
writer.WriteAttributeString("name", "robots");
writer.WriteAttributeString("content", "index");
writer.WriteEndElement();
// loop through all articles
foreach (System.Data.DataRow article in db.Tables["articles"].Rows)
{
writer.WriteStartElement("item");
// category name
string categorySlug = db.Tables["categories"].Rows.Find(article["cat_id"])["cat_name"].ToString();
string categoryName = db.Tables["categories"].Rows.Find(article["cat_id"])["cat_real_name"].ToString();
writer.WriteElementString("title", article["article_title"].ToString());
writer.WriteElementString("link", String.Format("{0}{1}/{2}/", website, categorySlug, article["article_permalink"].ToString()));
writer.WriteElementString("pubDate", ((DateTime)article["post_datetime"]).ToString("ddd, dd MMM yyyy hh:mm:ss +0000"));
writer.WriteStartElement("dc:creator");
writer.WriteCData(username);
writer.WriteEndElement();
writer.WriteStartElement("category");
writer.WriteCData(categorySlug);
writer.WriteEndElement();
writer.WriteStartElement("category");
writer.WriteAttributeString("domain", "category");
writer.WriteAttributeString("nicename", categoryName);
writer.WriteCData(categoryName);
writer.WriteEndElement(); // category
writer.WriteStartElement("guid");
writer.WriteAttributeString("isPermalink", "false");
writer.WriteString(String.Format("{0}?p={1}", website, article["article_id"].ToString()));
writer.WriteEndElement(); // guid
writer.WriteElementString("description", String.Empty);
writer.WriteStartElement("content:encoded");
writer.WriteCData(FormatContent(article["article_content"].ToString()));
writer.WriteEndElement();
writer.WriteStartElement("excerpt:encoded");
writer.WriteCData(String.Empty);
writer.WriteEndElement();
writer.WriteElementString("wp:post_id", article["article_id"].ToString());
writer.WriteElementString("wp:post_date", ((DateTime)article["article_datetime"]).ToString("yyyy-MM-dd hh:mm:ss"));
writer.WriteElementString("wp:post_date_gmt", ((DateTime)article["article_datetime"]).AddHours(6).ToString("yyyy-MM-dd hh:mm:ss"));
writer.WriteElementString("wp:comment_status", "closed");
writer.WriteElementString("wp:ping_status", "closed");
writer.WriteElementString("wp:post_name", article["article_permalink"].ToString());
writer.WriteElementString("wp:status", "publish");
writer.WriteElementString("wp:post_parent", "0");
writer.WriteElementString("wp:menu_order", "0");
writer.WriteElementString("wp:post_type", "post");
writer.WriteElementString("wp:post_password", String.Empty);
writer.WriteElementString("wp:is_sticky", "0");
// meta tags
writer.WriteStartElement("wp:postmeta");
writer.WriteElementString("wp:meta_key", "title");
writer.WriteElementString("wp:meta_value", article["article_page_title"].ToString());
writer.WriteEndElement();
writer.WriteStartElement("wp:postmeta");
writer.WriteElementString("wp:meta_key", "description");
writer.WriteElementString("wp:meta_value", article["article_meta_desc"].ToString());
writer.WriteEndElement();
// comments for this article
System.Data.DataRow[] theComments = article.GetChildRows("article_comment");
foreach (System.Data.DataRow comment in theComments)
{
writer.WriteStartElement("wp:comment");
string commenterName = db.Tables["users"].Rows.Find(comment["UserID"])["Fullname"].ToString();
string commenterEmail = db.Tables["users"].Rows.Find(comment["UserID"])["EmailAddress"].ToString();
string commenterIP = db.Tables["users"].Rows.Find(comment["UserID"])["IPAddress"].ToString();
writer.WriteElementString("wp:comment_id", comment["CommentID"].ToString());
writer.WriteElementString("wp:comment_author", commenterName);
writer.WriteElementString("wp:comment_author_email", commenterEmail);
writer.WriteElementString("wp:comment_author_url", String.Empty);
writer.WriteElementString("wp:comment_author_IP", commenterIP);
writer.WriteElementString("wp:comment_date", ((DateTime)comment["CreateDate"]).ToString("yyyy-MM-dd hh:mm:ss"));
writer.WriteElementString("wp:comment_date_gmt", ((DateTime)comment["CreateDate"]).AddHours(6).ToString("yyyy-MM-dd hh:mm:ss"));
writer.WriteStartElement("wp:comment_content");
writer.WriteCData(FormatContent(comment["Content"].ToString()));
writer.WriteEndElement();
writer.WriteElementString("wp:comment_approved", "1");
writer.WriteElementString("wp:comment_type", String.Empty);
writer.WriteElementString("wp:comment_parent", "0");
writer.WriteElementString("wp:comment_user_id", "0");
writer.WriteEndElement(); // wp:comment
}
writer.WriteEndElement(); // item
}
writer.WriteEndElement(); // channel
writer.WriteEndElement(); // rss
// To be safe, flush the document to the memory stream.
writer.Flush();
// Convert the memory stream to an array of bytes.
byte[] byteArray = ms.ToArray();
// Send the XML file to the web browser for download.
context.Response.Clear();
context.Response.AppendHeader("Content-Disposition", "filename=wordpress.xml");
context.Response.AppendHeader("Content-Length", byteArray.Length.ToString());
context.Response.ContentType = "text/xml";
context.Response.BinaryWrite(byteArray);
}
}
}
public string FormatContent(string text)
{
return text.Replace("<p>", "").Replace("</p>", "");
}
public bool IsReusable {
get {
return false;
}
}
}

How to use the wordpress importer script?

You will need to change the sql statements where I’ve put the comments (change this) to select your posts, comments, users and categories from your database. When you run the script, it will prompt you to download an xml file (or show you an output in your browser which you can “View Source” and save to your PC. This will be your wordpress WRX xml file which you will use to import your blog into wordpress.

For wordpress 3.2, you will have to go to Tools -> Import and then select WordPress from the list. It will then ask you to install the WordPress Importer plugin and activate it. Once this is done, you can then upload this WRX file and your blogposts will be imported in your new blog.

WordPress blog posts being truncated during import

If you happen to see truncated data in your posts, it might be caused by character encoding problems. To fix mine, I opened the WRX file in Notepad++ and went to Encoding -> Encode in UTF-8 and it showed me invalid characters. Instead of replacing the bad characters, I ran the script again, copy the data from view source, pasted in Notepad++, selected the encoding to be UTF-8 before saving the WRX file. When I imported the xml file this time, no blog posts were truncated.

comments powered by Disqus