--- wordpress_import-DRUPAL-6--1/wordpress_import.module 2009-05-13 22:14:46.000000000 -0300 +++ wordpress_import/wordpress_import.module 2009-06-10 21:39:51.000000000 -0300 @@ -1,5 +1,5 @@ 'checkbox', '#title' => 'Create path aliases', - '#description' => t('This option tries to preserve the path of the wordpress original posts. It is useful only if the url of your drupal site root is the same as the wordpress site (!root) and if clean urls are activated', array('!root' => $wordpress['link'])) + '#description' => t('This option tries to preserve the path of the wordpress original posts. It is useful only if the url of your drupal site root is the same as the wordpress site (!root) and if clean urls are activated. Warning: disable Pathauto module before importing, otherwise imported path aliases will be overwritten by Pathauto. After importing, enable it again.', array('!root' => $wordpress['link'])) ); break; @@ -431,28 +431,56 @@ $category_mapping = wordpress_import_get_terms($category_vocabulary); + $permalink_maker = array(); + // Import categories if (is_array($wordpress['categories'])) { foreach ($wordpress['categories'] as $key => $value) { - $category_name = wordpress_import_get_tag($value, 'wp:cat_name'); - $category_parent = wordpress_import_get_tag($value, 'wp:category_parent'); + $category_name = html_entity_decode(wordpress_import_get_tag($value, 'wp:cat_name'), ENT_COMPAT, 'UTF-8'); + $category_parent = html_entity_decode(wordpress_import_get_tag($value, 'wp:category_parent'), ENT_COMPAT, 'UTF-8'); + $category_desc = str_replace(array(''), '', html_entity_decode(wordpress_import_get_tag($value, 'wp:category_description'), ENT_COMPAT, 'UTF-8')); + $category_nicename = html_entity_decode(wordpress_import_get_tag($value, 'wp:category_nicename'), ENT_COMPAT, 'UTF-8'); if (!$category_mapping[$category_name]) { + $permalink_maker[$category_name] = array( + 'nicename' => $category_nicename + ); + $category_term = array( 'name' => $category_name, + 'description' => $category_desc, 'vid' => $category_vocabulary ); if ($category_parent !== NULL && $category_parent !== '') { $category_term['parent'] = $category_mapping[$category_parent]; + $permalink_maker[$category_name]['parent'] = $category_parent; } taxonomy_save_term($category_term); + + $_SESSION['wordpress_import']['report']['@created_categories']++; - // Save mapping between Wordpress and Drupal categories + // Save mapping between Wordpress and Drupal categories $category_mapping[$category_name] = $category_term['tid']; + + // Make WP-like category permalinks + if ($params['alias']) { + $link=''; + while ($category_name !== '') { + $link="/" . $permalink_maker[$category_name]['nicename'] . $link; + if ($permalink_maker[$category_name]['parent'] !== NULL && $permalink_maker[$category_name]['parent'] !== '') { + $category_name=$permalink_maker[$category_name]['parent']; + } else { + $category_name = ''; + } + } + $link="category" . $link; + path_set_alias("taxonomy/term/" . $category_term['tid'],$link); + path_set_alias("taxonomy/term/" . $category_term['tid'] . "/feed",$link . "/feed"); + } } } @@ -461,14 +489,6 @@ 'categories_vocabulary' => $category_vocabulary ); } - - // Wordpress MU doesn't set categories when exporting - - return array( - 'categories_map' => null, - 'categories_vocabulary' => null - ); - } /** @@ -521,10 +541,13 @@ $post_info = wordpress_import_post($post_value, $params); if ($post_info && $params['alias']) { + // This only works if Pathauto module is disabled or not + // installed, otherwise Pathauto will overwrite what this piece of code does $link = wordpress_import_get_tag($post_value, 'link'); $link = substr($link, strlen($wordpress['link'])); $link = rtrim($link, '/'); path_set_alias('node/'. $post_info['nid'], $link); + path_set_alias('node/'. $post_info['nid'] . "/feed", $link . "/feed"); } } } @@ -569,7 +592,7 @@ $categories = $categories[1]; foreach ($categories as $key => $value) { - $category = str_replace(array(''), '', $value); + $category = html_entity_decode(str_replace(array(''), '', $value), ENT_COMPAT, 'UTF-8'); $drupal_category = $params['categories_map'][$category]; if (!empty($drupal_category)) { @@ -611,9 +634,21 @@ $content = wordpress_import_get_tag($post, 'content:encoded'); $content = str_replace('', '', $content); + $teaser = wordpress_import_get_tag($post, 'excerpt:encoded'); + if (empty($teaser)) { + $teaser=node_teaser($content, $params['format']); + } + + // TODO: fix changed time + $changed=wordpress_import_get_tag($post, 'wp:postmeta'); + preg_match_all('|_edit_lock\n(.*?)|is', $changed, $changetime); + $changed=$changetime[1]; + + $content = wpautop($content); + $node = array( 'type' => $type, - 'teaser' => node_teaser($content, $params['format']), + 'teaser' => $teaser, 'uid' => $uid, 'title' => html_entity_decode(wordpress_import_get_tag($post, 'title'), ENT_COMPAT, 'UTF-8'), 'body' => $content, @@ -621,7 +656,7 @@ 'status' => $status, 'promote' => $promote, 'created' => $timestamp, - 'changed' => $timestamp, + 'changed' => $changed, // will be overwritten by Node API, but just in case... 'comment' => wordpress_import_get_tag($post, 'wp:comment_status')=='open'?COMMENT_NODE_READ_WRITE:COMMENT_NODE_READ_ONLY, 'taxonomy' => $drupal_categories ); @@ -631,6 +666,14 @@ $_SESSION['wordpress_import']['report']['@created_posts'] ++; $return['nid'] = $node->nid; + //-------- BEGIN OF NOT-VERY-PORTABLE CODE --------- + // Currently this is the only way to not loose the very important node + // change time. Altough this is not the kosher way to do it, its quite + // safe and portable. + db_query('UPDATE {node} SET changed=%s WHERE nid = %d', $changed, $node->nid); + fwrite(STDERR, "UPDATE {node} SET changed=" . $changed . " WHERE nid = " . $node->nid . "\n"); + //-------- END OF NOT-VERY-PORTABLE CODE --------- + taxonomy_node_save($node->nid, $drupal_categories); drupal_get_messages(); // Clear message queue to prevent filling of $_SESSION table @@ -656,9 +699,10 @@ } $timestamp = strtotime(wordpress_import_get_tag($comment, 'wp:comment_date')); - $comment_content = wordpress_import_get_tag($comment, 'wp:comment_content'); + $comment_content = wpautop(wordpress_import_get_tag($comment, 'wp:comment_content')); switch (wordpress_import_get_tag($comment, 'wp:comment_type')) { + case 'pingback': case 'trackback': db_query( "INSERT INTO {trackback_received} (nid, created, site, name, subject, url, excerpt, status) VALUES (%d, %d, '%s', '%s', '%s', '%s', '%s', %d)", @@ -871,4 +915,61 @@ // $return = $wpdb->escape( trim( $return ) ); // TODO : echapper le tag return $r; -} \ No newline at end of file +} + + + +// Accepts matches array from preg_replace_callback in wpautop() +// or a string +function clean_pre($matches) { + if ( is_array($matches) ) + $text = $matches[1] . $matches[2] . ""; + else + $text = $matches; + + $text = str_replace('
', '', $text); + $text = str_replace('

', "\n", $text); + $text = str_replace('

', '', $text); + + return $text; +} + +function wpautop($pee, $br = 1) { + $pee = $pee . "\n"; // just to make things a little easier, pad the end + $pee = preg_replace('|
\s*
|', "\n\n", $pee); + // Space things out a little + $allblocks = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|map|area|blockquote|address|math|style|input|p|h[1-6]|hr)'; + $pee = preg_replace('!(<' . $allblocks . '[^>]*>)!', "\n$1", $pee); + $pee = preg_replace('!()!', "$1\n\n", $pee); + $pee = str_replace(array("\r\n", "\r"), "\n", $pee); // cross-platform newlines + if ( strpos($pee, ']*)>\s*|', "", $pee); // no pee inside object/embed + $pee = preg_replace('|\s*\s*|', '', $pee); + } + $pee = preg_replace("/\n\n+/", "\n\n", $pee); // take care of duplicates + $pee = preg_replace('/\n?(.+?)(?:\n\s*\n|\z)/s', "

$1

\n", $pee); // make paragraphs, including one at the end + $pee = preg_replace('|

\s*?

|', '', $pee); // under certain strange conditions it could create a P of entirely whitespace + $pee = preg_replace('!

([^<]+)\s*?(]*>)!', "

$1

$2", $pee); + $pee = preg_replace( '|

|', "$1

", $pee ); + $pee = preg_replace('!

\s*(]*>)\s*

!', "$1", $pee); // don't pee all over a tag + $pee = preg_replace("|

(|", "$1", $pee); // problem with nested lists + $pee = preg_replace('|

]*)>|i', "

", $pee); + $pee = str_replace('

', '

', $pee); + $pee = preg_replace('!

\s*(]*>)!', "$1", $pee); + $pee = preg_replace('!(]*>)\s*

!', "$1", $pee); + if ($br) { + $pee = preg_replace_callback('/<(script|style).*?<\/\\1>/s', create_function('$matches', 'return str_replace("\n", "", $matches[0]);'), $pee); + $pee = preg_replace('|(?)\s*\n|', "
\n", $pee); // optionally make line breaks + $pee = str_replace('', "\n", $pee); + } + $pee = preg_replace('!(]*>)\s*
!', "$1", $pee); + $pee = preg_replace('!
(\s*]*>)!', '$1', $pee); + if (strpos($pee, ')(.*?)!is', 'clean_pre', $pee ); + $pee = preg_replace( "|\n

$|", '

', $pee ); +// $pee = preg_replace('/

\s*?(' . get_shortcode_regex() . ')\s*<\/p>/s', '$1', $pee); // don't auto-p wrap shortcodes that stand alone + + return $pee; +} + +?>