Drupal search_files module Original file from http://drupal.org/project/modules?text=search_files was the download titled 6.x-1.x-dev 2009-Feb-05 Development snapshot Changes between 1.18 and 1.18.thl.1 - feature: added dashboard as default local task to search_files menu to allow some status insight (ugly but useful) Hunks at original line 16 and 557 - cleanup: manually deleting a directory clears associated data from search_dataset, search_files_files, search_files_directories tables in a single SQL statement. Create and leverage function search_files_delete_content_by_directory_fullpath() to complement similar new but automatically called function search_files_delete_content_by_file_fullpath(); Hunk at original line 178 - feature: detect PHP safe_mode, display status in dashboard and skip over shell_exec() which would fail anyway. At the end of hunk at original line 178 and in hunk at original line 584 - feature: do not confuse users with trailing slash issues. Hunks at original line 289 and 314 - feature: print current working directory and turn relative paths into absolute paths Hunks at original line 289 and 314 - typo: funciton -> function At the end of hunk at original line 557 - feature: turn hardcoded "Directory Rescan Age" into a configurable parameter Hunk at original line 568 and start of hunk at original line 584 - bugfix: configuration of "Search Label" must use #default_value not #value Hunk at original line 568 - bugfix: there is a functional gap where vanished files were not handled at all. Regularly compare database to filesystem and remove vanished files from database. Hunk at original line 584 - bugfix: while indexing files, skip over files which vanished before being indexed. Otherwise helper applications will get passed a nonexistent file name and fail. Hunk at original line 640 - cleanup: remove debugging output Hunk at original line 715 - bugfix: while preparing search results, skip over files which vanished after being indexed. 
Otherwise functions like filectime() will get passed a nonexistent file name and fail. Hunk at original line 812 --- search_files.module-1.18 2009-04-10 16:02:26.000000000 +0200 +++ search_files.module-1.18.thl.1 2009-04-10 16:01:56.000000000 +0200 @@ -1,5 +1,5 @@ t('Search Files'), 'description' => t('Manage files search'), - 'page callback' => 'drupal_get_form', - 'page arguments' => array('search_files_settings'), + 'page callback' => 'search_files_dashboard', 'access arguments' => array('administer search_files configuration'), 'type' => MENU_NORMAL_ITEM, ); + $items['admin/settings/search_files/dashboard'] = array( + 'title' => t('Dashboard'), + 'description' => t('Dashboard for Search Files Module'), + 'page callback' => 'search_files_dashboard', + 'access arguments' => array('administer search_files configuration'), + 'type' => MENU_DEFAULT_LOCAL_TASK, + ); $items['admin/settings/search_files/settings'] = array( 'title' => t('Settings'), 'description' => t('Change settings for Search Files Module'), 'page callback' => 'drupal_get_form', 'page arguments' => array('search_files_settings'), 'access arguments' => array('administer search_files configuration'), - 'type' => MENU_DEFAULT_LOCAL_TASK, + 'type' => MENU_LOCAL_TASK, ); $items['admin/settings/search_files/helpers'] = array( 'title' => t('Helpers'), @@ -178,29 +184,38 @@ */ function search_files_directory_confirm_delete_form_submit($form_id, $form_values) { //drupal_set_message('form_values =
'.print_r($form_values, true).'
'); - $sql = " - DELETE FROM - {search_files_directories} - WHERE - `id`='%s' - "; - $result = db_query($sql, $form_values['values']['search_files_directory_id']); - if ($result) { - drupal_set_message(t('Directory %directory removed from search index', array('%directory' => $form_values['values']['search_files_directory_path']))); - $sql = "SELECT * FROM {search_files_files} WHERE `id`='%s'"; - $files_result = db_query($sql, $form_values['values']['search_files_directory_id']); - while ($file = db_fetch_object($files_result)) { - $sql = "DELETE FROM {search_dataset} WHERE `type`='search_files' AND `sid`='%s'"; - $dataset_result = db_query($sql, $file->id); - if (dataset_result) { - $sql = "DELETE FROM {search_files_files} WHERE `id`='%s'"; - db_query($sql, $file->id); - } - } - - drupal_set_message(t('You still need to write the code to remove the files from the removed directory from the "search_files_files" table and the "search_dataset" table')); - drupal_goto('admin/settings/search_files/directories/list'); - } + $directory_path = $form_values['values']['search_files_directory_path']; + search_files_delete_content_by_directory_fullpath($directory_path); + drupal_set_message(t('Directory %directory removed from search index', array('%directory' => $directory_path))); + drupal_goto('admin/settings/search_files/directories/list'); +} + +function search_files_delete_content_by_directory_fullpath($full_path) { + $sql = "DELETE s.*, f.*, d.* + FROM {search_dataset} AS s + INNER JOIN {search_files_files} AS f ON s.`sid` = f.`id` + INNER JOIN {search_files_directories} AS d ON f.`directory_id` = d.`id` + WHERE ( s.`type` = 'search_files' + AND d.`directory` = '%s' + )"; + db_query($sql, $full_path); +} + +function search_files_delete_content_by_file_fullpath($full_path) { + $sql = "DELETE s.*, f.* + FROM {search_dataset} AS s + INNER JOIN {search_files_files} AS f ON s.`sid` = f.`id` + WHERE ( s.`type` = 'search_files' + AND f.`full_path` = '%s' + )"; + db_query($sql, 
$full_path); +} + +/** + * Check whether we run in PHP safe_mode + */ +function search_files_issafemode() { + return preg_match('/(1|on)/i', @ini_get("safe_mode")); } /** @@ -289,8 +304,8 @@ '#title' => t('Directory Path'), '#size' => 80, '#maxlength' => 150, - '#required' => TRUE, - '#description' => t('The path of the directory to search. Do not end with a trailing \'/\''), + '#required' => FALSE, + '#description' => t('The path of the directory to search. Relative pathes are based on %directory.', array('%directory' => getcwd())), ); $form['submit'] = array( '#type' => 'submit', @@ -314,9 +329,14 @@ SET `directory`='%s' "; - $result = db_query($sql, $form_values['values']['search_files_directory'], $form_values['values']['search_files_label']); + $directory = $form_values['values']['search_files_directory']; + if (!preg_match('/^\//', $directory)) { + $directory = sprintf("%s/%s", getcwd(), $directory); + } + $directory = preg_replace('/\/+$/', '', $directory); + $result = db_query($sql, $directory, $form_values['values']['search_files_label']); if ($result) { - drupal_set_message(t('Directory %directory has been added. I may take up to 1 day to start indexing this directory.', array('%directory' => $form_values['values']['search_files_directory']))); + drupal_set_message(t('Directory %directory has been added.', array('%directory' => $form_values['values']['search_files_directory']))); drupal_goto('admin/settings/search_files/directories/list'); } } @@ -557,8 +577,64 @@ } /** + * generate the dashboard page + */ +function search_files_dashboard() { + $lastindex = variable_get('search_files_last_index', '0'); + $output .= sprintf("%s = %s UTC
\n", t('Last Index'), + format_date($lastindex, $type = 'custom', $format = 'Y-m-d H:i:s', $timezone = NULL, $langcode = NULL)); + + $directoryrescanage = variable_get('search_files_directoryrescanage', 'unset'); + $output .= sprintf("%s = %s [sec]
\n", t('Directory Rescan Age'), $directoryrescanage); + + $nextdirectoryrescan = $lastindex + $directoryrescanage; + $output .= sprintf("%s = %s UTC
\n", t('Next Directory (Re-)Scan at or after'), + format_date($nextdirectoryrescan, $type = 'custom', $format = 'Y-m-d H:i:s', $timezone = NULL, $langcode = NULL)); + + $sql = "SELECT count(*) FROM {search_files_directories};"; + $result = db_query($sql); + $result = db_result($result); + $output .= sprintf("Number of Directories configured = %s
\n", $result); + + $sql = "SELECT count(*) FROM {search_files_files};"; + $result = db_query($sql); + $result = db_result($result); + $output .= sprintf("Files found in configured Directories and Subdirectories = %s
\n", $result); + + $sql = "SELECT count(*) FROM {search_files_files} WHERE ( `index_attempts` = 0 );"; + $result = db_query($sql); + $result = db_result($result); + $output .= sprintf("Files without index attempt = %s
\n", $result); + + $sql = "SELECT count(*) from {search_dataset} WHERE ( `type` = 'search_files' );"; + $result = db_query($sql); + $result = db_result($result); + $output .= sprintf("Files indexed = %s
\n", $result); + + $sql = "SELECT count(*) from {search_dataset} WHERE ( `type` = 'search_files' AND `reindex` > 0 );"; + $result = db_query($sql); + $result = db_result($result); + $output .= sprintf("Files indexed and scheduled for reindexing = %s
\n", $result); + + $sql = "SELECT count(*) FROM {search_files_helpers};"; + $result = db_query($sql); + $result = db_result($result); + $output .= sprintf("Number of Helpers configured = %s
\n", $result); + + // safe_mode will inhibit shell_exec() + if (search_files_issafemode()) { + $output .= t('WARNING! This server has safe_mode enabled, which inhibits use of helper applications'); + } + else { + $output .= t('Good. This server has safe_mode disabled, which allows use of helper applications'); + } + + return $output; +} + +/** * generate the settings form for the search_files module using the - * system_settings_form()funciton + * system_settings_form() function * * @return unknown */ @@ -568,11 +644,26 @@ '#title' => 'Search Label', '#type' => 'textfield', '#description' => 'What do you want the Search tab to be labeled?', - '#value' => variable_get('search_files_label', 'Server Files'), + '#default_value' => variable_get('search_files_label', 'Server Files'), + ); + $form['search_files_directoryrescanage'] = array( + '#title' => t('Directory Rescan Age'), + '#type' => 'textfield', + '#description' => 'Minimum time to wait before directories are (re)scanned for new files.', + '#default_value' => search_files_variable_get_directoryrescanage(), + '#field_suffix' => t('[sec]'), ); return system_settings_form($form); } +/* + * Handle configuration setting and provide global default + */ +function search_files_variable_get_directoryrescanage() +{ + return variable_get('search_files_directoryrescanage', '86400'); +} + /** * Implementation of hook_update_index() * @@ -584,12 +675,33 @@ function search_files_update_index() { $helpers = search_files_get_helpers(); // only update the list of files in the directories once per day - if (variable_get('search_files_last_index', 0) < (time() - 86400)) { + if (variable_get('search_files_last_index', 0) < (time() - search_files_variable_get_directoryrescanage())) { variable_set('search_files_last_index', time()); + + // hunt configured directories for new files and add them to the database $result = db_query('SELECT * FROM {search_files_directories}'); while ($directory = db_fetch_object($result)) { 
search_files_list_directory($directory->directory, $directory->id); } + + // compare database to filesystem and remove vanished files from database + $vanished = 0; + $result = db_query('SELECT full_path FROM {search_files_files}'); + while ($file = db_fetch_object($result)) { + $full_path = $file->full_path; + if(!file_exists($full_path)) { + $vanished++; + search_files_delete_content_by_file_fullpath($full_path); + } + } + if ($vanished > 0) { + watchdog('Search Files', t('removed %vanished vanished files from index', array('%vanished' => $vanished)), array(), WATCHDOG_NOTICE); + } + } + + // premature end, safe_mode will inhibit shell_exec() + if (search_files_issafemode()) { + return; } $index_number = (int)variable_get('search_cron_limit', 100); @@ -640,6 +752,12 @@ watchdog('Search Files', t('failed to index %full_path after %attempts attempts', array('%full_path' => $file->full_path, '%attempts' => $file->index_attempts)), array(), WATCHDOG_ERROR); continue; } + + // skip over files which vanished before being indexed - cleanup is left to search_files_update_index() + if(!file_exists($full_path)) { + continue; + } + // %file% is a token that is placed in the helper's parameter list to represent the file path to the attachment. // We need to put the filename in quotes in case it contains spaces. $quoted_file_path = '"'. escapeshellcmd($full_path) .'"'; @@ -715,7 +833,6 @@ `full_path`=\"%s\", `directory_id`='%s' "; - echo "filename: ". escapeshellcmd($file) ." - filetype: {$type}
\n"; // MySQL keeps throwing errors because of some file names that it doesn't like, this needs fixed $insert_result = db_query($insert_sql, $full_path, $id); } @@ -812,13 +929,16 @@ if (strpos($link, '/') !== 0) { $link = '/'. $link; } - $results[] = array( - 'link' => $link, - 'date' => filectime($result->full_path), - 'type' => $file_extensions[$file_extension] .' file', - 'title' => $file_name .' ('. format_size(filesize($result->full_path)) .')', - 'snippet' => search_excerpt($keywords, $dataset->data), - ); + // skip over files which vanished after being indexed - cleanup is left to search_files_update_index() + if(file_exists($result->full_path)) { + $results[] = array( + 'link' => $link, + 'date' => filectime($result->full_path), + 'type' => $file_extensions[$file_extension] .' file', + 'title' => $file_name .' ('. format_size(filesize($result->full_path)) .')', + 'snippet' => search_excerpt($keywords, $dataset->data), + ); + } } date_default_timezone_set('MST'); }