1522 lines
44 KiB
PHP
1522 lines
44 KiB
PHP
|
<?php
|
||
|
/**
|
||
|
* Zend Framework
|
||
|
*
|
||
|
* LICENSE
|
||
|
*
|
||
|
* This source file is subject to the new BSD license that is bundled
|
||
|
* with this package in the file LICENSE.txt.
|
||
|
* It is also available through the world-wide-web at this URL:
|
||
|
* http://framework.zend.com/license/new-bsd
|
||
|
* If you did not receive a copy of the license and are unable to
|
||
|
* obtain it through the world-wide-web, please send an email
|
||
|
* to license@zend.com so we can send you a copy immediately.
|
||
|
*
|
||
|
* @category Zend
|
||
|
* @package Zend_Search_Lucene
|
||
|
* @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
|
||
|
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||
|
* @version $Id: Lucene.php 18602 2009-10-16 19:33:40Z alexander $
|
||
|
*/
|
||
|
|
||
|
/** Zend_Search_Lucene_Document */
|
||
|
require_once 'Zend/Search/Lucene/Document.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Document_Html */
|
||
|
require_once 'Zend/Search/Lucene/Document/Html.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Document_Docx */
|
||
|
require_once 'Zend/Search/Lucene/Document/Docx.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Document_Pptx */
|
||
|
require_once 'Zend/Search/Lucene/Document/Pptx.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Document_Xlsx */
|
||
|
require_once 'Zend/Search/Lucene/Document/Xlsx.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Storage_Directory_Filesystem */
|
||
|
require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Storage_File_Memory */
|
||
|
require_once 'Zend/Search/Lucene/Storage/File/Memory.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Index_Term */
|
||
|
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Index_TermInfo */
|
||
|
require_once 'Zend/Search/Lucene/Index/TermInfo.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Index_SegmentInfo */
|
||
|
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Index_FieldInfo */
|
||
|
require_once 'Zend/Search/Lucene/Index/FieldInfo.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Index_Writer */
|
||
|
require_once 'Zend/Search/Lucene/Index/Writer.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Search_QueryParser */
|
||
|
require_once 'Zend/Search/Lucene/Search/QueryParser.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Search_QueryHit */
|
||
|
require_once 'Zend/Search/Lucene/Search/QueryHit.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Search_Similarity */
|
||
|
require_once 'Zend/Search/Lucene/Search/Similarity.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Index_TermsPriorityQueue */
|
||
|
require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_TermStreamsPriorityQueue */
|
||
|
require_once 'Zend/Search/Lucene/TermStreamsPriorityQueue.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Index_DocsFilter */
|
||
|
require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_LockManager */
|
||
|
require_once 'Zend/Search/Lucene/LockManager.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Interface */
|
||
|
require_once 'Zend/Search/Lucene/Interface.php';
|
||
|
|
||
|
/** Zend_Search_Lucene_Proxy */
|
||
|
require_once 'Zend/Search/Lucene/Proxy.php';
|
||
|
|
||
|
/**
|
||
|
* @category Zend
|
||
|
* @package Zend_Search_Lucene
|
||
|
* @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
|
||
|
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||
|
*/
|
||
|
class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
|
||
|
{
|
||
|
/**
|
||
|
* Default field name for search
|
||
|
*
|
||
|
* Null means search through all fields
|
||
|
*
|
||
|
* @var string|null
|
||
|
*/
|
||
|
private static $_defaultSearchField = null;
|
||
|
|
||
|
/**
|
||
|
* Result set limit
|
||
|
*
|
||
|
* 0 means no limit
|
||
|
*
|
||
|
* @var integer
|
||
|
*/
|
||
|
private static $_resultSetLimit = 0;
|
||
|
|
||
|
/**
|
||
|
* Terms per query limit
|
||
|
*
|
||
|
* 0 means no limit
|
||
|
*
|
||
|
* @var integer
|
||
|
*/
|
||
|
private static $_termsPerQueryLimit = 1024;
|
||
|
|
||
|
/**
|
||
|
* File system adapter.
|
||
|
*
|
||
|
* @var Zend_Search_Lucene_Storage_Directory
|
||
|
*/
|
||
|
private $_directory = null;
|
||
|
|
||
|
/**
|
||
|
* File system adapter closing option
|
||
|
*
|
||
|
* @var boolean
|
||
|
*/
|
||
|
private $_closeDirOnExit = true;
|
||
|
|
||
|
/**
|
||
|
* Writer for this index, not instantiated unless required.
|
||
|
*
|
||
|
* @var Zend_Search_Lucene_Index_Writer
|
||
|
*/
|
||
|
private $_writer = null;
|
||
|
|
||
|
/**
|
||
|
* Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
|
||
|
*
|
||
|
* @var array Zend_Search_Lucene_Index_SegmentInfo
|
||
|
*/
|
||
|
private $_segmentInfos = array();
|
||
|
|
||
|
/**
|
||
|
* Number of documents in this index.
|
||
|
*
|
||
|
* @var integer
|
||
|
*/
|
||
|
private $_docCount = 0;
|
||
|
|
||
|
/**
|
||
|
* Flag for index changes
|
||
|
*
|
||
|
* @var boolean
|
||
|
*/
|
||
|
private $_hasChanges = false;
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Flag signalling that the index is already closed: changes are committed and resources are released
|
||
|
*
|
||
|
* @var boolean
|
||
|
*/
|
||
|
private $_closed = false;
|
||
|
|
||
|
/**
|
||
|
* Number of references to the index object
|
||
|
*
|
||
|
* @var integer
|
||
|
*/
|
||
|
private $_refCount = 0;
|
||
|
|
||
|
/**
|
||
|
* Current segment generation
|
||
|
*
|
||
|
* @var integer
|
||
|
*/
|
||
|
private $_generation;
|
||
|
|
||
|
const FORMAT_PRE_2_1 = 0;
|
||
|
const FORMAT_2_1 = 1;
|
||
|
const FORMAT_2_3 = 2;
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Index format version
|
||
|
*
|
||
|
* @var integer
|
||
|
*/
|
||
|
private $_formatVersion;
|
||
|
|
||
|
/**
 * Create a new index and return a handle to it.
 *
 * The index object is constructed in "create" mode and wrapped
 * into a Zend_Search_Lucene_Proxy before being handed to the caller.
 *
 * @param mixed $directory Passed unchanged to the Zend_Search_Lucene constructor
 * @return Zend_Search_Lucene_Interface
 */
public static function create($directory)
{
    $index = new Zend_Search_Lucene($directory, true);

    return new Zend_Search_Lucene_Proxy($index);
}
|
||
|
|
||
|
/**
 * Open an existing index and return a handle to it.
 *
 * The index object is constructed in "open" (non-create) mode and wrapped
 * into a Zend_Search_Lucene_Proxy before being handed to the caller.
 *
 * @param mixed $directory Passed unchanged to the Zend_Search_Lucene constructor
 * @return Zend_Search_Lucene_Interface
 */
public static function open($directory)
{
    $index = new Zend_Search_Lucene($directory, false);

    return new Zend_Search_Lucene_Proxy($index);
}
|
||
|
|
||
|
/** Maximum number of attempts to read a consistent generation number from segments.gen */
|
||
|
const GENERATION_RETRIEVE_COUNT = 10;
|
||
|
|
||
|
/** Pause between generation retrieval attempts, in milliseconds */
|
||
|
const GENERATION_RETRIEVE_PAUSE = 50;
|
||
|
|
||
|
/**
 * Get current generation number
 *
 * Returns generation number
 *  0 means pre-2.1 index format
 * -1 means there are no segments files.
 *
 * The generation is read from the segments.gen file, which stores the value
 * twice; a read is accepted only when both copies agree. Up to
 * GENERATION_RETRIEVE_COUNT attempts are made, pausing
 * GENERATION_RETRIEVE_PAUSE milliseconds after each inconsistent read.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @return integer
 * @throws Zend_Search_Lucene_Exception if segments.gen has a wrong format marker,
 *         if no consistent generation could be read within the allowed attempts,
 *         or on any storage error other than an unreadable file
 */
public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
{
    /**
     * Zend_Search_Lucene uses segments.gen file to retrieve current generation number
     *
     * Apache Lucene index format documentation mentions this method only as a fallback method
     *
     * Nevertheless we use it according to the performance considerations
     *
     * @todo check if we can use some modification of Apache Lucene generation determination algorithm
     *       without performance problems
     */

    require_once 'Zend/Search/Lucene/Exception.php';

    try {
        for ($count = 0; $count < self::GENERATION_RETRIEVE_COUNT; $count++) {
            // Try to get generation file
            $genFile = $directory->getFileObject('segments.gen', false);

            $format = $genFile->readInt();
            if ($format != (int)0xFFFFFFFE) {
                throw new Zend_Search_Lucene_Exception('Wrong segments.gen file format');
            }

            $gen1 = $genFile->readLong();
            $gen2 = $genFile->readLong();

            if ($gen1 == $gen2) {
                return $gen1;
            }

            // The two copies differ: wait a little and retry
            usleep(self::GENERATION_RETRIEVE_PAUSE * 1000);
        }

        // All passes are failed
        throw new Zend_Search_Lucene_Exception('Index is under processing now');
    } catch (Zend_Search_Lucene_Exception $e) {
        // Only an "is not readable" failure means segments.gen is unavailable;
        // everything else (wrong format, index busy, ...) is a real error.
        if (strpos($e->getMessage(), 'is not readable') === false) {
            throw $e;
        }
    }

    // segments.gen is not readable: fall back to probing the old style segments file
    try {
        $directory->getFileObject('segments', false);

        // It's pre-2.1 index
        return 0;
    } catch (Zend_Search_Lucene_Exception $e) {
        if (strpos($e->getMessage(), 'is not readable') === false) {
            throw $e;
        }
    }

    // Neither segments.gen nor segments is readable: there is no index here
    return -1;
}
|
||
|
|
||
|
/**
 * Build the name of the segments file for a given generation.
 *
 * Generation 0 maps to the plain 'segments' file; any other generation
 * maps to 'segments_' followed by the generation number written in base 36.
 *
 * @param integer $generation
 * @return string
 */
public static function getSegmentFileName($generation)
{
    return ($generation == 0)
        ? 'segments'
        : 'segments_' . base_convert($generation, 10, 36);
}
|
||
|
|
||
|
/**
 * Get index format version (one of the FORMAT_* class constants
 * once the index has been read).
 *
 * @return integer
 */
public function getFormatVersion()
{
    $version = $this->_formatVersion;

    return $version;
}
|
||
|
|
||
|
/**
 * Set index format version.
 * Index is converted to this format at the nearest update time
 *
 * Only FORMAT_PRE_2_1, FORMAT_2_1 and FORMAT_2_3 are accepted.
 *
 * @param int $formatVersion
 * @throws Zend_Search_Lucene_Exception if the version is not one of the supported formats
 */
public function setFormatVersion($formatVersion)
{
    $supportedVersions = array(self::FORMAT_PRE_2_1,
                               self::FORMAT_2_1,
                               self::FORMAT_2_3);

    // Loose comparison on purpose: same semantics as a chain of != checks
    if (!in_array($formatVersion, $supportedVersions)) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Unsupported index format');
    }

    $this->_formatVersion = $formatVersion;
}
|
||
|
|
||
|
/**
 * Read the 'segments' file of a pre-2.1 Lucene index.
 *
 * Resets the document counter, registers one segment info object per
 * segment listed in the file and sets the target format version to 2.1.
 *
 * @throws Zend_Search_Lucene_Exception if the file does not start with the expected format marker
 */
private function _readPre21SegmentsFile()
{
    $file = $this->_directory->getFileObject('segments');

    $marker = $file->readInt();
    if ($marker != (int)0xFFFFFFFF) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Wrong segments file format');
    }

    // Index version: read to advance the file position, value is not used
    $file->readLong();

    // Segment name counter: read to advance the file position, value is not used
    $file->readInt();

    $segmentCount = $file->readInt();

    $this->_docCount = 0;

    // One (name, size) record per segment
    for ($i = 0; $i < $segmentCount; $i++) {
        $name = $file->readString();
        $size = $file->readInt();

        $this->_docCount += $size;

        $this->_segmentInfos[$name] =
            new Zend_Search_Lucene_Index_SegmentInfo($this->_directory, $name, $size);
    }

    // Use 2.1 as a target version. Index will be reorganized at update time.
    $this->_formatVersion = self::FORMAT_2_1;
}
|
||
|
|
||
|
/**
 * Read segments file (2.1+ index formats)
 *
 * Detects the format version from the leading marker, resets the document
 * counter and registers one segment info object per segment record.
 *
 * @throws Zend_Search_Lucene_Exception if the format marker is not supported
 *         or a segment uses separate norm files
 */
private function _readSegmentsFile()
{
    $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));

    $format = $segmentsFile->readInt();

    if ($format == (int)0xFFFFFFFC) {
        $this->_formatVersion = self::FORMAT_2_3;
    } else if ($format == (int)0xFFFFFFFD) {
        $this->_formatVersion = self::FORMAT_2_1;
    } else {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
    }

    // read version
    $segmentsFile->readLong();

    // read segment name counter
    $segmentsFile->readInt();

    $segments = $segmentsFile->readInt();

    $this->_docCount = 0;

    // read segmentInfos
    for ($count = 0; $count < $segments; $count++) {
        $segName = $segmentsFile->readString();
        $segSize = $segmentsFile->readInt();

        // 2.1+ specific properties
        $delGen = $segmentsFile->readLong();

        // 2.3 specific properties: optional shared document store description
        $docStoreOptions = null;
        if ($this->_formatVersion == self::FORMAT_2_3) {
            $docStoreOffset = $segmentsFile->readInt();

            // The 0xFFFFFFFF marker means no doc store record follows
            if ($docStoreOffset != (int)0xFFFFFFFF) {
                $docStoreSegment        = $segmentsFile->readString();
                $docStoreIsCompoundFile = $segmentsFile->readByte();

                $docStoreOptions = array('offset'     => $docStoreOffset,
                                         'segment'    => $docStoreSegment,
                                         'isCompound' => ($docStoreIsCompoundFile == 1));
            }
        }

        $hasSingleNormFile = $segmentsFile->readByte();
        $numField          = $segmentsFile->readInt();

        // Any value other than the 0xFFFFFFFF marker announces per-field norm
        // generations, i.e. separate norm files, which are not supported.
        if ($numField != (int)0xFFFFFFFF) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Separate norm files are not supported. Optimize index to use it with Zend_Search_Lucene.');
        }

        $isCompoundByte = $segmentsFile->readByte();

        // Always assign $isCompound so that an unexpected byte can neither leave
        // it undefined nor silently reuse the value of the previous segment.
        if ($isCompoundByte == 0xFF) {
            // The segment is not a compound file
            $isCompound = false;
        } else if ($isCompoundByte == 0x01) {
            // The segment is a compound file
            $isCompound = true;
        } else {
            // 0x00 (or any unrecognized value): the status is unknown
            $isCompound = null;
        }

        $this->_docCount += $segSize;

        $this->_segmentInfos[$segName] =
            new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                     $segName,
                                                     $segSize,
                                                     $delGen,
                                                     $docStoreOptions,
                                                     $hasSingleNormFile,
                                                     $isCompound);
    }
}
|
||
|
|
||
|
/**
 * Opens the index.
 *
 * IndexReader constructor needs Directory as a parameter. It should be
 * a string with a path to the index folder or a Directory object.
 *
 * When $create is true a new (empty) index generation is written into the
 * directory before the segments file is read.
 *
 * @param mixed   $directory Path to the index folder or a
 *                           Zend_Search_Lucene_Storage_Directory_Filesystem object
 * @param boolean $create    Create a new index instead of opening the existing one
 * @throws Zend_Search_Lucene_Exception
 */
public function __construct($directory = null, $create = false)
{
    if ($directory === null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        // Throw the Lucene-specific exception that is actually loaded above and
        // documented in @throws (was the parent Zend_Search_Exception class).
        throw new Zend_Search_Lucene_Exception('No index directory specified');
    }

    if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
        // Directory object is owned by the caller: do not close it on exit
        $this->_directory      = $directory;
        $this->_closeDirOnExit = false;
    } else {
        $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
        $this->_closeDirOnExit = true;
    }

    $this->_segmentInfos = array();

    // Mark index as "under processing" to prevent other processes from premature index cleaning
    Zend_Search_Lucene_LockManager::obtainReadLock($this->_directory);

    $this->_generation = self::getActualGeneration($this->_directory);

    if ($create) {
        require_once 'Zend/Search/Lucene/Exception.php';
        try {
            Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
        } catch (Zend_Search_Lucene_Exception $e) {
            Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);

            if (strpos($e->getMessage(), 'Can\'t obtain exclusive index lock') === false) {
                throw $e;
            } else {
                throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
            }
        }

        if ($this->_generation == -1) {
            // Directory doesn't contain existing index, start from 1
            $this->_generation = 1;
            $nameCounter = 0;
        } else {
            // Directory contains existing index
            $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
            $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)

            $nameCounter = $segmentsFile->readInt();
            $this->_generation++;
        }

        Zend_Search_Lucene_Index_Writer::createIndex($this->_directory, $this->_generation, $nameCounter);

        Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
    }

    if ($this->_generation == -1) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Index doesn\'t exists in the specified directory.');
    } else if ($this->_generation == 0) {
        $this->_readPre21SegmentsFile();
    } else {
        $this->_readSegmentsFile();
    }
}
|
||
|
|
||
|
/**
 * Close the index and free its resources.
 *
 * Commits pending changes, releases the read lock, closes the directory
 * when this object is responsible for it and drops internal references.
 * Calling it again on an already closed index does nothing.
 */
private function _close()
{
    if (!$this->_closed) {
        $this->commit();

        // Release "under processing" flag
        Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);

        if ($this->_closeDirOnExit) {
            $this->_directory->close();
        }

        $this->_directory    = null;
        $this->_writer       = null;
        $this->_segmentInfos = null;

        $this->_closed = true;
    }
}
|
||
|
|
||
|
/**
 * Register one more reference to the index object
 * (increments the internal reference counter).
 *
 * @internal
 */
public function addReference()
{
    ++$this->_refCount;
}
|
||
|
|
||
|
/**
 * Drop one reference to the index object.
 *
 * The index is closed and its resources are cleaned up
 * as soon as the reference counter reaches zero.
 *
 * @internal
 */
public function removeReference()
{
    if (--$this->_refCount == 0) {
        $this->_close();
    }
}
|
||
|
|
||
|
/**
 * Object destructor.
 *
 * Makes sure the index is closed when the object is destroyed;
 * _close() is a no-op if the index has already been closed.
 */
public function __destruct()
{
    $this->_close();
}
|
||
|
|
||
|
/**
 * Returns the Zend_Search_Lucene_Index_Writer of this index.
 *
 * The writer is created on first use from the current directory,
 * segment list and format version, then reused.
 *
 * @return Zend_Search_Lucene_Index_Writer
 */
private function _getIndexWriter()
{
    if ($this->_writer instanceof Zend_Search_Lucene_Index_Writer) {
        return $this->_writer;
    }

    $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory,
                                                         $this->_segmentInfos,
                                                         $this->_formatVersion);

    return $this->_writer;
}
|
||
|
|
||
|
|
||
|
/**
 * Returns the storage directory object this index works on
 * (null once the index has been closed).
 *
 * @return Zend_Search_Lucene_Storage_Directory
 */
public function getDirectory()
{
    $directory = $this->_directory;

    return $directory;
}
|
||
|
|
||
|
|
||
|
/**
 * Returns the total number of documents in this index (including deleted documents),
 * as accumulated from the segment sizes.
 *
 * @return integer
 */
public function count()
{
    $total = $this->_docCount;

    return $total;
}
|
||
|
|
||
|
/**
 * Returns one greater than the largest possible document number.
 *
 * Useful, e.g., to size a structure that holds one element per document
 * number. The value is the same as the one returned by count().
 *
 * @return integer
 */
public function maxDoc()
{
    $upperBound = $this->count();

    return $upperBound;
}
|
||
|
|
||
|
/**
 * Returns the total number of non-deleted documents in this index,
 * summed over all segments.
 *
 * @return integer
 */
public function numDocs()
{
    $total = 0;

    foreach ($this->_segmentInfos as $segment) {
        $total = $total + $segment->numDocs();
    }

    return $total;
}
|
||
|
|
||
|
/**
 * Checks, that document is deleted
 *
 * The index-wide document id is mapped to the segment that contains it
 * and to the document's local id inside that segment.
 *
 * @param integer $id
 * @return boolean
 * @throws Zend_Search_Lucene_Exception  Exception is thrown if $id is out of the range
 *                                       (negative, or not less than the document count)
 */
public function isDeleted($id)
{
    // Negative ids used to slip through and reach the first segment with a
    // negative local id; reject them together with too large ids.
    if ($id < 0 || $id >= $this->_docCount) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
    }

    // Walk the segments until the one whose id range contains $id is found;
    // $segmentInfo is intentionally used after the loop.
    $segmentStartId = 0;
    foreach ($this->_segmentInfos as $segmentInfo) {
        if ($segmentStartId + $segmentInfo->count() > $id) {
            break;
        }

        $segmentStartId += $segmentInfo->count();
    }

    return $segmentInfo->isDeleted($id - $segmentStartId);
}
|
||
|
|
||
|
/**
 * Set the default search field (shared by all index objects).
 *
 * Null means, that search is performed through all fields by default.
 * Null is also the initial value.
 *
 * @param string|null $fieldName
 */
public static function setDefaultSearchField($fieldName)
{
    self::$_defaultSearchField = $fieldName;
}
|
||
|
|
||
|
/**
 * Get the default search field (shared by all index objects).
 *
 * Null means, that search is performed through all fields by default.
 *
 * @return string|null
 */
public static function getDefaultSearchField()
{
    $fieldName = self::$_defaultSearchField;

    return $fieldName;
}
|
||
|
|
||
|
/**
 * Set the maximum number of hits returned by find() (shared by all index objects).
 *
 * 0 (default) means no limit
 *
 * @param integer $limit
 */
public static function setResultSetLimit($limit)
{
    self::$_resultSetLimit = $limit;
}
|
||
|
|
||
|
/**
 * Get the maximum number of hits returned by find().
 *
 * 0 means no limit
 *
 * @return integer
 */
public static function getResultSetLimit()
{
    $limit = self::$_resultSetLimit;

    return $limit;
}
|
||
|
|
||
|
/**
 * Set terms per query limit (shared by all index objects).
 *
 * 0 means no limit
 *
 * @param integer $limit
 */
public static function setTermsPerQueryLimit($limit)
{
    self::$_termsPerQueryLimit = $limit;
}
|
||
|
|
||
|
/**
 * Get terms per query limit.
 *
 * 0 means no limit; the initial value is 1024.
 *
 * @return integer
 */
public static function getTermsPerQueryLimit()
{
    $limit = self::$_termsPerQueryLimit;

    return $limit;
}
|
||
|
|
||
|
/**
 * Retrieve index maxBufferedDocs option (read from the index writer).
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment
 *
 * Default value is 10
 *
 * @return integer
 */
public function getMaxBufferedDocs()
{
    $writer = $this->_getIndexWriter();

    return $writer->maxBufferedDocs;
}
|
||
|
|
||
|
/**
 * Set index maxBufferedDocs option (stored on the index writer).
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment
 *
 * Default value is 10
 *
 * @param integer $maxBufferedDocs
 */
public function setMaxBufferedDocs($maxBufferedDocs)
{
    $writer = $this->_getIndexWriter();

    $writer->maxBufferedDocs = $maxBufferedDocs;
}
|
||
|
|
||
|
/**
 * Retrieve index maxMergeDocs option (read from the index writer).
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * @return integer
 */
public function getMaxMergeDocs()
{
    $writer = $this->_getIndexWriter();

    return $writer->maxMergeDocs;
}
|
||
|
|
||
|
/**
 * Set index maxMergeDocs option (stored on the index writer).
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * @param integer $maxMergeDocs
 */
public function setMaxMergeDocs($maxMergeDocs)
{
    $writer = $this->_getIndexWriter();

    $writer->maxMergeDocs = $maxMergeDocs;
}
|
||
|
|
||
|
/**
 * Retrieve index mergeFactor option (read from the index writer).
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10
 *
 * @return integer
 */
public function getMergeFactor()
{
    $writer = $this->_getIndexWriter();

    return $writer->mergeFactor;
}
|
||
|
|
||
|
/**
 * Set index mergeFactor option (stored on the index writer).
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10
 *
 * @param integer $mergeFactor
 */
public function setMergeFactor($mergeFactor)
{
    $writer = $this->_getIndexWriter();

    $writer->mergeFactor = $mergeFactor;
}
|
||
|
|
||
|
/**
 * Performs a query against the index and returns an array
 * of Zend_Search_Lucene_Search_QueryHit objects.
 * Input is a string or Zend_Search_Lucene_Search_Query.
 *
 * Pending changes are committed before the query is executed. Documents
 * with a zero score are skipped, and collecting stops as soon as the
 * result set limit (if any) is reached. Scores are normalized by the top
 * score when that one is greater than 1.
 *
 * With a single argument, hits are ordered by score (descending), then by
 * document id. Any additional arguments describe the ordering instead:
 * a field name, optionally followed by one or two integer sort flags
 * (as accepted by array_multisort()), repeated for each sort field.
 *
 * @param mixed $query
 * @return array Zend_Search_Lucene_Search_QueryHit
 * @throws Zend_Search_Lucene_Exception
 */
public function find($query)
{
    if (is_string($query)) {
        $query = Zend_Search_Lucene_Search_QueryParser::parse($query);
    }

    if (!$query instanceof Zend_Search_Lucene_Search_Query) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
    }

    $this->commit();

    // Three parallel lists, kept in step: hit objects, their scores and document ids
    $hits   = array();
    $scores = array();
    $ids    = array();

    $query = $query->rewrite($this)->optimize($this);

    $query->execute($this);

    $bestScore = 0;

    foreach ($query->matchedDocs() as $docId => $num) {
        $score = $query->score($docId, $this);

        if ($score != 0) {
            $queryHit        = new Zend_Search_Lucene_Search_QueryHit($this);
            $queryHit->id    = $docId;
            $queryHit->score = $score;

            $hits[]   = $queryHit;
            $ids[]    = $docId;
            $scores[] = $score;

            if ($score > $bestScore) {
                $bestScore = $score;
            }
        }

        // Checked on every iteration, whether or not the document was kept
        if (self::$_resultSetLimit != 0 && count($hits) >= self::$_resultSetLimit) {
            break;
        }
    }

    if (count($hits) == 0) {
        // skip sorting, which may cause a error on empty index
        return array();
    }

    if ($bestScore > 1) {
        foreach ($hits as $queryHit) {
            $queryHit->score /= $bestScore;
        }
    }

    if (func_num_args() == 1) {
        // sort by scores
        array_multisort($scores, SORT_DESC, SORT_NUMERIC,
                        $ids,    SORT_ASC,  SORT_NUMERIC,
                        $hits);

        return $hits;
    }

    // sort by given field names

    $argList    = func_get_args();
    $argCount   = count($argList);
    $fieldNames = $this->getFieldNames();
    $sortArgs   = array();

    // PHP 5.3 now expects all arguments to array_multisort be passed by
    // reference (if it's invoked through call_user_func_array());
    // since constants can't be passed by reference, create some placeholder variables.
    $sortReg = SORT_REGULAR;
    $sortAsc = SORT_ASC;
    $sortNum = SORT_NUMERIC;

    require_once 'Zend/Search/Lucene/Exception.php';
    for ($pos = 1; $pos < $argCount; $pos++) {
        $fieldName = $argList[$pos];

        if (!is_string($fieldName)) {
            throw new Zend_Search_Lucene_Exception('Field name must be a string.');
        }

        if (!in_array($fieldName, $fieldNames)) {
            throw new Zend_Search_Lucene_Exception('Wrong field name.');
        }

        // Collect the value of this field for every hit (null when the document has no such field)
        $fieldValues = array();
        foreach ($hits as $queryHit) {
            try {
                $value = $queryHit->getDocument()->getFieldValue($fieldName);
            } catch (Zend_Search_Lucene_Exception $e) {
                if (strpos($e->getMessage(), 'not found') === false) {
                    throw $e;
                }

                $value = null;
            }

            $fieldValues[] = $value;
        }

        // Store by reference, then break the binding so the next field gets a fresh array
        $sortArgs[] = &$fieldValues;
        unset($fieldValues);

        if ($pos + 1 < $argCount && is_integer($argList[$pos + 1])) {
            // First explicit sort flag
            $pos++;
            $sortArgs[] = &$argList[$pos];

            if ($pos + 1 < $argCount && is_integer($argList[$pos + 1])) {
                // Second explicit sort flag
                $pos++;
                $sortArgs[] = &$argList[$pos];
            } else if ($argList[$pos] == SORT_ASC || $argList[$pos] == SORT_DESC) {
                // Only the sort order was given: use the default sort type
                $sortArgs[] = &$sortReg;
            } else {
                // Only the sort type was given: use the default sort order
                $sortArgs[] = &$sortAsc;
            }
        } else {
            // No flags at all: default order and type
            $sortArgs[] = &$sortAsc;
            $sortArgs[] = &$sortReg;
        }
    }

    // Sort by id's if values are equal
    $sortArgs[] = &$ids;
    $sortArgs[] = &$sortAsc;
    $sortArgs[] = &$sortNum;

    // Array to be sorted
    $sortArgs[] = &$hits;

    // Do sort
    call_user_func_array('array_multisort', $sortArgs);

    return $hits;
}
|
||
|
|
||
|
|
||
|
/**
 * Returns the field names that exist in this index, merged over all segments.
 *
 * @param boolean $indexed Passed to each segment's getFields() call
 * @return array
 */
public function getFieldNames($indexed = false)
{
    $names = array();

    foreach ($this->_segmentInfos as $segment) {
        $segmentFields = $segment->getFields($indexed);
        $names         = array_merge($names, $segmentFields);
    }

    return $names;
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Returns a Zend_Search_Lucene_Document object for the document
|
||
|
* number $id in this index.
|
||
|
*
|
||
|
* @param integer|Zend_Search_Lucene_Search_QueryHit $id
|
||
|
* @return Zend_Search_Lucene_Document
|
||
|
* @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
|
||
|
*/
|
||
|
public function getDocument($id)
{
    // A query hit may be passed in place of a raw document number.
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        $id = $id->id;
    }

    // Validate both ends of the range. A negative id used to slip past the
    // upper-bound check and produced a negative seek offset further down
    // instead of the documented exception.
    if ($id < 0 || $id >= $this->_docCount) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
    }

    // Walk the segments, accumulating their sizes, until we reach the one
    // whose id range contains $id. $segmentInfo stays bound to that segment
    // after the loop and $segmentStartId holds its first global id.
    $segmentStartId = 0;
    foreach ($this->_segmentInfos as $segmentInfo) {
        if ($segmentStartId + $segmentInfo->count() > $id) {
            break;
        }

        $segmentStartId += $segmentInfo->count();
    }

    // .fdx holds one 8-byte entry per document (hence the "* 8" and the
    // readLong()); the entry is the position of the document's data in .fdt.
    // NOTE(review): seeks are relative (SEEK_CUR), presumably because the
    // returned handle is already positioned at the start of the sub-file
    // inside a compound file - confirm against openCompoundFile().
    $fdxFile = $segmentInfo->openCompoundFile('.fdx');
    $fdxFile->seek(($id - $segmentStartId) * 8, SEEK_CUR);
    $fieldValuesPosition = $fdxFile->readLong();

    $fdtFile = $segmentInfo->openCompoundFile('.fdt');
    $fdtFile->seek($fieldValuesPosition, SEEK_CUR);
    $fieldCount = $fdtFile->readVInt();

    $doc = new Zend_Search_Lucene_Document();
    for ($count = 0; $count < $fieldCount; $count++) {
        // Each stored field record starts with the field number and a flags byte.
        $fieldNum = $fdtFile->readVInt();
        $bits     = $fdtFile->readByte();

        $fieldInfo = $segmentInfo->getField($fieldNum);

        // Flag bit 2 selects binary vs. text payload; flag bit 1 is passed on
        // as the sixth constructor argument of the field.
        if (!($bits & 2)) { // Text data
            $field = new Zend_Search_Lucene_Field($fieldInfo->name,
                                                  $fdtFile->readString(),
                                                  'UTF-8',
                                                  true,
                                                  $fieldInfo->isIndexed,
                                                  $bits & 1);
        } else {            // Binary data
            $field = new Zend_Search_Lucene_Field($fieldInfo->name,
                                                  $fdtFile->readBinary(),
                                                  '',
                                                  true,
                                                  $fieldInfo->isIndexed,
                                                  $bits & 1,
                                                  true);
        }

        $doc->addField($field);
    }

    return $doc;
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Returns true if the index contains documents with the specified term.
|
||
|
*
|
||
|
* Is used for query optimization.
|
||
|
*
|
||
|
* @param Zend_Search_Lucene_Index_Term $term
|
||
|
* @return boolean
|
||
|
*/
|
||
|
public function hasTerm(Zend_Search_Lucene_Index_Term $term)
{
    // A single segment knowing the term is enough, so stop at the first hit.
    foreach ($this->_segmentInfos as $segment) {
        $termInfo = $segment->getTermInfo($term);

        if ($termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
            return true;
        }
    }

    // No segment returned a term info object for this term.
    return false;
}
|
||
|
|
||
|
/**
|
||
|
* Returns IDs of all documents containing term.
|
||
|
*
|
||
|
* @param Zend_Search_Lucene_Index_Term $term
|
||
|
* @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
|
||
|
* @return array
|
||
|
*/
|
||
|
public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    $perSegmentDocs = array();
    $docIdOffset    = 0;

    // Query every segment, passing the running total of preceding segment
    // sizes as that segment's starting document id.
    foreach ($this->_segmentInfos as $segment) {
        $perSegmentDocs[] = $segment->termDocs($term, $docIdOffset, $docsFilter);
        $docIdOffset     += $segment->count();
    }

    switch (count($perSegmentDocs)) {
        case 0:
            // No segments at all.
            return array();

        case 1:
            // Index is optimized (only one segment):
            // hand the segment's array back as is, without reindexing.
            return reset($perSegmentDocs);

        default:
            // Several segments: concatenate their id lists.
            return call_user_func_array('array_merge', $perSegmentDocs);
    }
}
|
||
|
|
||
|
/**
|
||
|
* Returns documents filter for all documents containing term.
|
||
|
*
|
||
|
* It performs the same operation as termDocs, but return result as
|
||
|
* Zend_Search_Lucene_Index_DocsFilter object
|
||
|
*
|
||
|
* @param Zend_Search_Lucene_Index_Term $term
|
||
|
* @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
|
||
|
* @return Zend_Search_Lucene_Index_DocsFilter
|
||
|
*/
|
||
|
public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    // Must exist before the loop: with no segments the loop body never runs
    // and count() below would otherwise be applied to an undefined variable.
    $subResults        = array();
    $segmentStartDocId = 0;

    // NOTE(review): a Zend_Search_Lucene_Index_DocsFilter used to be created
    // here and was always discarded (overwritten or skipped by an early
    // return). The method effectively behaves like termDocs() and returns a
    // plain array, not the DocsFilter announced in the docblock. The return
    // type is left as is because callers may rely on it - confirm the
    // intended contract before changing it.
    foreach ($this->_segmentInfos as $segmentInfo) {
        $subResults[] = $segmentInfo->termDocs($term, $segmentStartDocId, $docsFilter);

        $segmentStartDocId += $segmentInfo->count();
    }

    if (count($subResults) == 0) {
        return array();
    } else if (count($subResults) == 1) {
        // Index is optimized (only one segment)
        // Do not perform array reindexing
        return reset($subResults);
    }

    // Several segments: concatenate their id lists.
    return call_user_func_array('array_merge', $subResults);
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Returns an array of all term freqs.
|
||
|
* Result array structure: array(docId => freq, ...)
|
||
|
*
|
||
|
* @param Zend_Search_Lucene_Index_Term $term
|
||
|
* @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
|
||
|
* @return array
|
||
|
*/
|
||
|
public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    $freqsByDocId = array();
    $docIdOffset  = 0;

    foreach ($this->_segmentInfos as $segment) {
        // Array union keeps existing keys, so entries already collected from
        // earlier segments are never overwritten by later ones.
        $segmentFreqs = $segment->termFreqs($term, $docIdOffset, $docsFilter);
        $freqsByDocId = $freqsByDocId + $segmentFreqs;

        // The next segment's ids start after this segment's documents.
        $docIdOffset = $docIdOffset + $segment->count();
    }

    return $freqsByDocId;
}
|
||
|
|
||
|
/**
|
||
|
* Returns an array of all term positions in the documents.
|
||
|
* Result array structure: array(docId => array(pos1, pos2, ...), ...)
|
||
|
*
|
||
|
* @param Zend_Search_Lucene_Index_Term $term
|
||
|
* @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
|
||
|
* @return array
|
||
|
*/
|
||
|
public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    $positionsByDocId = array();
    $docIdOffset      = 0;

    foreach ($this->_segmentInfos as $segment) {
        // Array union keeps existing keys, so entries already collected from
        // earlier segments are never overwritten by later ones.
        $segmentPositions = $segment->termPositions($term, $docIdOffset, $docsFilter);
        $positionsByDocId = $positionsByDocId + $segmentPositions;

        // The next segment's ids start after this segment's documents.
        $docIdOffset = $docIdOffset + $segment->count();
    }

    return $positionsByDocId;
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Returns the number of documents in this index containing the $term.
|
||
|
*
|
||
|
* @param Zend_Search_Lucene_Index_Term $term
|
||
|
* @return integer
|
||
|
*/
|
||
|
public function docFreq(Zend_Search_Lucene_Index_Term $term)
{
    $total = 0;

    foreach ($this->_segmentInfos as $segment) {
        $info = $segment->getTermInfo($term);

        // Segments that return null for the term contribute nothing.
        if ($info === null) {
            continue;
        }

        $total += $info->docFreq;
    }

    return $total;
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Retrieve similarity used by index reader
|
||
|
*
|
||
|
* @return Zend_Search_Lucene_Search_Similarity
|
||
|
*/
|
||
|
public function getSimilarity()
{
    // The index keeps no similarity of its own; it returns whatever the
    // static default accessor provides.
    $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

    return $similarity;
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Returns a normalization factor for "field, document" pair.
|
||
|
*
|
||
|
* @param integer $id
|
||
|
* @param string $fieldName
|
||
|
* @return float
|
||
|
*/
|
||
|
public function norm($id, $fieldName)
{
    // Ids outside the index have no norm. The lower bound is checked too:
    // a negative id used to pass the upper-bound test and was handed to the
    // first segment as a negative local id.
    if ($id < 0 || $id >= $this->_docCount) {
        return null;
    }

    // Find the segment whose id range contains $id; $segInfo stays bound to
    // it after the loop and $segmentStartId holds its first global id.
    $segmentStartId = 0;
    foreach ($this->_segmentInfos as $segInfo) {
        if ($segmentStartId + $segInfo->count() > $id) {
            break;
        }

        $segmentStartId += $segInfo->count();
    }

    // Segment methods take ids local to the segment.
    $localId = $id - $segmentStartId;

    // Deleted documents get a zero normalization factor.
    if ($segInfo->isDeleted($localId)) {
        return 0;
    }

    return $segInfo->norm($localId, $fieldName);
}
|
||
|
|
||
|
/**
|
||
|
* Returns true if any documents have been deleted from this index.
|
||
|
*
|
||
|
* @return boolean
|
||
|
*/
|
||
|
public function hasDeletions()
{
    foreach ($this->_segmentInfos as $segment) {
        if (!$segment->hasDeletions()) {
            continue;
        }

        // One segment with deletions is enough to answer the question.
        return true;
    }

    return false;
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Deletes a document from the index.
|
||
|
* $id is an internal document id
|
||
|
*
|
||
|
* @param integer|Zend_Search_Lucene_Search_QueryHit $id
|
||
|
* @throws Zend_Search_Lucene_Exception
|
||
|
*/
|
||
|
public function delete($id)
{
    // A query hit may be passed in place of a raw document number.
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        $id = $id->id;
    }

    // Validate both ends of the range. A negative id used to pass the
    // upper-bound check and was forwarded to the first segment's delete()
    // as a negative local id.
    if ($id < 0 || $id >= $this->_docCount) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
    }

    // Find the segment whose id range contains $id; $segmentInfo stays bound
    // to it after the loop and $segmentStartId holds its first global id.
    $segmentStartId = 0;
    foreach ($this->_segmentInfos as $segmentInfo) {
        if ($segmentStartId + $segmentInfo->count() > $id) {
            break;
        }

        $segmentStartId += $segmentInfo->count();
    }

    // Segments address documents by ids local to the segment.
    $segmentInfo->delete($id - $segmentStartId);

    // Flag the index as modified so that commit() performs its work.
    $this->_hasChanges = true;
}
|
||
|
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Adds a document to this index.
|
||
|
*
|
||
|
* @param Zend_Search_Lucene_Document $document
|
||
|
*/
|
||
|
public function addDocument(Zend_Search_Lucene_Document $document)
{
    $writer = $this->_getIndexWriter();
    $writer->addDocument($document);

    // Bookkeeping is updated only after the writer call has returned.
    $this->_hasChanges = true;
    $this->_docCount++;
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Update document counter
|
||
|
*/
|
||
|
private function _updateDocCount()
{
    // Recompute from scratch: the counter becomes the sum of all segment sizes.
    $this->_docCount = 0;

    foreach ($this->_segmentInfos as $segment) {
        $this->_docCount = $this->_docCount + $segment->count();
    }
}
|
||
|
|
||
|
/**
|
||
|
* Commit changes resulting from delete() or undeleteAll() operations.
|
||
|
*
|
||
|
* @todo undeleteAll processing.
|
||
|
*/
|
||
|
public function commit()
{
    // Nothing pending: leave the writer and the counters untouched.
    if (!$this->_hasChanges) {
        return;
    }

    $writer = $this->_getIndexWriter();
    $writer->commit();

    // Refresh the document counter from the segment list after the commit.
    $this->_updateDocCount();

    $this->_hasChanges = false;
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Optimize index.
|
||
|
*
|
||
|
* Merges all segments into one
|
||
|
*/
|
||
|
public function optimize()
{
    // Flush pending changes first (commit() is a no-op when there are none).
    $this->commit();

    // Merging is only worthwhile with more than one segment or with deletions.
    $needsMerge = count($this->_segmentInfos) > 1 || $this->hasDeletions();
    if (!$needsMerge) {
        return;
    }

    $this->_getIndexWriter()->optimize();
    $this->_updateDocCount();
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Returns an array of all terms in this index.
|
||
|
*
|
||
|
* @return array
|
||
|
*/
|
||
|
public function terms()
{
    $allTerms = array();
    $queue    = new Zend_Search_Lucene_Index_TermsPriorityQueue();

    // Rewind every segment's terms stream and enqueue the segments that
    // actually have a current term ("empty" segments are skipped).
    foreach ($this->_segmentInfos as $segment) {
        $segment->resetTermsStream();

        if ($segment->currentTerm() === null) {
            continue;
        }

        $queue->put($segment);
    }

    // NOTE(review): presumably the queue pops segments ordered by their
    // current term - confirm against Zend_Search_Lucene_Index_TermsPriorityQueue.
    for ($current = $queue->pop(); $current !== null; $current = $queue->pop()) {
        // The term is a repeat when the next queued segment sits on the same key.
        $isRepeated = $queue->top() !== null
                   && $queue->top()->currentTerm()->key() == $current->currentTerm()->key();

        if (!$isRepeated) {
            // We got new term
            $allTerms[] = $current->currentTerm();
        }

        // Advance the segment; while it still has terms it goes back
        // into the priority queue.
        if ($current->nextTerm() !== null) {
            $queue->put($current);
        }
    }

    return $allTerms;
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Terms stream priority queue object
|
||
|
*
|
||
|
* @var Zend_Search_Lucene_TermStreamsPriorityQueue
|
||
|
*/
|
||
|
// Null until resetTermsStream() creates the queue; closeTermsStream() sets it back to null.
private $_termsStream = null;
|
||
|
|
||
|
/**
|
||
|
* Reset terms stream.
|
||
|
*/
|
||
|
public function resetTermsStream()
{
    // An existing stream is simply rewound.
    if ($this->_termsStream !== null) {
        $this->_termsStream->resetTermsStream();
        return;
    }

    // First use: build the stream over the current segment list.
    $this->_termsStream = new Zend_Search_Lucene_TermStreamsPriorityQueue($this->_segmentInfos);
}
|
||
|
|
||
|
/**
|
||
|
* Skip terms stream up to the specified term prefix.
|
||
|
*
|
||
|
* Prefix contains fully specified field info and portion of searched term
|
||
|
*
|
||
|
* @param Zend_Search_Lucene_Index_Term $prefix
|
||
|
*/
|
||
|
public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
{
    // Pure delegation to the terms stream held in $_termsStream.
    $stream = $this->_termsStream;
    $stream->skipTo($prefix);
}
|
||
|
|
||
|
/**
|
||
|
* Scans terms dictionary and returns next term
|
||
|
*
|
||
|
* @return Zend_Search_Lucene_Index_Term|null
|
||
|
*/
|
||
|
public function nextTerm()
{
    // Pure delegation to the terms stream held in $_termsStream.
    $stream = $this->_termsStream;

    return $stream->nextTerm();
}
|
||
|
|
||
|
/**
|
||
|
* Returns term in current position
|
||
|
*
|
||
|
* @return Zend_Search_Lucene_Index_Term|null
|
||
|
*/
|
||
|
public function currentTerm()
{
    // Pure delegation to the terms stream held in $_termsStream.
    $stream = $this->_termsStream;

    return $stream->currentTerm();
}
|
||
|
|
||
|
/**
|
||
|
* Close terms stream
|
||
|
*
|
||
|
* Should be used for resources clean up if stream is not read up to the end
|
||
|
*/
|
||
|
public function closeTermsStream()
{
    // Make the cleanup idempotent: if the stream was never opened or has
    // already been closed there is nothing to release. Previously this path
    // ended in a fatal error (method call on null).
    if ($this->_termsStream === null) {
        return;
    }

    $this->_termsStream->closeTermsStream();

    // Drop the reference so that the next resetTermsStream() builds a new stream.
    $this->_termsStream = null;
}
|
||
|
|
||
|
|
||
|
/*************************************************************************
|
||
|
@todo UNIMPLEMENTED
|
||
|
*************************************************************************/
|
||
|
/**
|
||
|
* Undeletes all documents currently marked as deleted in this index.
|
||
|
*
|
||
|
* @todo Implementation
|
||
|
*/
|
||
|
public function undeleteAll()
// Not implemented yet (see @todo): the body is empty, so calling this does nothing.
{}
|
||
|
}
|