您现在的位置是:网站首页 > 博客日记 >

php-jieba分词

作者:YXN-php 阅读量:50 发布日期:2024-05-09

安装

下载地址:https://github.com/fukuball/jieba-php,下载zip解压到项目目录

分词

基本使用

请根据 jieba-php 实际解压的位置,修改下面代码中 require_once 的路径

<?php
// Raise the memory limit to 1024M before the segmenter is initialised.
ini_set('memory_limit', '1024M');

require_once "./jieba-php/vendor/multi-array/MultiArray.php";
require_once "./jieba-php/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once "./jieba-php/class/Jieba.php";
require_once "./jieba-php/class/Finalseg.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;

// Both components are initialised once, before the first cut call.
Jieba::init();
Finalseg::init();

// Plain call: no mode flag supplied.
var_dump(Jieba::cut("怜香惜玉也得要看对象啊!"));

// The same sentence is segmented twice to compare the two modes.
$sentence = "我来到北京清华大学";

// Full mode: second argument is true.
var_dump(Jieba::cut($sentence, true));

// Precise mode (the default): second argument is false.
var_dump(Jieba::cut($sentence, false));

// Plain call again, on a different sentence.
var_dump(Jieba::cut("他来到了网易杭研大厦"));

// Search-engine mode.
var_dump(Jieba::cutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造"));

简单封装

segment.php

<?php
// Raise the memory limit to 1024M; this takes effect in any script that
// includes this file.
// NOTE(review): presumably needed because jieba-php's dictionaries are
// memory-hungry — confirm against the library's documentation.
ini_set('memory_limit', '1024M');

require_once "jieba-php/vendor/multi-array/MultiArray.php";
require_once "jieba-php/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once "jieba-php/class/Jieba.php";
require_once "jieba-php/class/Finalseg.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;
// Mode selectors for Segment::segmentString().
// SEARCH_MODE routes to Jieba::cutForSearch() and is the default argument.
const SEARCH_MODE = 0;
// DEFAULT_MODE routes to Jieba::cut() with no extra flag (precise mode).
const DEFAULT_MODE = 1;

/**
 * Thin static wrapper around jieba-php that segments a string in one call.
 */
class Segment
{
    /**
     * Whether Jieba and Finalseg have already been initialised by this class.
     *
     * @var bool
     */
    private static $initialized = false;

    /**
     * Split a string into words using the requested segmentation mode.
     *
     * @param string $str  Text to segment.
     * @param int    $mode SEARCH_MODE (the default) for search-engine mode,
     *                     DEFAULT_MODE for precise mode; any other value
     *                     selects full mode.
     *
     * @return array Words produced by jieba-php for the chosen mode.
     */
    public static function segmentString($str, $mode = SEARCH_MODE)
    {
        // Initialise the library only on the first call instead of on every
        // call; repeated segmentations then reuse the already-loaded state.
        if (!self::$initialized) {
            Jieba::init();
            Finalseg::init();
            self::$initialized = true;
        }

        switch ($mode) {
            case SEARCH_MODE:  // search-engine mode
                return Jieba::cutForSearch($str);
            case DEFAULT_MODE:  // precise mode
                return Jieba::cut($str);
            default:  // full mode
                // Full mode is selected by passing true as the second
                // argument; without it this branch would silently behave
                // exactly like DEFAULT_MODE.
                return Jieba::cut($str, true);
        }
    }
}
调用
<?php
require_once 'segment.php'; // load the Segment wrapper class

$sentence = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";

// No mode is passed, so segmentString() falls back to its default (SEARCH_MODE).
$tokens = Segment::segmentString($sentence);

// Uncomment to inspect the raw token array:
// var_dump($tokens);

// Print the tokens on one line, separated by single spaces.
echo implode(" ", $tokens);

 

 

 

YXN-php

2024-05-09